在 Excel 中加入一列并批量导入 MySQL 数据库(先转化成 DataFrame 格式)

  先将 Excel 文件转成 DataFrame 格式,因为 Python 中有大量的函数可以对这种格式的数据进行操作。

from datetime import datetime

import numpy as np
import pandas as pd
import pymysql
from sqlalchemy import create_engine
import time
import os


def _add_date_column(frame, date_text):
    """Return *frame* with a constant '日期' column placed just before '城市'.

    Raises:
        ValueError: if *frame* has no '城市' column.
    """
    columns = frame.columns.tolist()
    columns.insert(columns.index('城市'), '日期')
    # reindex does not modify the frame in place; it returns a new one in
    # which the freshly named '日期' column exists but is still empty.
    frame = frame.reindex(columns=columns)
    # A scalar is broadcast to every row, so no per-row list is needed.
    frame['日期'] = date_text
    return frame


def export(Folder_file_total, skip_first=5,
           db_url='mysql+pymysql://xxx:xxx@xxx:3306/test?charset=utf8',
           table_name='投件快递员_明细'):
    """Append the Excel workbooks found under a root folder to a MySQL table.

    Every directory below ``Folder_file_total`` (the root itself is excluded)
    is listed. Its files are ordered by the two-digit number that sits just
    before the 5-character extension, and each workbook is read into a
    DataFrame, given a '日期' column taken from the file name, and appended
    to ``table_name``.

    Args:
        Folder_file_total: Root directory whose sub-folders hold the workbooks.
        skip_first: Non-negative number of leading files (after sorting) to
            skip in every folder. The default of 5 is the value that used to
            be hard-coded; pass 0 to import every file.
        db_url: SQLAlchemy URL in the form
            ``mysql+pymysql://username:password@host:port/database?charset=...``.
        table_name: Destination table; rows are appended to it.

    Returns:
        A list of ``(path, exception)`` tuples, one per workbook that could
        not be read. These errors used to be collected and then discarded.

    Raises:
        ValueError: if a file name has no integer at ``name[-7:-5]``, or a
            workbook has no '城市' column.
    """
    errors = []

    # os.walk yields the root first; [1:] keeps only the nested folders.
    folders = [root for root, _dirs, _files
               in os.walk(Folder_file_total, topdown=True)][1:]
    if not folders:
        return errors

    # One engine serves every folder; it is disposed once the import ends.
    engine = create_engine(db_url)
    try:
        for folder in folders:
            names = sorted(os.listdir(folder), key=lambda x: int(x[-7:-5]))
            excel_files = [os.path.join(folder, name) for name in names]
            print(excel_files)
            total = len(excel_files)

            # `position` is the 1-based rank of the file inside the whole
            # folder listing, so progress still counts the skipped files.
            for position, excel_file in enumerate(excel_files[skip_first:],
                                                  start=skip_first + 1):
                name = os.path.basename(excel_file)
                # Excel leaves '~$...' lock files next to a workbook that is
                # currently open; they are not real workbooks.
                if name.startswith('~$'):
                    continue

                start = time.time()
                try:
                    df1 = pd.read_excel(excel_file)
                except Exception as e:
                    # Some workbooks cannot be opened; the failure may come
                    # from any reader backend, so record it and move on.
                    errors.append((excel_file, e))
                    continue

                # The 10 characters before the extension become the row date
                # (presumably something like YYYY-MM-DD; verify against the
                # real file names). Anything after a space is dropped.
                date_text = name[-15:-5].split(' ')[0]
                df1 = _add_date_column(df1, date_text)

                print('叮!开始导入...')
                df1.to_sql(table_name, engine, index=False, if_exists='append')
                end = time.time()

                print("excl: " + excel_file + " has been inserted "
                      + str(df1.iloc[:, 1].count()) + " datas!")
                print("excl: " + excel_file + " has been costed %s s" % (end - start))
                print('There is a total %d has been imported  %d 个表,Completion degree %d%%' % (
                    total, position, position * 100 / total))
    finally:
        engine.dispose()

    return errors


if __name__ == "__main__":
    # Root directory whose sub-folders are handed to export().
    filename = r'C:\Users\lenovo\Desktop\test2'

    first = time.time()
    export(filename)
    elapsed_hours = (time.time() - first) / 3600

    # Report the wall-clock duration of the whole run, in hours.
    print('这件事共用时 %f h' % round(elapsed_hours, 2))

猜你喜欢

转载自blog.csdn.net/HUIxihuanni/article/details/85081760