通用万能excel导入的Python实现

通用万能excel导入的Python实现


一.正言

        从设计方案到算法和功能代码实现,花了我一周时间,在这里记录方便大家参考。废话少说,首先通过配置文件配置你想要的数据,只需要配置简单的表达式就可以实现有价值的数据分析,并挖掘出有价值的信息数据。最终得到的是以字段为首的json数据。后期可以对excel导入或导出功能做成可视化,使用户的操作和体验更加便捷化。


二.配置文件

      
{
	"updateTime": "2018-02-07",           
	"database_host": "10.10.10.6",         
	"database_port": 9400,                
	"database_name": "mdap-dev",    
	"filePath":"/projectName/input", 
	"fileName":"test.xlsx", 	      
	"$excelConfig": [  					  
		{
			"sheetName":"数据录入" ,  	  
			"target_data_area":"A2~A31",
			"target_data_key":"(x=A,y=1)",
			"datas":[ 					
			 		{
						"value": "(x=B,y=_y)",
						"key":"指标小类"
					},
					{
						"value": "(x=C,y=_y)",
						"key":"(x=C,y=1)"
					},
					{
						"value": "(x=D,y=_y)",
						"key":"(x=D,y=1)"
					},
					{
						"value": "(x=E,y=_y)",
						"key":"(x=E,y=1)"
					},
					{
						"value": "(x=F,y=_y)",
						"key":"(x=F,y=1)"
					}
				
			 ]
			
		}
		
	]
}


三.配置文件说明

              
"updateTime": 更新时间设置
"database_host": "10.10.10.6",  存数据库地址       
"database_port": 9400,           端口号    
"database_name": "mdap-dev",    索引,名称
"filePath":"/projectName/input",  导入文件的路径
"fileName":"test.xlsx", 	      	导入文件的名称
"$excelConfig": [  			               文件具体参数配置    
	"sheetName":"数据录入" ,  	  第一个sheet的表名称
	"target_data_area":"A2~A31", 相对参照物字段对应取值区域
	"target_data_key":"(x=A,y=1)",相对参照物字段
	"datas":[ 					 其他字段配置
				"value": "(x=B,y=_y)",    字段对应的取值(通过表达式表示横纵方向的取值范围;带下划线的 _y 表示行号随参照区域逐行变化,x=B 表示固定取B列)
				"key":"指标小类" 		 字段为"指标小类"

四.python代码

       4.1 test.py

        

#coding=utf-8
import json
import config
import os, configparser
from importExcel import commonExcelImport 
'''
测试各种样式的excel导入返回json数据条 
通用模板
@author: andong

'''

#def loadSettingFile():

# Settings bundle kept for compatibility: the path of a template.json that sits
# next to this script, plus an (unpopulated) ConfigParser instance.
cp = configparser.ConfigParser()
Settings = dict(
    config_file = os.path.join(os.path.dirname(__file__),'template.json'),
    config = cp
)

# The template that is actually loaded (hard-coded developer path).
path=r"D:\workSpace\allExcelExport\config\template.json"

# Only hold the file open while it is being parsed; the import loop below can
# take a long time and does not need the handle.
with open(path, "r", encoding='UTF-8') as excelTemplate:
    data = json.load(excelTemplate)

updateTime = data['updateTime']
filePath = data['filePath']
fileName = data['fileName']
excelConfig = data['$excelConfig']

# Walk every sheet configuration declared under "$excelConfig".
for index, sheetConfig in enumerate(excelConfig, start=1):
    print ("序号:%s   值:%s" % (index, sheetConfig))
    # The import itself only runs when this file is executed as a script.
    if __name__ == '__main__':
        commonExcelImport(filePath,fileName,sheetConfig)
        




4.2 importExcel.py

    

import config,logging
import xlrd,xlwt
import uuid,json,re
import openpyxl,string
from openpyxl.cell import Cell
from openpyxl.workbook import Workbook
from openpyxl.utils import get_column_letter
from openpyxl.utils import column_index_from_string
from warnings import catch_warnings
# pyexcel_xls 以 OrderedDict 结构处理数据  
#from collections import OrderedDict  
  
class MyException(Exception):
    """Error raised when the Excel data cannot be parsed.

    The text is available both as ``exc.message`` and through the standard
    exception machinery (``str(exc)``, ``exc.args``).
    """

    def __init__(self,message):
        # Forward the message to Exception so str(exc) and tracebacks show it;
        # calling Exception.__init__ without arguments left them empty.
        super().__init__(message)
        self.message=message
        
class commonExcelImport(object):
    def __init__(self,filePath,fileName,config):
        self.filePath=filePath
        self.fileName=fileName
        self.config=config
        self.ready_excel(filePath,fileName,config)
    #coding=utf-8
    '''
    测试各种样式的excel导入返回json数据条 
    通用模板
    @author: andong
    
    '''
    @classmethod
    def changePath(path):
        if("\\\\" in path):
            pass
        elif("\\" in path):
            path = path.replace("\\", "//") 
        return path 
    
    
    '''
    返回数据
    '''
    @classmethod
    def ready_excel(self,filePath=None,fileName=None,config=None):
        """Import one sheet according to ``config`` and return the assembled rows.

        Steps:
          1. open the workbook and pick the sheet named by ``config["sheetName"]``
             (the active sheet when the name is empty);
          2. resolve the reference field name from ``config["target_data_key"]``
             and the reference area from ``config["target_data_area"]``
             (``"A2~A31"`` style);
          3. for every entry of ``config["datas"]`` ask ``parse_excel_data`` for
             the cell coordinates that hold the field's values;
          4. read the values with ``findDataFromExcel`` and shape them with
             ``parse_need_datas``.

        :param filePath: directory of the workbook
        :param fileName: file name of the workbook
        :param config: sheet configuration with the keys ``sheetName``,
            ``target_data_area``, ``target_data_key`` and ``datas``
        :return: whatever ``parse_need_datas`` returns (header row followed by
            one list per data row)
        :raises KeyError: when a required configuration key is missing
        :raises ValueError: when ``target_data_area`` is not made of cell
            references such as ``A2``

        ``filePath``/``fileName`` are used when they point to an existing file;
        otherwise the historical hard-coded workbook is opened, so existing
        setups keep working.
        """
        import os  # local import: this module's import block does not provide ``os``

        legacy_path=r"D:\workSpace\allExcelExport\input\指标运营数据录入.xlsx"
        file_path=legacy_path
        if filePath and fileName:
            candidate = os.path.join(filePath, fileName)
            if os.path.isfile(candidate):
                file_path = candidate
            else:
                logging.warning("workbook %s not found, falling back to %s", candidate, legacy_path)
        config = {} if config is None else config

        wb=openpyxl.load_workbook(file_path)
        sheetName = config["sheetName"]
        ws = wb[sheetName] if sheetName else wb.active
        # Hand the real title to the helpers so an empty name works there too.
        sheetName = ws.title

        def resolve_key(expression):
            """Turn "(x=A,y=1)" into the value of cell A1; return other keys as-is."""
            if not (expression.startswith("(") and expression.endswith(")")):
                return expression
            parts = expression.lstrip('(').rstrip(')').split(",")
            coordinate = parts[0].split("=")[1].strip() + parts[1].split("=")[1].strip()
            return ws[coordinate].value

        def split_reference(reference):
            """Split "A2" into ("A", 2) without relying on Cell.column, whose type
            (letter or number) depends on the openpyxl version."""
            match = re.match(r"\s*([A-Za-z]+)(\d+)\s*$", reference)
            if match is None:
                raise ValueError("invalid cell reference: %r" % (reference,))
            return match.group(1).upper(), int(match.group(2))

        # Reference field and the area that holds its values.
        target_data_key = resolve_key(config["target_data_key"])
        logging.info("相对参照物的key=%s", target_data_key)
        area = config["target_data_area"].split("~")
        d1 = area[0].strip()
        d2 = area[1].strip()
        colnum_start, rownum_start = split_reference(d1)
        colnum_end, rownum_end = split_reference(d2)
        row_num = rownum_end - rownum_start +1
        dateArea_num = self.letter_differ(colnum_start,colnum_end)  # number of columns spanned

        try:
            # The reference field keeps the cell range itself; the other fields
            # contribute lists of coordinates.
            result_excel = {target_data_key: [ws[d1:d2]]}
            for data in config["datas"] or ():
                logging.info(data)
                logging.info(data["key"])
                data_key = resolve_key(data["key"])
                result = self.parse_excel_data(file_path,sheetName,target_data_key,row_num,colnum_start,colnum_end,rownum_start,rownum_end,data_key,data,dateArea_num)
                result_excel.update(result)
            logging.info("result_excel===>%s", result_excel)
            result = self.findDataFromExcel(file_path,sheetName,result_excel)
            return self.parse_need_datas(result)  # assemble the rows to return
        except (KeyboardInterrupt, SystemExit):
            logging.error("解析异常")
            # Never swallow an interrupt or interpreter exit request.
            raise
    
    @classmethod
    def parse_excel_data(self,fileAddr=None,sheetName=None,relative_key=None,row_num=None,letter_1=None,letter_2=None,number_1=None,number_2=None,key=None,value=None,span=None):
        list_value=[]
        result={}
        wb=openpyxl.load_workbook(filename=fileAddr,read_only=True)
        if (sheetName==""):
            ws=wb.active
        else:
            ws=wb[sheetName]
        data = value['value']
        sheet=wb.get_sheet_by_name(sheetName)  #获取工作表
        #wb=openpyxl.Workbook()
        #ws=wb.active
        print( data.split(",")[0].split("(")[1])
        print(data.split(",")[1].split(")")[0])
        try:
            #if data.split(",")[0].split("(")[1].split("=")[1].find("_") !=-1 :
            x_num = data.split(",")[0].split("(")[1].split("=")[1].strip()
            #if data.split(",")[1].split(")")[0].split("=")[1].find("_") !=-1 :
            y_num = data.split(",")[1].split(")")[0].split("=")[1].strip()
            #TODO
            if data.split(",")[0].split("(")[1].split("=")[1].find("_") !=-1 and data.split(",")[1].split(")")[0].split("=")[1].find("_") ==-1: #x变量
                x_start_num = column_index_from_string(letter_1)
                x_end_num = column_index_from_string(letter_2)+1
                for num in range(row_num):
                    for start_num in range(x_start_num,x_end_num):
                        x_change_letter = get_column_letter(start_num)
                        list_value.append(x_change_letter+str(y_num))
                #for r in range(row_num):
                #    list_value.append(ws[startPosition:endPosition])
            elif data.split(",")[1].split(")")[0].split("=")[1].find("_") !=-1 and data.split(",")[0].split("(")[1].split("=")[1].find("_") ==-1:#y变量
                for num in range(number_1,number_2+1):
                    for letter_num in range(span):
                        list_value.append(x_num + str(num))
                        
            elif data.split(",")[0].split("(")[1].split("=")[1].find("_") !=-1 and data.split(",")[1].split(")")[0].split("=")[1].find("_") !=-1 : #x,y变量
                xystart_letter_befor = letter_1
                xystart_letter_later = number_1
                xyend_letter_befor = letter_2
                xyend_letter_later = number_2
                start = xystart_letter_befor+str(xystart_letter_later)
                end = xyend_letter_befor+str(xyend_letter_later)
                xy_col = self.letter_differ(xystart_letter_befor,xyend_letter_befor)
                xy_row = int(xyend_letter_later) - int(xystart_letter_later) +1
        #         for col in range(xy_col):
        #             for row in range(xy_row):
        #                 list_value.append(ws[start:end])
                list_value.append(ws[start:end])
            else : # x,y常量
                val = x_num +y_num
                for x in range(span):
                    for y in range(number_2-number_1+1):
                        list_value.append(val)
                
            jsondata={}
            jsondata[key] = list_value
            jsondata.update({key:list_value})
            return jsondata
        except Exception:
            raise MyException("解析excel数据异常!")
    
     
     
    '''
        从excel中匹配数据
        和解决合并单元格取值
    '''
    @classmethod
    def findDataFromExcel(self,fileAddr=None,sheetName=None,jsondata={}):
        json_result={}
        #sheetContent.get_highest_row() #sheetContent.get_highest_row()
        for index,value in enumerate(jsondata):
            print(jsondata[value])
            result = self.parseExcelGetDatas(value,fileAddr,sheetName,jsondata[value])
            json_result.update(result)
        #遍历所有excel
    #     for row in ws.rows:
    #         list=[] 
    #         #遍历所有excel表格行数据
    #         for cell in row:
    #             val=str(cell.value)
    #             print(val)
        logging.info(json_result)
        return json_result
    
    '''
    解析得到excel对应数据
    '''
    @classmethod
    def parseExcelGetDatas(self,value=None,fileAddr=None,sheetName=None,jsondata=None):
        """Read the values of the referenced cells, resolving merged cells.

        ``jsondata`` is a list whose items are either coordinate strings such as
        ``"B2"`` or tuples of rows of openpyxl cells (a cell range). Every
        referenced cell contributes one value: when the cell lies inside a merged
        range reported by xlrd, the value of the range's top-left cell is used;
        otherwise the cell's own value is read through openpyxl.

        Cells are now read even when the sheet has no merged ranges at all;
        previously such sheets produced an empty list.

        :param value: field name used as the key of the returned dict
        :param fileAddr: path of the workbook
        :param sheetName: sheet to read; an empty name selects the active sheet
        :param jsondata: list of coordinate strings and/or cell-range tuples
        :return: ``{value: [cell value, ...]}``

        NOTE(review): merged ranges still come from ``xlrd``; recent xlrd
        releases reportedly no longer open .xlsx files - confirm the pinned
        version.
        """
        wb=openpyxl.load_workbook(filename=fileAddr,read_only=True)
        try:
            ws = wb[sheetName] if sheetName else wb.active
            workbook = xlrd.open_workbook(fileAddr)
            sheet1 = workbook.sheet_by_name(ws.title)
            merged_cell = sheet1.merged_cells

            def cell_value(coordinate):
                """Return the value for one coordinate, honouring merged ranges."""
                row_number = int(re.sub(r"\D", "", coordinate))  # digits of the coordinate
                column_number = column_index_from_string(re.sub(r"\d", "", coordinate))  # letters of the coordinate
                # Entries are (row_lo, row_hi, col_lo, col_hi): 0-based with an
                # exclusive upper bound, hence the "<" / "<=" pairing against
                # the 1-based coordinate.
                for row_lo, row_hi, col_lo, col_hi in merged_cell:
                    if col_lo < column_number <= col_hi and row_lo < row_number <= row_hi:
                        return sheet1.cell_value(row_lo, col_lo)
                return ws[coordinate].value

            resultJson=[]
            for item in jsondata or ():
                if isinstance(item, tuple):
                    for row in item:
                        for cell in row:
                            # Use the cell's own coordinate rather than parsing
                            # its repr, which breaks on titles containing ".".
                            logging.debug(cell.coordinate)
                            resultJson.append(cell_value(cell.coordinate))
                else:
                    resultJson.append(cell_value(item))
        finally:
            # Read-only workbooks keep the file handle until closed.
            wb.close()
        return {value: resultJson}
    '''
    dict直接序列化为json对象 
    
    '''
    
    
    
    '''
    组装需要的数据
    '''
    @classmethod
    def parse_need_datas(self,jsonData=None):
        key_data=[]
        result=[]
        key_name=[]
        # 创建一个logger    
        logger = logging.getLogger()  
        for key in jsonData:
           key_data.append(key)
           key_name.append(key)
        result.append(key_name)
        for i in range(len(jsonData[key_data[0]])):
            resultJson=[]
            for j in jsonData:
               resultJson.append(jsonData[j][i]) 
            result.append(resultJson)
            #del resultJson[0:]
        logger.info(result)
        print(result)
        return result
       
    '''
    excel纵匹配
    @param obj2dData
    A~ZZZ
    字母相减算法
    '''
    @classmethod
    def letter_differ(self,start=None,end=None):
        X = []  
        Y = []  
        for i in range(26):  
            X.append(i+1)  
            Y.append(chr(65+i))
        for i in range(26):  
            for j in range(26): 
                 Y.append(chr(65+i)+chr(65+j))        
        for i in range(26):  
            for j in range(26): 
                for k in range(26): 
                    Y.append(chr(65+i)+chr(65+j)+chr(65+k))  
                 
        for k in range(26*26*26): 
            if k > 26:
                X.append(k)
        #print(Y)  
        result = Y.index(end)-Y.index(start) +1
        return result



觉得很6 请多多关注!!!


发布了28 篇原创文章 · 获赞 50 · 访问量 4万+

猜你喜欢

转载自blog.csdn.net/qq_21873747/article/details/79531032