通用万能excel导入的Python实现
一.正言
从设计方案到算法和功能代码实现,花了我一周时间,在这里记录下来,方便大家参考。废话少说:首先通过配置文件配置你想要的数据,只需要配置简单的表达式,就可以实现有价值的数据分析,并挖掘出有价值的信息数据。最终得到的是以字段名为首行的json数据。后期可以把excel导入或导出功能做成可视化,使用户的操作和体验更加便捷。
二.配置文件
{
"updateTime": "2018-02-07",
"database_host": "10.10.10.6",
"database_port": 9400,
"database_name": "mdap-dev",
"filePath":"/projectName/input",
"fileName":"test.xlsx",
"$excelConfig": [
{
"sheetName":"数据录入" ,
"target_data_area":"A2~A31",
"target_data_key":"(x=A,y=1)",
"datas":[
{
"value": "(x=B,y=_y)",
"key":"指标小类"
},
{
"value": "(x=C,y=_y)",
"key":"(x=C,y=1)"
},
{
"value": "(x=D,y=_y)",
"key":"(x=D,y=1)"
},
{
"value": "(x=E,y=_y)",
"key":"(x=E,y=1)"
},
{
"value": "(x=F,y=_y)",
"key":"(x=F,y=1)"
}
]
}
]
}
三.配置文件说明
"updateTime": 更新时间设置
"database_host": "10.10.10.6", 存数据库地址
"database_port": 9400, 端口号
"database_name": "mdap-dev", 索引,名称
"filePath":"/projectName/input", 导入文件的路径
"fileName":"test.xlsx", 导入文件的名称
"$excelConfig": [ 文件具体参数配置
"sheetName":"数据录入" , 要读取的sheet的表名称(为空字符串时读取当前活动的sheet)
"target_data_area":"A2~A31", 相对参照物字段对应取值区域
"target_data_key":"(x=A,y=1)",相对参照物字段
"datas":[ 其他字段配置
"value": "(x=B,y=_y)", 字段对应的取值位置(通过表达式表示横纵方向的取值范围;带下划线的 _y 表示行号随参照区域逐行变化,x=B 表示固定取B列)
"key":"指标小类" 字段为"指标小类"
四.python代码
4.1 test.py
#coding=utf-8
import json
import config
import os, configparser
from importExcel import commonExcelImport
'''
测试各种样式的excel导入返回json数据条
通用模板
@author: andong
'''
#def loadSettingFile():
# Settings placeholder kept from the original script. ``config_file`` points at
# a template.json next to this file, but the loader below still reads ``path``.
cp = configparser.ConfigParser()
Settings = dict(
    config_file=os.path.join(os.path.dirname(__file__), 'template.json'),
    config=cp,
)

# Load the import template (see the configuration section for its layout).
path = r"D:\workSpace\allExcelExport\config\template.json"
with open(path, "r", encoding='UTF-8') as excelTemplate:
    data = json.load(excelTemplate)

updateTime = data['updateTime']
filePath = data['filePath']
fileName = data['fileName']
excelConfig = data['$excelConfig']

# One entry of ``$excelConfig`` describes one sheet; run the import per sheet.
for index, sheetConfig in enumerate(excelConfig, start=1):
    print("序号:%s 值:%s" % (index, sheetConfig))
    # Only trigger the import when executed as a script, not on import.
    if __name__ == '__main__':
        commonExcelImport(filePath, fileName, sheetConfig)
4.2 importExcel.py
import config,logging
import xlrd,xlwt
import uuid,json,re
import openpyxl,string
from openpyxl.cell import Cell
from openpyxl.workbook import Workbook
from openpyxl.utils import get_column_letter
from openpyxl.utils import column_index_from_string
from warnings import catch_warnings
# pyexcel_xls 以 OrderedDict 结构处理数据
#from collections import OrderedDict
class MyException(Exception):
    """Error raised when the Excel data cannot be parsed.

    The message is passed to ``Exception`` so that ``str(err)`` and
    ``err.args`` carry it, and it is also kept on ``err.message`` for
    callers that read that attribute.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message
class commonExcelImport(object):
    """Config-driven Excel importer that returns the sheet as a table.

    One sheet configuration (an entry of ``$excelConfig``) provides:

    * ``sheetName``        - sheet to read; ``""`` selects the active sheet.
    * ``target_data_area`` - reference area such as ``"A2~A31"``.
    * ``target_data_key``  - name of the reference column, either a literal or
      a cell expression such as ``"(x=A,y=1)"`` (the value of cell A1).
    * ``datas``            - further columns, each with a ``key`` (literal or
      cell expression) and a ``value`` expression. A coordinate containing
      ``_`` (for example ``y=_y``) varies over the reference area; any other
      coordinate is fixed.

    Constructing an instance runs the import immediately; the resulting table
    is stored on ``self.result``.
    """

    # Legacy workbook location, used only when the caller supplies no file name.
    _DEFAULT_FILE = r"D:\workSpace\allExcelExport\input\指标运营数据录入.xlsx"

    def __init__(self, filePath, fileName, config):
        self.filePath = filePath
        self.fileName = fileName
        self.config = config
        # Keep the table so callers can use it after construction.
        self.result = self.ready_excel(filePath, fileName, config)

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _letters_to_number(letters):
        """Return the 1-based column number of ``letters`` (A=1, Z=26, AA=27).

        Raises:
            ValueError: if ``letters`` is not a non-empty run of ASCII letters.
        """
        if not isinstance(letters, str) or re.fullmatch(r"[A-Za-z]+", letters) is None:
            raise ValueError("not a column name: %r" % (letters,))
        number = 0
        for char in letters.upper():
            number = number * 26 + ord(char) - ord("A") + 1
        return number

    @staticmethod
    def _split_coordinate(coordinate):
        """Split a coordinate such as ``"B12"`` into ``("B", 12)``.

        Raises:
            ValueError: if ``coordinate`` is not letters followed by digits.
        """
        match = re.fullmatch(r"\s*([A-Za-z]+)(\d+)\s*", str(coordinate))
        if match is None:
            raise ValueError("not a cell coordinate: %r" % (coordinate,))
        return match.group(1).upper(), int(match.group(2))

    @staticmethod
    def _parse_expression(expression):
        """Return the ``(x, y)`` parts of an expression like ``"(x=B,y=_y)"``."""
        inner = expression.strip()
        if inner.startswith("("):
            inner = inner[1:]
        if inner.endswith(")"):
            inner = inner[:-1]
        parts = inner.split(",")
        return parts[0].split("=")[1].strip(), parts[1].split("=")[1].strip()

    @classmethod
    def _resolve_key(cls, sheet, key):
        """Return ``key`` itself, or the cell value when it is a cell expression."""
        if key.startswith("(") and key.endswith(")"):
            x_part, y_part = cls._parse_expression(key)
            return sheet[x_part + y_part].value
        return key

    @staticmethod
    def _expand_range(col_start, row_start, col_end, row_end):
        """List every coordinate of a rectangular area in row-major order."""
        first = column_index_from_string(col_start)
        last = column_index_from_string(col_end)
        return [
            get_column_letter(column) + str(row)
            for row in range(int(row_start), int(row_end) + 1)
            for column in range(first, last + 1)
        ]

    @staticmethod
    def _open_sheet(fileAddr, sheetName):
        """Load the workbook and return the requested (or active) sheet.

        The workbook is loaded in normal (not read-only) mode because the
        merged-cell ranges are needed by the callers.
        """
        workbook = openpyxl.load_workbook(filename=fileAddr)
        return workbook[sheetName] if sheetName else workbook.active

    @classmethod
    def _cell_value(cls, sheet, merged_ranges, coordinate):
        """Return the value at ``coordinate``, resolving merged cells.

        A cell inside a merged range yields the value of the range's top-left
        cell; every other cell yields its own value.
        """
        letters, row = cls._split_coordinate(coordinate)
        column = column_index_from_string(letters)
        for area in merged_ranges:
            if area.min_row <= row <= area.max_row and area.min_col <= column <= area.max_col:
                return sheet.cell(row=area.min_row, column=area.min_col).value
        return sheet[letters + str(row)].value

    @classmethod
    def _read_cells(cls, sheet, merged_ranges, coordinates):
        """Return the values for ``coordinates`` in order.

        Each entry is either a coordinate string or a tuple of rows of cell
        objects (the result of slicing a worksheet); tuples are flattened
        row by row.
        """
        values = []
        for entry in coordinates:
            if isinstance(entry, tuple):
                names = [cell.coordinate for row in entry for cell in row]
            else:
                names = [entry]
            for name in names:
                values.append(cls._cell_value(sheet, merged_ranges, name))
        return values

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    @classmethod
    def changePath(cls, path):
        """Return ``path`` with single backslashes replaced by ``//``.

        A path that already contains a doubled backslash is returned
        unchanged.
        """
        if "\\\\" in path:
            return path
        return path.replace("\\", "//")

    @classmethod
    def ready_excel(cls, filePath=None, fileName=None, config=None):
        """Read one sheet as described by ``config`` and return it as a table.

        Args:
            filePath: directory of the workbook (may be empty).
            fileName: workbook file name. When omitted, the legacy hard-coded
                workbook location is used.
            config: one sheet configuration (see the class docstring).

        Returns:
            A list of rows; the first row holds the column names and each
            following row holds one record.

        Raises:
            KeyError: if a required configuration entry is missing.
            MyException: if a ``value`` expression cannot be parsed.
        """
        import os  # local import: the module's import block does not provide os

        config = {} if config is None else config
        if fileName:
            file_path = os.path.join(filePath, fileName) if filePath else fileName
        else:
            file_path = cls._DEFAULT_FILE

        sheetName = config["sheetName"]
        target_data_area = config["target_data_area"]
        target_data_key = config["target_data_key"]
        fields = config["datas"]

        try:
            workbook = openpyxl.load_workbook(file_path)
            sheet = workbook[sheetName] if sheetName else workbook.active

            reference_key = cls._resolve_key(sheet, target_data_key)
            logging.info("相对参照物的key=%s", reference_key)

            # Reference area "A2~A31" -> corner coordinates and extents.
            corners = target_data_area.split("~")
            col_start, row_start = cls._split_coordinate(corners[0])
            col_end, row_end = cls._split_coordinate(corners[1])
            row_num = row_end - row_start + 1
            span = cls.letter_differ(col_start, col_end)

            # Map every output column to the coordinates it is read from.
            layout = {reference_key: cls._expand_range(col_start, row_start, col_end, row_end)}
            for field in fields or []:
                logging.info(field)
                field_key = cls._resolve_key(sheet, field["key"])
                layout.update(
                    cls.parse_excel_data(
                        file_path, sheetName, reference_key, row_num,
                        col_start, col_end, row_start, row_end,
                        field_key, field, span,
                    )
                )
            logging.info("result_excel===>%s", layout)

            located = cls.findDataFromExcel(file_path, sheetName, layout)
            return cls.parse_need_datas(located)
        except (KeyboardInterrupt, SystemExit):
            # Log the interruption but let it propagate instead of hiding it.
            logging.error("解析异常")
            raise

    @classmethod
    def parse_excel_data(cls, fileAddr=None, sheetName=None, relative_key=None, row_num=None, letter_1=None, letter_2=None, number_1=None, number_2=None, key=None, value=None, span=None):
        """Expand one field's ``value`` expression into cell coordinates.

        Args:
            fileAddr, sheetName, relative_key: accepted for compatibility; the
                expansion needs only the expression and the reference area.
            row_num: number of rows in the reference area.
            letter_1, letter_2: first and last column letter of the area.
            number_1, number_2: first and last row number of the area.
            key: name of the output column.
            value: field configuration; ``value['value']`` is the expression.
            span: number of columns in the reference area.

        Returns:
            ``{key: [coordinate, ...]}``. When both x and y vary, the list
            holds every coordinate of the reference area in row-major order.

        Raises:
            MyException: if the expression cannot be parsed or expanded.
        """
        expression = value['value']
        try:
            x_part, y_part = cls._parse_expression(expression)
            x_varies = "_" in x_part
            y_varies = "_" in y_part

            if x_varies and y_varies:
                # Both coordinates follow the reference area.
                cells = cls._expand_range(letter_1, number_1, letter_2, number_2)
            elif x_varies:
                # Fixed row, columns follow the area; repeated once per area row.
                first = column_index_from_string(letter_1)
                last = column_index_from_string(letter_2)
                one_pass = [get_column_letter(column) + str(y_part) for column in range(first, last + 1)]
                cells = [name for _ in range(row_num) for name in one_pass]
            elif y_varies:
                # Fixed column, rows follow the area; repeated once per area column.
                cells = [
                    x_part + str(row)
                    for row in range(number_1, number_2 + 1)
                    for _ in range(span)
                ]
            else:
                # A constant cell, repeated for every cell of the area.
                constant = x_part + y_part
                cells = [
                    constant
                    for _ in range(span)
                    for _ in range(number_2 - number_1 + 1)
                ]
            return {key: cells}
        except Exception as err:
            raise MyException("解析excel数据异常!") from err

    @classmethod
    def findDataFromExcel(cls, fileAddr=None, sheetName=None, jsondata=None):
        """Look up the values for every column of ``jsondata``.

        Args:
            fileAddr: path of the workbook.
            sheetName: sheet to read; empty selects the active sheet.
            jsondata: mapping of column name to coordinates, as produced by
                ``parse_excel_data``.

        Returns:
            A mapping of column name to the list of cell values. Cells inside
            a merged range yield the merged range's value.
        """
        if not jsondata:
            return {}
        # Open the workbook once for all columns.
        sheet = cls._open_sheet(fileAddr, sheetName)
        merged_ranges = list(sheet.merged_cells.ranges)
        found = {}
        for name, coordinates in jsondata.items():
            found[name] = cls._read_cells(sheet, merged_ranges, coordinates)
        logging.info("%s", found)
        return found

    @classmethod
    def parseExcelGetDatas(cls, value=None, fileAddr=None, sheetName=None, jsondata=None):
        """Look up the values for a single column.

        Args:
            value: name of the output column.
            fileAddr: path of the workbook.
            sheetName: sheet to read; empty selects the active sheet.
            jsondata: coordinates (strings or tuples of cell rows) to read.

        Returns:
            ``{value: [cell value, ...]}``.
        """
        sheet = cls._open_sheet(fileAddr, sheetName)
        merged_ranges = list(sheet.merged_cells.ranges)
        return {value: cls._read_cells(sheet, merged_ranges, jsondata or [])}

    @classmethod
    def parse_need_datas(cls, jsonData=None):
        """Turn a column mapping into a row-oriented table.

        Args:
            jsonData: mapping of column name to a list of values.

        Returns:
            A list whose first row is the column names and whose remaining
            rows are the records. The number of records is the length of the
            first column. Empty input yields ``[[]]``.

        Raises:
            IndexError: if a later column is shorter than the first one.
        """
        header = list(jsonData or {})
        columns = [jsonData[name] for name in header]
        row_count = len(columns[0]) if columns else 0

        table = [header]
        for row_index in range(row_count):
            table.append([column[row_index] for column in columns])

        logging.getLogger().info(table)
        # The table is echoed to stdout as in the original implementation.
        print(table)
        return table

    @classmethod
    def letter_differ(cls, start=None, end=None):
        """Return how many columns lie between ``start`` and ``end`` inclusive.

        For example ``("A", "C")`` gives 3 and ``("Z", "AA")`` gives 2. The
        column names are converted arithmetically, so no lookup table is
        built. Lower-case names are accepted.

        Raises:
            ValueError: if either argument is not a column name.
        """
        return cls._letters_to_number(end) - cls._letters_to_number(start) + 1
觉得很6 请多多关注!!!