xlrd、xlwt、pandas 处理 Excel 的实战经验（本项目用于自动化输出杭州市余杭区规划数据的汇总统计）

下面为统计前的大概格式：
###########################################
下面为统计处理后的结果（所有数据均为虚构或已做隐藏处理，以免被追究责任）：
##################################
#下面是相关代码，仅供参考，写得比较乱。数据处理前后的表格都放在 'E:\001' 目录下。
#!/usr/bin/env python
#coding:utf-8
import xlrd,xlwt,xlutils,pandas,os,re
import xlrd as xr
from xlwt import *
import xlwt as xw
# Shared cell styles. In this file style_bt is applied only to the report
# title; style_zw is applied to every other formatted cell.
style_zw = XFStyle()
style_bt = XFStyle()

# A single Borders object (line style 1 on all four edges) used by both styles.
borders = Borders()
borders.left = borders.right = borders.top = borders.bottom = 1
style_zw.borders = style_bt.borders = borders

# Body font: 宋体 (SimSun), default weight.
fnt = Font()
fnt.name = u'宋体'
style_zw.font = fnt

# Title font: 宋体 (SimSun), bold.
fnt1 = Font()
fnt1.name = u'宋体'
fnt1.bold = True
style_bt.font = fnt1

# Centre the cell content both vertically and horizontally in both styles.
alignment = xlwt.Alignment()
alignment.vert = xlwt.Alignment.VERT_CENTER
alignment.horz = xlwt.Alignment.HORZ_CENTER
style_zw.alignment = style_bt.alignment = alignment

# append_col(lie, name, keys)
def append_col(lie,name,keys):
    """Write a column of per-row sums into the output sheet.

    Works on module-level state: reads the source sheet ``oldsht`` together
    with its row/column counts ``row_old`` / ``col_old`` and writes into the
    output sheet ``newsht``.

    Args:
        lie: Index of the column to write in ``newsht``.
        name: Header text written to row 0 of that column.
        keys: Header names (row 0 of ``oldsht``) of the columns to add up.
            A source column is added once for every entry of ``keys`` that
            equals its header.
    """
    newsht.write(0, lie, name)
    headers = oldsht.row_values(0)
    # The header row is the same for every data row, so work out the
    # contributing columns up front: one index per (column, key) match,
    # kept in column order.
    source_cols = [
        col
        for col in range(col_old)
        for wanted in keys
        if headers[col] == wanted
    ]
    # Row 0 is the header, so data starts at row 1.
    for row in range(1, row_old):
        total = 0
        for col in source_cols:
            total += oldsht.cell(row, col).value
        newsht.write(row, lie, total)

# append_col_by_re(lie, name, head_all, long):
# output column, header text, regex for the start of a header, required header length
def append_col_by_re(lie,name,head_all,long):
    """Write a column summing every source column whose header matches a prefix.

    Works on module-level state: reads the source sheet ``oldsht`` together
    with its row/column counts ``row_old`` / ``col_old`` and writes into the
    output sheet ``newsht``.

    A source column contributes when its row-0 header is text, is exactly
    ``long`` characters long, and matches ``head_all`` at its start. Each
    contributing column is added exactly once per row.

    Args:
        lie: Index of the column to write in ``newsht``.
        name: Header text written to row 0 of that column.
        head_all: Regular-expression fragment that must match at the start of
            a header (it is used as a pattern, not escaped).
        long: Exact length a header must have to be considered.
    """
    pattern = re.compile("^{name}".format(name = head_all))
    # Only the first col_old header cells are scanned.
    headers = oldsht.row_values(0)[:col_old]
    # Resolve the contributing columns once. Non-text header cells (e.g. a
    # numeric code read back as a float) are skipped rather than passed to
    # len(), which would raise TypeError.
    source_cols = [
        col
        for col, header in enumerate(headers)
        if isinstance(header, str)
        and len(header) == long
        and pattern.search(header) is not None
    ]

    # Row 0 is the header, so data starts at row 1.
    for row in range(1, row_old):
        total = 0
        for col in source_cols:
            total += oldsht.cell_value(row, col)
        newsht.write(row, lie, total)
    newsht.write(0, lie, name)

# lie1_lie2(x, y): copy source column x into output column y as text
def lie1_lie2(x,y):
    """Copy one source column into the output sheet, converting values to text.

    Reads column ``x`` of the module-level ``oldsht`` for rows 1 up to
    ``row_old - 1`` (row 0 is skipped) and writes each value, formatted as a
    string, to column ``y`` of the module-level ``newsht`` in the same row.

    Args:
        x: Column index to read from ``oldsht``.
        y: Column index to write in ``newsht``.
    """
    for row in range(1, row_old):
        newsht.write(row, y, str(oldsht.cell_value(row, x)))

# bianhao(hang1, lie1): write running numbers starting at row hang1 in column lie1
def bianhao(hang1,lie1):
    """Number the rows of the output sheet 1, 2, 3, ...

    Writes to the module-level ``newsht`` for rows ``hang1`` up to
    ``row_old - 1``. The module-level ``bhlie`` is reset to 0 and afterwards
    holds the last number written (0 when no row was numbered).

    Args:
        hang1: First row to number.
        lie1: Column that receives the numbers.
    """
    global bhlie
    bhlie = 0
    for row in range(hang1, row_old):
        # The number is the 1-based position of the row within the range.
        bhlie = row - hang1 + 1
        newsht.write(row, lie1, bhlie)

def GetAllXlsFiles(path):
    """Collect the paths of all ``.xls`` files at or below ``path``.

    Args:
        path: A file or directory path.

    Returns:
        A list of paths. If ``path`` is itself a file whose extension is
        exactly ``.xls`` (case-sensitive), the list holds just that path. If
        it is a directory, the list holds every such file found in it and,
        recursively, in its sub-directories, in ``os.listdir`` order.
        Anything else yields an empty list.
    """
    def _is_xls_file(candidate):
        return os.path.isfile(candidate) and os.path.splitext(candidate)[1] == '.xls'

    if _is_xls_file(path):
        return [path]
    if not os.path.isdir(path):
        return []

    found = []
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        if os.path.isdir(full_path):
            found.extend(GetAllXlsFiles(full_path))
        elif _is_xls_file(full_path):
            found.append(full_path)
    return found

# Append a sum column to a workbook in place. ``keys`` lists the header names
# of the columns to add up; ``sumname`` is the header of the new column. The
# file is opened and saved on every call, so row/column counts are always
# re-read from the latest saved state.
def GetExcelKeySum(excelname,sheetname,keys,sumname):
    """Add a column of per-row sums to a sheet and save the workbook in place.

    The new column is written immediately to the right of the existing
    columns (at index ``ncols`` of the sheet as read).

    Args:
        excelname: Path of the workbook; it is read and then overwritten.
        sheetname: Name of the sheet to process.
        keys: Header names (row 0) of the columns to add up. A column is
            added once for every entry of ``keys`` that equals its header.
        sumname: Header text for the new column.
    """
    # Imported locally so the function does not depend on ``copy`` having
    # been bound elsewhere in the module before it is called.
    from xlutils.copy import copy

    book = xr.open_workbook(excelname)
    table = book.sheet_by_name(sheetname)
    cols = table.ncols
    rows = table.nrows
    wbook = copy(book)  # writable copy of the workbook that was just read
    wtable = wbook.get_sheet(sheetname)  # writable counterpart of ``table``
    wtable.write(0, cols, sumname)
    colskey = table.row_values(0)
    # The header row is the same for every data row, so resolve the
    # contributing columns once: one index per (column, key) match.
    source_cols = [j for j in range(cols) for key in keys if colskey[j] == key]
    # Row 0 is the header, so data starts at row 1.
    for i in range(1, rows):
        sumnum = 0
        for j in source_cols:
            sumnum += table.cell(i, j).value
        wtable.write(i, cols, sumnum)
    wbook.save(excelname)

# start
# Main script: for every .xls file found under E:\001, build a pivot table of
# Shape_Area by XMMC (rows) and GHYTDM (columns), then reshape it in three
# open/modify/save passes into a formatted report. The report is saved next
# to the input, with "HZHG" inserted before the file extension.
# NOTE(review): the outputs keep the .xls extension and live in the scanned
# directory, so a later run would presumably pick them up as inputs too.

allfiles = GetAllXlsFiles(r"E:\001")
for x in allfiles:
    temp = os.path.splitext(x)
    oldfilename = x
    newfilename = temp[0] + "HZHG" + temp[1]
    # Pivot table. These imports are re-executed on every loop iteration.
    from xlrd import open_workbook
    import pandas as pd
    import numpy as np
    data = pd.read_excel(r"%s"%oldfilename,sheet_name = 0)
    df = pd.DataFrame(data)
    # Sum Shape_Area per XMMC x GHYTDM, filling missing combinations with 0
    # and adding margin totals labelled '总计'.
    data_pivot = pd.pivot_table(df,index=['XMMC'],columns='GHYTDM', values='Shape_Area',
                                aggfunc=np.sum, fill_value=0,margins=True, margins_name='总计')
    # First save: the raw pivot table.
    data_pivot.to_excel(newfilename)
    # Reopen it to learn the row and column counts. oldsht / row_old / col_old /
    # newsht are the module-level names read by append_col, append_col_by_re,
    # lie1_lie2 and bianhao.
    oldWb = xlrd.open_workbook(newfilename, formatting_info=True)
    oldsht= oldWb.sheet_by_index(0)
    row_old = oldsht.nrows
    col_old = oldsht.ncols
    from xlutils.copy import copy
    newWb = copy(oldWb)
    newsht = newWb.get_sheet("sheet1")
    # Pass 1: append the category columns to the right of the pivot table.
    # Offsets +0..+3, +11 and +15 are not written here; pass 2 fills them.
    append_col(col_old + 4, "示范区基本农田", ['G111'])
    append_col(col_old + 5, "一般基本农田", ['G112'])
    append_col(col_old + 6, "一般农田", ['N111'])
    append_col(col_old + 7, "新增一般农田", ['N112'])
    append_col(col_old + 8, "园地", ['X12'])
    append_col(col_old + 9, "林地", ['X13'])
    append_col(col_old + 10, "其他农用地", ['X151',"X152","X153","X154","G12","G13"])
    # Sum every column whose 4-character header starts with X2 / G2.
    append_col_by_re(col_old + 12, "存量建设用地", "X2",4)
    append_col_by_re(col_old + 13, "新增建设用地", "G2",4)
    append_col(col_old + 14, "预测新增建设用地", ['Z1'])
    append_col(col_old + 16, "河流水面", ['X311'])
    append_col(col_old + 17, "其余未利用地", ['X312',"X313","X32"])
    newWb.save(newfilename)
    # Pass 2: reopen so the columns written in pass 1 can be read back as
    # sources for the subtotals.
    # NOTE(review): the negative offsets below assume the saved sheet is now
    # exactly 18 columns wider than in pass 1 (pass 1 wrote up to col_old + 17),
    # so col_old - 18 is the first column after the pivot table - confirm.
    oldWb = xlrd.open_workbook(newfilename, formatting_info=True)
    oldsht = oldWb.sheet_by_index(0)
    row_old = oldsht.nrows
    col_old = oldsht.ncols
    from xlutils.copy import copy
    newWb = copy(oldWb)
    newsht = newWb.get_sheet("sheet1")
    # Three subtotal ("小计") columns, one per group of category columns,
    # followed by the total area ("总面积") over all twelve category columns.
    append_col(col_old - 15, "小计", ["示范区基本农田","一般基本农田","一般农田","新增一般农田","园地","林地","其他农用地"])
    append_col(col_old  - 7, "小计", ['存量建设用地', "新增建设用地", "预测新增建设用地"])
    append_col(col_old - 3, "小计", ['河流水面', "其余未利用地"])
    append_col(col_old - 16, "总面积", ["示范区基本农田","一般基本农田","一般农田","新增一般农田","园地","林地","其他农用地",'存量建设用地', "新增建设用地", "预测新增建设用地",'河流水面', "其余未利用地"])
    # Project-name column: a text copy of source column 0.
    newsht.write(0, col_old - 17, "项目名称")
    lie1_lie2(0, col_old - 17)
    # Running-number column, numbering from row 1.
    newsht.write(0,col_old - 18,"编号")
    bianhao(1,col_old - 18 )
    newWb.save(newfilename)

    # Pass 3: reopen and lay out the final report.
    oldWb = xlrd.open_workbook(newfilename, formatting_info=True)
    oldsht = oldWb.sheet_by_index(0)
    row_old = oldsht.nrows
    col_old = oldsht.ncols
    from xlutils.copy import copy
    newWb = copy(oldWb)
    newsht = newWb.get_sheet("sheet1")
    # Blank out every existing cell of the writable copy.
    for i in range(row_old):
        for j in range(col_old):
            newsht.write(i,j,"")
    # Write the data back: the last 18 source columns go to columns 0-17 and
    # each source data row (1 .. row_old - 1) moves down by 4 rows. x and y are
    # the row and column in the new layout.
    # NOTE: this rebinds the outer loop variable x; the file path was already
    # copied to oldfilename / newfilename above.
    for x in range(5,row_old + 4):
        for y in range(18):
            m = oldsht.cell_value(x - 4,col_old + y - 18)
            newsht.write(x,y,m,style_zw)
    # Rows 0-4 hold the title, the unit note and the multi-level column headers.
    newsht.write_merge(2,4,0,0,"编号",style_zw)
    newsht.write_merge(2, 4, 1, 1, "项目名称",style_zw)
    newsht.write_merge(2, 4, 2, 2, "总面积",style_zw)
    newsht.write_merge(3, 3, 3, 10, "农用地(不符合规划)",style_zw)
    newsht.write_merge(3, 3, 11, 14, "建设用地(符合规划)",style_zw)
    newsht.write_merge(3, 3, 15, 17, "未利用地(不符合规划)",style_zw)
    newsht.write_merge(2, 2, 3, 17, "规划信息",style_zw)
    newsht.write_merge(0, 0, 0, 17, "余杭区土地利用总体规划(2006-2020年)规划信息查询表",style_bt)
    newsht.write(4,3,"小计",style_zw)
    newsht.write(4,4, "示范区基本农田",style_zw)
    newsht.write(4,5, "一般基本农田",style_zw)
    newsht.write(4,6, "一般农田",style_zw)
    newsht.write(4,7, "新增一般农田",style_zw)
    newsht.write(4,8, "园地",style_zw)
    newsht.write(4,9, "林地",style_zw)
    newsht.write(4,10, "其他农用地",style_zw)
    newsht.write(4,11, "小计",style_zw)
    newsht.write(4,12, "存量建设用地",style_zw)
    newsht.write(4,13, "新增建设用地",style_zw)
    newsht.write(4,14, "预测新增建设用地",style_zw)
    newsht.write(4,15, "小计",style_zw)
    newsht.write(4,16, "河流水面",style_zw)
    newsht.write(4,17, "其余未利用地",style_zw)
    newsht.write(1,15, "单位:亩",style_zw)

    # Final save.
    newWb.save(newfilename)
print("过程无异常")
# Runs the shell command "pause" - presumably to keep a Windows console
# window open after the run.
os.system("pause")
xlrd、xlwt、pandas 处理 Excel 的实战经验（本项目用于自动化输出杭州市余杭区规划数据的汇总统计）

猜你喜欢