Use Python para filtrar Excel por valor de columna y dividir la tabla en varios archivos

 

 

Escenario: cuando la sede central del grupo emitió los datos provinciales, estos no estaban divididos por ciudad de prefectura ni por empresa. Ahora deben dividirse por ciudad de prefectura y por empresa, y distribuirse a cada ciudad de prefectura.

Este artículo utiliza el paquete pandas de Python para lograr los escenarios anteriores.

Los ejemplos de datos son los siguientes:

El Excel anterior debe filtrarse y dividirse por ciudad y guardarse como un archivo para cada ciudad.

Ahora, veamos el código:

 

# -*- coding: utf-8 -*-
"""
Created on Fri Nov  1 09:53:30 2019

@author: lanxuxml

Use case:

    An Excel workbook containing multiple sheets needs to be filtered by
    column value, with the results saved as separate files.

"""

import pandas as pd

# Prefix and suffix of every per-city output workbook; the city name is
# inserted between them when the output path is built.
split_excel_name_head = r'F:\20191129\jidi\湖南省_JD_'

split_excel_name_tail = r'_疑似同一客户合并.xlsx'

# Source workbook that is going to be split.
xlsx_name = r"F:\20191129\湖南省_JD_疑似同一客户合并.xls"

# Names of the columns used for filtering.
filter_column_name = u'市'
filter_column_name_quxian = u'区县'
# Collect the sheet names. The workbook handle is only needed for that, so it
# is opened in a context manager and closed again right away.
with pd.ExcelFile(xlsx_name) as df:
    # Leave out the sheet named '数据量'. Filtering (instead of list.remove)
    # does not raise ValueError when the workbook has no such sheet.
    sheet_names = [name for name in df.sheet_names if name != '数据量']
# Sheets that are not filtered.
sheet_not_filter_names = ['场景说明']

# Gather the distinct values of both filter columns across all sheets.
found_names = set()
for sheet_name in sheet_names:
    tmp_df = pd.read_excel(xlsx_name, sheet_name=sheet_name)
    # A sheet that lacks either filter column contributes nothing. Checking
    # for the columns explicitly replaces a bare ``except: pass`` that would
    # also have hidden unrelated failures.
    if (filter_column_name not in tmp_df.columns
            or filter_column_name_quxian not in tmp_df.columns):
        continue
    found_names.update(tmp_df[filter_column_name].unique().tolist())
    found_names.update(tmp_df[filter_column_name_quxian].unique().tolist())
# Intersect with the list of real city names so that stray values (district
# names, blanks, ...) never produce an empty workbook or an empty sheet.
city_names_unique_all = ['娄底市', '湘西土家族苗族自治州', '邵阳市', '常德市', '长沙市', '株洲市', '郴州市', '益阳市', '永州市', '怀化市', '湘潭市', '衡阳市', '岳阳市','张家界市']
city_names = list(found_names.intersection(city_names_unique_all))
# Cities that have no data in any sheet.
city_no_data = list(set(city_names_unique_all).difference(city_names))
print(city_no_data)
# Writer for the final report workbook; it is filled in by the comparison
# loop that follows the per-city split.
report_writer = pd.ExcelWriter(xlsx_name + u"_拆分报告.xlsx")

# Read every sheet once up front instead of once per city.
sheet_frames = {
    sheet_name: pd.read_excel(xlsx_name, sheet_name=sheet_name)
    for sheet_name in sheet_names
}
# For each filtered sheet, a boolean mask of the rows that no city has
# claimed so far. It starts all-True and is narrowed city by city.
unclaimed = {
    sheet_name: pd.Series(True, index=frame.index, dtype=bool)
    for sheet_name, frame in sheet_frames.items()
    if sheet_name not in sheet_not_filter_names
}

for city_name in city_names:
    city_excel_name = split_excel_name_head + str(city_name) + split_excel_name_tail
    # The context manager saves and closes the city workbook, also on error.
    # ExcelWriter.save() no longer exists in pandas 2.x.
    with pd.ExcelWriter(city_excel_name) as writer:
        for sheet_name in sheet_names:
            tmp_df = sheet_frames[sheet_name]
            if sheet_name not in sheet_not_filter_names:
                in_city = tmp_df[filter_column_name].isin([city_name])
                in_district = tmp_df[filter_column_name_quxian].isin([city_name])
                # Rows matched through the city column come first, followed
                # by rows whose district column holds the city name; exact
                # duplicate rows are dropped. pd.concat replaces the removed
                # DataFrame.append.
                tmp_sheet = pd.concat(
                    [tmp_df[in_city], tmp_df[in_district]]
                ).drop_duplicates(keep='first')
                unclaimed[sheet_name] &= ~(in_city | in_district)
            else:
                # Unfiltered sheets are copied into every city workbook.
                tmp_sheet = tmp_df

            # The ``encoding`` keyword was dropped from to_excel in pandas 2.x.
            tmp_sheet.to_excel(excel_writer=writer, sheet_name=sheet_name, index=False)

# Temporary report: rows that could not be split through either filter
# column. It is written once, after all cities have been processed, so one
# city's result can no longer overwrite another's and the writer is no longer
# used after being closed.
# NOTE(review): the original wrote each city's non-matching rows to the same
# sheet and closed the writer after the first city; keeping the rows claimed
# by no city follows the original comment - confirm this is the intent.
with pd.ExcelWriter(xlsx_name + u"_拆分报告_临时.xlsx") as tmp_report_writer:
    for sheet_name in sheet_names:
        if sheet_name in unclaimed:
            tmp_sliced = sheet_frames[sheet_name][unclaimed[sheet_name]]
        else:
            # Unfiltered sheets get an empty placeholder sheet.
            tmp_sliced = pd.DataFrame()
        tmp_sliced.to_excel(excel_writer=tmp_report_writer, sheet_name=sheet_name, index=False)
    
# Build the final report by comparing every original sheet with the matching
# sheet of the temporary report. The writer is closed exactly once, after all
# sheets are written; closing it inside the loop lost every sheet but the first.
with report_writer:
    for sheet_name in sheet_names:
        ori_df = pd.read_excel(xlsx_name, sheet_name=sheet_name)
        sli_df = pd.read_excel(xlsx_name + u"_拆分报告_临时.xlsx", sheet_name=sheet_name)
        # Stack both frames and drop every row that occurs more than once, so
        # only rows appearing exactly once in the combined data remain.
        # pd.concat replaces DataFrame.append, which pandas 2.x removed.
        ori_df = pd.concat([ori_df, sli_df]).drop_duplicates(keep=False)
        # The ``encoding`` keyword was dropped from to_excel in pandas 2.x.
        ori_df.to_excel(excel_writer=report_writer, sheet_name=sheet_name, index=False)
    


 

Supongo que te gusta

Origin blog.csdn.net/lanxuxml/article/details/102891127
Recomendado
Clasificación