python多找表格进行数据对比

三张表格数据(上万条)需要比对,看是否有重复存在,使用excel自身进行数据核对显然数据量比较庞大,所以考虑使用python脚本来实现。

from openpyxl import load_workbook, Workbook

wb1 = load_workbook('/Users/Mike/Downloads/ceshi/employee1.xlsx')
wb2 = load_workbook('/Users/Mike/Downloads/ceshi/employee2.xlsx')
wb3 = load_workbook('/Users/Mike/Downloads/ceshi/employee3.xlsx')

a_wb = wb1['employee']
b_wb = wb2['employee']
c_wb = wb3['employee']

a_wb_max_row = len(a_wb['B'])
b_wb_max_row = len(b_wb['B'])
c_wb_max_row = len(c_wb['B'])

'''
# 提取姓名
for row in a_wb.iter_cols(min_row=1, max_row=a_wb_max_row, min_col=1, max_col=1, values_only=True):
    row1 = list(row)
print(row1)
for row in b_wb.iter_cols(min_row=1, max_row=b_wb_max_row, min_col=1, max_col=1, values_only=True):
    row2 = list(row)
# print(row2)
for row in c_wb.iter_cols(min_row=1, max_row=c_wb_max_row, min_col=1, max_col=1, values_only=True):
    row3 = list(row)
'''

# 提取邮箱
for row in a_wb.iter_cols(min_row=2, max_row=a_wb_max_row, min_col=2, max_col=2, values_only=True):
    row1 = list(row)
# print(row1)
for row in b_wb.iter_cols(min_row=2, max_row=b_wb_max_row, min_col=2, max_col=2, values_only=True):
    row2 = list(row)
# print(row2)
for row in c_wb.iter_cols(min_row=2, max_row=c_wb_max_row, min_col=2, max_col=2, values_only=True):
    row3 = list(row)


# print(row3)


# 同时存在三张数据表格中
c = []
for i in row1:
    if i in row2:
        if i in row3:
            c.append(i)
for i in c:
    if i == None:
        c.remove(i)
print(c)

wb3 = Workbook()
d_wb = wb3.active

for i in range(len(c)):
    d = [c[i]]
    d_wb.append(d)

wb3.save('/Users/Mike/Downloads/mynewdata1.xlsx')


# 1表v2表v3表不存在的数据
e = []
for i in row1:
    if i not in row2:
        if i not in row3:
            e.append(i)
for i in e:
    if i == None:
        e.remove(i)
print(e)

wb4 = Workbook()
e_wb = wb4.active

for i in range(len(e)):
    f = [e[i]]
    e_wb.append(f)

wb4.save('/Users/Mike/Downloads/mynewdata2.xlsx')


# 2表v3表不存在的数据
f = []
for i in row2:
    if i not in row3:
        f.append(i)
for i in f:
    if i == None:
        f.remove(i)
print(f)

wb5 = Workbook()
f_wb = wb5.active

for i in range(len(f)):
    g = [f[i]]
    f_wb.append(g)

wb5.save('/Users/Mike/Downloads/mynewdata3.xlsx')


# 1表v2表不存在的数据
g = []
for i in row1:
    if i not in row2:
        g.append(i)
for i in g:
    if i == None:
        g.remove(i)
print(g)

wb6 = Workbook()
h_wb = wb6.active
for i in range(len(g)):
    h = [g[i]]
    h_wb.append(h)

wb6.save('/Users/Mike/Downloads/mynewdata4.xlsx')

猜你喜欢

转载自blog.csdn.net/qq_45194089/article/details/127760729