Python finds more tables for data comparison

Three tables of data (tens of thousands) need to be compared to see if there are any duplicates. Using excel itself for data verification is obviously a huge amount of data, so consider using python scripts to achieve it.

from openpyxl import load_workbook, Workbook

wb1 = load_workbook('/Users/Mike/Downloads/ceshi/employee1.xlsx')
wb2 = load_workbook('/Users/Mike/Downloads/ceshi/employee2.xlsx')
wb3 = load_workbook('/Users/Mike/Downloads/ceshi/employee3.xlsx')

a_wb = wb1['employee']
b_wb = wb2['employee']
c_wb = wb3['employee']

a_wb_max_row = len(a_wb['B'])
b_wb_max_row = len(b_wb['B'])
c_wb_max_row = len(c_wb['B'])

'''
# 提取姓名
for row in a_wb.iter_cols(min_row=1, max_row=a_wb_max_row, min_col=1, max_col=1, values_only=True):
    row1 = list(row)
print(row1)
for row in b_wb.iter_cols(min_row=1, max_row=b_wb_max_row, min_col=1, max_col=1, values_only=True):
    row2 = list(row)
# print(row2)
for row in c_wb.iter_cols(min_row=1, max_row=c_wb_max_row, min_col=1, max_col=1, values_only=True):
    row3 = list(row)
'''

# 提取邮箱
for row in a_wb.iter_cols(min_row=2, max_row=a_wb_max_row, min_col=2, max_col=2, values_only=True):
    row1 = list(row)
# print(row1)
for row in b_wb.iter_cols(min_row=2, max_row=b_wb_max_row, min_col=2, max_col=2, values_only=True):
    row2 = list(row)
# print(row2)
for row in c_wb.iter_cols(min_row=2, max_row=c_wb_max_row, min_col=2, max_col=2, values_only=True):
    row3 = list(row)


# print(row3)


# 同时存在三张数据表格中
c = []
for i in row1:
    if i in row2:
        if i in row3:
            c.append(i)
for i in c:
    if i == None:
        c.remove(i)
print(c)

wb3 = Workbook()
d_wb = wb3.active

for i in range(len(c)):
    d = [c[i]]
    d_wb.append(d)

wb3.save('/Users/Mike/Downloads/mynewdata1.xlsx')


# 1表v2表v3表不存在的数据
e = []
for i in row1:
    if i not in row2:
        if i not in row3:
            e.append(i)
for i in e:
    if i == None:
        e.remove(i)
print(e)

wb4 = Workbook()
e_wb = wb4.active

for i in range(len(e)):
    f = [e[i]]
    e_wb.append(f)

wb4.save('/Users/Mike/Downloads/mynewdata2.xlsx')


# 2表v3表不存在的数据
f = []
for i in row2:
    if i not in row3:
        f.append(i)
for i in f:
    if i == None:
        f.remove(i)
print(f)

wb5 = Workbook()
f_wb = wb5.active

for i in range(len(f)):
    g = [f[i]]
    f_wb.append(g)

wb5.save('/Users/Mike/Downloads/mynewdata3.xlsx')


# 1表v2表不存在的数据
g = []
for i in row1:
    if i not in row2:
        g.append(i)
for i in g:
    if i == None:
        g.remove(i)
print(g)

wb6 = Workbook()
h_wb = wb6.active
for i in range(len(g)):
    h = [g[i]]
    h_wb.append(h)

wb6.save('/Users/Mike/Downloads/mynewdata4.xlsx')

Guess you like

Origin blog.csdn.net/qq_45194089/article/details/127760729
Recommended