Pandas--表连接的几种方式

import pandas as pd

# Two name tables with identical columns; subject_id '4' and '5' occur in both.
raw_data_1 = dict(
    subject_id=['1', '2', '3', '4', '5'],
    first_name=['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],
    last_name=['Anderson', 'Ackerman', 'Ali', 'Aoni', 'Atiches'],
)

raw_data_2 = dict(
    subject_id=['4', '5', '6', '7', '8'],
    first_name=['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
    last_name=['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan'],
)

# Test ids per subject; subject_id is stored as strings here as well.
raw_data_3 = dict(
    subject_id=['1', '2', '3', '4', '5', '7', '8', '9', '10', '11'],
    test_id=[51, 15, 15, 61, 16, 14, 15, 1, 61, 16],
)

# One DataFrame per raw table, with column order pinned to the dict's key order.
df1, df2, df3 = (
    pd.DataFrame(raw, columns=list(raw))
    for raw in (raw_data_1, raw_data_2, raw_data_3)
)
df1
subject_id first_name last_name
0 1 Alex Anderson
1 2 Amy Ackerman
2 3 Allen Ali
3 4 Alice Aoni
4 5 Ayoung Atiches
df2
subject_id first_name last_name
0 4 Billy Bonder
1 5 Brian Black
2 6 Bran Balwner
3 7 Bryce Brice
4 8 Betty Btisan
df3
subject_id test_id
0 1 51
1 2 15
2 3 15
3 4 61
4 5 16
5 7 14
6 8 15
7 9 1
8 10 61
9 11 16

append

在这里插入图片描述

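# ignore_index=True 重置索引
# 注意：DataFrame.append 已在 pandas 1.4 中弃用，并在 pandas 2.0 中移除；新版本请改用 pd.concat([df1, df2], ignore_index=True)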
df1.append(df2, ignore_index=True)
subject_id first_name last_name
0 1 Alex Anderson
1 2 Amy Ackerman
2 3 Allen Ali
3 4 Alice Aoni
4 5 Ayoung Atiches
5 4 Billy Bonder
6 5 Brian Black
7 6 Bran Balwner
8 7 Bryce Brice
9 8 Betty Btisan
# # verify_integrity=True 若存在重复索引则报错
# df1.append(df2, verify_integrity=True)
# 数据框维度不同时
df1.append(df3)
subject_id first_name last_name test_id
0 1 Alex Anderson NaN
1 2 Amy Ackerman NaN
2 3 Allen Ali NaN
3 4 Alice Aoni NaN
4 5 Ayoung Atiches NaN
0 1 NaN NaN 51.0
1 2 NaN NaN 15.0
2 3 NaN NaN 15.0
3 4 NaN NaN 61.0
4 5 NaN NaN 16.0
5 7 NaN NaN 14.0
6 8 NaN NaN 15.0
7 9 NaN NaN 1.0
8 10 NaN NaN 61.0
9 11 NaN NaN 16.0

join

在这里插入图片描述

# Left-hand demo frame: keys K0..K5, each paired with an 'A' value.
df = pd.DataFrame(
    dict(
        key=['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
        A=['A0', 'A1', 'A2', 'A3', 'A4', 'A5'],
    )
)
df
key A
0 K0 A0
1 K1 A1
2 K2 A2
3 K3 A3
4 K4 A4
5 K5 A5
# Right-hand demo frame: only keys K0..K2, each paired with a 'B' value.
other = pd.DataFrame(
    dict(
        key=['K0', 'K1', 'K2'],
        B=['B0', 'B1', 'B2'],
    )
)
other
key B
0 K0 B0
1 K1 B1
2 K2 B2
# 默认按照index左连接
# 当表中有重复列且不被作为连接依据时需要指定后缀
df.join(other, lsuffix='_caller', rsuffix='_other')
key_caller A key_other B
0 K0 A0 K0 B0
1 K1 A1 K1 B1
2 K2 A2 K2 B2
3 K3 A3 NaN NaN
4 K4 A4 NaN NaN
5 K5 A5 NaN NaN
df.join(other.set_index('key'), on='key')
key A B
0 K0 A0 B0
1 K1 A1 B1
2 K2 A2 B2
3 K3 A3 NaN
4 K4 A4 NaN
5 K5 A5 NaN
df.set_index('key').join(other.set_index('key'))
A B
key
K0 A0 B0
K1 A1 B1
K2 A2 B2
K3 A3 NaN
K4 A4 NaN
K5 A5 NaN

pandas.DataFrame.merge

# 默认内连接
df1.merge(df2, on='subject_id')
subject_id first_name_x last_name_x first_name_y last_name_y
0 4 Alice Aoni Billy Bonder
1 5 Ayoung Atiches Brian Black
# Cast subject_id from str to int so it is comparable with df1's integer index
# in the left_index/right_on merge that follows.
df2['subject_id'] = df2.subject_id.astype(int)
# 指定连接基准为:左表的index,右表的subject_id列
df1.merge(df2, left_index=True, right_on='subject_id')
subject_id subject_id_x first_name_x last_name_x subject_id_y first_name_y last_name_y
0 4 5 Ayoung Atiches 4 Billy Bonder

pd.concat

在这里插入图片描述

pd.concat([df1,df2])
subject_id first_name last_name
0 1 Alex Anderson
1 2 Amy Ackerman
2 3 Allen Ali
3 4 Alice Aoni
4 5 Ayoung Atiches
0 4 Billy Bonder
1 5 Brian Black
2 6 Bran Balwner
3 7 Bryce Brice
4 8 Betty Btisan
pd.concat([df1,df3])
subject_id first_name last_name test_id
0 1 Alex Anderson NaN
1 2 Amy Ackerman NaN
2 3 Allen Ali NaN
3 4 Alice Aoni NaN
4 5 Ayoung Atiches NaN
0 1 NaN NaN 51.0
1 2 NaN NaN 15.0
2 3 NaN NaN 15.0
3 4 NaN NaN 61.0
4 5 NaN NaN 16.0
5 7 NaN NaN 14.0
6 8 NaN NaN 15.0
7 9 NaN NaN 1.0
8 10 NaN NaN 61.0
9 11 NaN NaN 16.0
pd.concat([df1,df3], axis=1, join='inner')
subject_id first_name last_name subject_id test_id
0 1 Alex Anderson 1 51
1 2 Amy Ackerman 2 15
2 3 Allen Ali 3 15
3 4 Alice Aoni 4 61
4 5 Ayoung Atiches 5 16

猜你喜欢

转载自blog.csdn.net/ddjhpxs/article/details/107435587