import pandas as pd
import numpy as np
from pandas import Series, DataFrame
# Left-hand frame for the merge examples: a 'key' column holding X/Y/Z and
# one value column, 'data_set_1'.
df1 = pd.DataFrame(
    {
        'key': ['X', 'Y', 'Z'],
        'data_set_1': [1, 2, 3],
    }
)
df1
Out[5]:
data_set_1 key
0 1 X
1 2 Y
2 3 Z
# Right-hand frame for the merge examples: same 'key' column name as df1,
# but the key values (A/B/C) do not overlap with df1's.
df2 = pd.DataFrame(
    {
        'key': ['A', 'B', 'C'],
        'data_set_2': [4, 5, 6],
    }
)
df2
Out[7]:
data_set_2 key
0 4 A
1 5 B
2 6 C
# The two frames share the 'key' column name but have no key values in
# common, so the merge result is an empty DataFrame.
df1.merge(df2)
Out[8]:
Empty DataFrame
Columns: [data_set_1, key, data_set_2]
Index: []
# Default merge: rebuild df2 so that it shares the key 'X' with df1. With no
# extra arguments the merge joins on the shared 'key' column and keeps only
# the matching key.
df2 = pd.DataFrame(
    {
        'key': ['X', 'B', 'C'],
        'data_set_2': [4, 5, 6],
    }
)
df1.merge(df2)
Out[10]:
data_set_1 key data_set_2
0 1 X 4
# Rebuild df1 with the key 'X' appearing twice: each 'X' row of df1 is paired
# with the matching 'X' row of df2, so the merged result has two rows.
df1 = pd.DataFrame(
    {
        'key': ['X', 'Y', 'Z', 'X'],
        'data_set_1': [1, 2, 3, 4],
    }
)
df1.merge(df2)
Out[12]:
data_set_1 key data_set_2
0 1 X 4
1 4 X 4
# `on` names the join column. When it is omitted, pandas joins on the columns
# that share a name in both frames; naming a column that the frames do not
# share raises an error. When several columns share a name, pass `on` to
# choose which of them to join on.
df1.merge(df2, on='key')
Out[13]:
data_set_1 key data_set_2
0 1 X 4
1 4 X 4
# `how` selects the join type: 'inner' (the default), 'left', 'right' or
# 'outer'. An inner join keeps only the keys present in both frames.
df1.merge(df2, on='key', how='inner')
Out[15]:
data_set_1 key data_set_2
0 1 X 4
1 4 X 4
# Left join: every row of df1 is kept; keys missing from df2 get NaN in
# 'data_set_2'.
df1.merge(df2, on='key', how='left')
Out[16]:
data_set_1 key data_set_2
0 1 X 4.0
1 2 Y NaN
2 3 Z NaN
3 4 X 4.0
# Right join: every row of df2 is kept; keys missing from df1 get NaN in
# 'data_set_1'.
df1.merge(df2, on='key', how='right')
Out[17]:
data_set_1 key data_set_2
0 1.0 X 4
1 4.0 X 4
2 NaN B 5
3 NaN C 6
# Outer join: keys from both frames are kept; whichever side has no match
# is filled with NaN.
df1.merge(df2, on='key', how='outer')
Out[18]:
data_set_1 key data_set_2
0 1.0 X 4.0
1 4.0 X 4.0
2 2.0 Y NaN
3 3.0 Z NaN
4 NaN B 5.0
5 NaN C 6.0
Pandas玩转数据(四) -- DataFrame的merge
猜你喜欢
转载自blog.csdn.net/weixin_39778570/article/details/81107033
今日推荐
周排行