import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# Left-hand table: four rows; the key 'a' appears twice (rows 0 and 3).
df1 = DataFrame({
    'data1': [1, 2, 3, 4],
    'key': ['a', 'b', 'c', 'a'],
})
df1
|   | data1 | key |
|---|-------|-----|
| 0 | 1     | a   |
| 1 | 2     | b   |
| 2 | 3     | c   |
| 3 | 4     | a   |
# Right-hand table: three rows; 'a' is the only key it has in common with df1.
df2 = DataFrame({
    'data2': [4, 5, 6],
    'key': ['a', 'e', 'd'],
})
df2
|   | data2 | key |
|---|-------|-----|
| 0 | 4     | a   |
| 1 | 5     | e   |
| 2 | 6     | d   |
# With no `on` argument, merge joins on the column name the two frames share
# ('key') and keeps only the keys present in both (an inner join).
pd.merge(df1, df2)
|   | data1 | key | data2 |
|---|-------|-----|-------|
| 0 | 1     | a   | 4     |
| 1 | 4     | a   | 4     |
# Naming the join column explicitly gives the same result as the default call.
pd.merge(df1, df2, on='key')
|   | data1 | key | data2 |
|---|-------|-----|-------|
| 0 | 1     | a   | 4     |
| 1 | 4     | a   | 4     |
# on=None is the default value: merge falls back to the shared column 'key'.
pd.merge(df1, df2, on=None)
|   | data1 | key | data2 |
|---|-------|-----|-------|
| 0 | 1     | a   | 4     |
| 1 | 4     | a   | 4     |
# how='inner' (the default): keep only keys found in both frames -- here just 'a'.
pd.merge(df1, df2, on='key', how='inner')
|   | data1 | key | data2 |
|---|-------|-----|-------|
| 0 | 1     | a   | 4     |
| 1 | 4     | a   | 4     |
# how='left': keep every row of df1; keys absent from df2 ('b', 'c') get NaN
# in data2, which is why that column is shown as float.
pd.merge(df1, df2, on='key', how='left')
|   | data1 | key | data2 |
|---|-------|-----|-------|
| 0 | 1     | a   | 4.0   |
| 1 | 2     | b   | NaN   |
| 2 | 3     | c   | NaN   |
| 3 | 4     | a   | 4.0   |
# how='right': keep every row of df2; keys absent from df1 ('e', 'd') get NaN
# in data1, which is why that column is shown as float.
pd.merge(df1, df2, on='key', how='right')
|   | data1 | key | data2 |
|---|-------|-----|-------|
| 0 | 1.0   | a   | 4     |
| 1 | 4.0   | a   | 4     |
| 2 | NaN   | e   | 5     |
| 3 | NaN   | d   | 6     |
# how='outer': keep the union of keys from both frames; whichever side lacks
# a key is filled with NaN.
pd.merge(df1, df2, on='key', how='outer')
|   | data1 | key | data2 |
|---|-------|-----|-------|
| 0 | 1.0   | a   | 4.0   |
| 1 | 4.0   | a   | 4.0   |
| 2 | 2.0   | b   | NaN   |
| 3 | 3.0   | c   | NaN   |
| 4 | NaN   | e   | 5.0   |
| 5 | NaN   | d   | 6.0   |