#Pandas具有功能完备、高性能的内存连接(join)操作,用法与SQL等关系型数据库非常相似
import numpy as np
import pandas as pd
In [18]:
# Sample frames shared by the merge / join / de-duplicate / replace demos.
# df1 and df2 share one join column, 'key', holding the same four labels.
df1 = pd.DataFrame({
    'key': ['K%d' % i for i in range(4)],
    'A': ['A%d' % i for i in range(4)],
    'B': ['B%d' % i for i in range(4)],
})
df2 = pd.DataFrame({
    'key': ['K%d' % i for i in range(4)],
    'C': ['C%d' % i for i in range(4)],
    'D': ['D%d' % i for i in range(4)],
})
# df3 and df4 are joined on the composite key ('key1', 'key2'); their key
# pairs only partially overlap, so the join type changes the result.
df3 = pd.DataFrame({
    'key1': 'K0 K0 K1 K2'.split(),
    'key2': 'K0 K1 K0 K1'.split(),
    'A': ['A%d' % i for i in range(4)],
    'B': ['B%d' % i for i in range(4)],
})
df4 = pd.DataFrame({
    'key1': 'K0 K1 K1 K2'.split(),
    'key2': 'K0 K0 K0 K0'.split(),
    'C': ['C%d' % i for i in range(4)],
    'D': ['D%d' % i for i in range(4)],
})
In [20]:
# Merge on a single shared column: on='key' names the column used as the
# join key. Both inputs are printed first, then the merged result.
print(df1, df2, df1.merge(df2, on='key'), sep='\n')
A B key
0 A0 B0 K0
1 A1 B1 K1
2 A2 B2 K2
3 A3 B3 K3
C D key
0 C0 D0 K0
1 C1 D1 K1
2 C2 D2 K2
3 C3 D3 K3
A B key C D
0 A0 B0 K0 C0 D0
1 A1 B1 K1 C1 D1
2 A2 B2 K2 C2 D2
3 A3 B3 K3 C3 D3
In [21]:
# Merge on a composite key: rows pair up only when both 'key1' and 'key2'
# are equal. The two inputs are printed first so the merged result below
# them can be checked against its sources.
print(df3)
print(df4)
print(pd.merge(df3, df4, on=['key1', 'key2']))
A B key1 key2
0 A0 B0 K0 K0
1 A1 B1 K0 K1
2 A2 B2 K1 K0
3 A3 B3 K2 K1
C D key1 key2
0 C0 D0 K0 K0
1 C1 D1 K1 K0
2 C2 D2 K1 K0
3 C3 D3 K2 K0
A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A2 B2 K1 K0 C1 D1
2 A2 B2 K1 K0 C2 D2
In [23]:
# The `how` parameter selects the join type. In the order printed:
#   'inner' - intersection: only key pairs present in both frames
#   'outer' - union: every key pair from either frame
#   'left'  - keep all of df3's key pairs; missing df4 values become NaN
#   'right' - keep all of df4's key pairs; missing df3 values become NaN
for how in ('inner', 'outer', 'left', 'right'):
    print(pd.merge(df3, df4, on=['key1', 'key2'], how=how))
A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A2 B2 K1 K0 C1 D1
2 A2 B2 K1 K0 C2 D2
A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A1 B1 K0 K1 NaN NaN
2 A2 B2 K1 K0 C1 D1
3 A2 B2 K1 K0 C2 D2
4 A3 B3 K2 K1 NaN NaN
5 NaN NaN K2 K0 C3 D3
A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A1 B1 K0 K1 NaN NaN
2 A2 B2 K1 K0 C1 D1
3 A2 B2 K1 K0 C2 D2
4 A3 B3 K2 K1 NaN NaN
A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A2 B2 K1 K0 C1 D1
2 A2 B2 K1 K0 C2 D2
3 NaN NaN K2 K0 C3 D3
In [28]:
#参数 left_on right_on left_index right_index 当键不为一个列时,可以单独设置左键与右键
# The key columns have different names in the two frames, so each side's
# key is given separately: df1 joins on 'lkey', df2 joins on 'rkey'.
df1 = pd.DataFrame({
    'lkey': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],
    'data1': list(range(7)),
})
df2 = pd.DataFrame({
    'rkey': ['a', 'b', 'd'],
    'data2': list(range(3)),
})
# Show both inputs, then the merged result.
print(df1, df2, df1.merge(df2, left_on='lkey', right_on='rkey'), sep='\n')
data1 lkey
0 0 b
1 1 b
2 2 a
3 3 c
4 4 a
5 5 a
6 6 b
data2 rkey
0 0 a
1 1 b
2 2 d
data1 lkey data2 rkey
0 0 b 1 b
1 1 b 1 b
2 6 b 1 b
3 2 a 0 a
4 4 a 0 a
5 5 a 0 a
In [43]:
#left_on 与 right_index 组合:左表以列为键,右表以index为键
# Join a column on one side against the index on the other side:
# df1 uses its 'key' column, df2 uses its index labels.
df1 = pd.DataFrame({
    'key': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
    'data': list(range(7)),
})
df2 = pd.DataFrame(
    {'data1': [100, 101, 102, 103, 104]},
    index=['a', 'b', 'c', 'd', 'e'],
)
print(df1, df2, df1.merge(df2, left_on='key', right_index=True), sep='\n')
# left_index=True  -> the first frame joins on its index (default False)
# right_index=True -> the second frame joins on its index (default False)
# The four options left_on / right_on / left_index / right_index can be
# paired freely:
#   left_on + right_on, left_index + right_index,
#   right_on + left_index, left_on + right_index
data key
0 0 a
1 1 b
2 2 c
3 3 d
4 4 e
5 5 f
6 6 g
data1
a 100
b 101
c 102
d 103
e 104
data key data1
0 0 a 100
1 1 b 101
2 2 c 102
3 3 d 103
4 4 e 104
In [52]:
# concat: axis=1 places objects side by side (column + column),
# axis=0 stacks them (row + row). No axis is passed below.
s1 = pd.Series([1, 2, 3])
s2 = pd.Series([2, 3, 4])
print(s1, s2, sep='\n')
# Each input keeps its own index labels in the concatenated result;
# sort_index() reorders the result by label and returns a new Series.
print(pd.concat([s1, s2]).sort_index(), pd.concat([s1, s2]), sep='\n')
0 1
1 2
2 3
dtype: int64
0 2
1 3
2 4
dtype: int64
0 1
0 2
1 2
1 3
2 3
2 4
dtype: int64
0 1
1 2
2 3
0 2
1 3
2 4
dtype: int64
In [58]:
#去重
# Values 1..9 followed by a repeat of 1..6, so the last six are duplicates.
s = pd.Series(list(range(1, 10)) + list(range(1, 7)))
# Print the series, then the boolean mask flagging values that already
# appeared earlier in the series.
print(s, s.duplicated(), sep='\n')
# Drop the flagged repeats, keeping the first occurrence of each value.
print(s.drop_duplicates())
0 1
1 2
2 3
3 4
4 5
5 6
6 7
7 8
8 9
9 1
10 2
11 3
12 4
13 5
14 6
dtype: int64
0 False
1 False
2 False
3 False
4 False
5 False
6 False
7 False
8 False
9 True
10 True
11 True
12 True
13 True
14 True
dtype: bool
0 1
1 2
2 3
3 4
4 5
5 6
6 7
7 8
8 9
dtype: int64
In [62]:
#替换
# One character per element; replace() swaps every 'a' for 'z' in the
# printed copy.
s = pd.Series(['a', 's', 'c', 'a', 'a', 'z', 's', 'd'])
print(s, s.replace(to_replace='a', value='z'), sep='\n')
0 a
1 s
2 c
3 a
4 a
5 z
6 s
7 d
dtype: object
0 z
1 s
2 c
3 z
4 z
5 z
6 s
7 d
dtype: object
Python之Pandas(4)
猜你喜欢
转载自blog.csdn.net/weixin_38452632/article/details/83659139
今日推荐
周排行