# python数据分析十二:pandas多个矩阵的索引合并(merge的索引合并方法详解)

# -*- coding: utf-8 -*-
import pandas as pd

from pandas import Series,DataFrame

import numpy as np

# ---------------------------------------------------------------------------
# Merging when the join key of one side lives in its index
# ---------------------------------------------------------------------------

# Left frame: the join key is an ordinary column named 'key'.
left1 = DataFrame({'key': ['a', 'b', 'a', 'a', 'b', 'c'], 'value': range(6)})
print(left1)
#   key  value
# 0   a      0
# 1   b      1
# 2   a      2
# 3   a      3
# 4   b      4
# 5   c      5

# Right frame: the join key is the row index ('a', 'b').
right1 = DataFrame({'group_val': [3.5, 7]}, index=['a', 'b'])
print(right1)
#    group_val
# a        3.5
# b        7.0

# left_on='key' uses the left column as the key, right_index=True uses the
# right frame's row index as the key.  pd.merge defaults to how='inner', so
# key 'c' (absent from right1) is dropped; pass how='outer' to keep it.
inner_merged = pd.merge(left1, right1, left_on='key', right_index=True)
print(inner_merged)
# Sample output (row order may differ between pandas versions):
#   key  value  group_val
# 0   a      0        3.5
# 2   a      2        3.5
# 3   a      3        3.5
# 1   b      1        7.0
# 4   b      4        7.0

# DataFrame.join with on='key' matches the left column against the right
# index.  join is a left join by default, so every left row is kept in its
# original order and the unmatched key 'c' gets NaN.
joined_on_key = left1.join(right1, on='key')
print(joined_on_key)
#   key  value  group_val
# 0   a      0        3.5
# 1   b      1        7.0
# 2   a      2        3.5
# 3   a      3        3.5
# 4   b      4        7.0
# 5   c      5        NaN

# how='outer' takes the union of the keys from both sides.
outer_merged = pd.merge(left1, right1, left_on='key', right_index=True,
                        how='outer')
print(outer_merged)
# Sample output (row order may differ between pandas versions):
#   key  value  group_val
# 0   a      0        3.5
# 2   a      2        3.5
# 3   a      3        3.5
# 1   b      1        7.0
# 4   b      4        7.0
# 5   c      5        NaN

# ---------------------------------------------------------------------------
# Merging against a hierarchical (two-level) index
# ---------------------------------------------------------------------------

# Left frame: the two join keys are ordinary columns.
lefth = pd.DataFrame({
    'key1': ['Oh', 'Oh', 'Oh', 'ne', 'ne'],
    'key2': [2000, 2001, 2002, 2001, 2002],
    'data': np.arange(5),
})
print(lefth)
#   key1  key2  data
# 0   Oh  2000     0
# 1   Oh  2001     1
# 2   Oh  2002     2
# 3   ne  2001     3
# 4   ne  2002     4


# Right frame: the join keys form a two-level row index.  Note that the
# pair ('Oh', 2000) appears twice.
_outer_level = ['ne', 'ne', 'Oh', 'Oh', 'Oh', 'Oh']
_inner_level = [2001, 2000, 2000, 2000, 2001, 2002]
righth = pd.DataFrame(
    np.arange(12).reshape(6, 2),
    index=[_outer_level, _inner_level],
    columns=['event1', 'event2'],
)
print(righth)
#          event1  event2
# ne 2001       0       1
#    2000       2       3
# Oh 2000       4       5
#    2000       6       7
#    2001       8       9
#    2002      10      11

# Key point: the left columns are passed as a list, one per level of the
# right index.  With the default how='inner' only key pairs present on both
# sides survive; the duplicated ('Oh', 2000) yields two rows with index 0.
_key_columns = ['key1', 'key2']
_inner_result = lefth.merge(righth, left_on=_key_columns, right_index=True)
print(_inner_result)
#   key1  key2  data  event1  event2
# 0   Oh  2000     0       4       5
# 0   Oh  2000     0       6       7
# 1   Oh  2001     1       8       9
# 2   Oh  2002     2      10      11
# 3   ne  2001     3       0       1


# how='outer' keeps the union of key pairs; values missing on one side are
# NaN.  Sample output as recorded by the original author (index labels and
# row order may differ between pandas versions).
_outer_result = lefth.merge(
    righth, left_on=_key_columns, right_index=True, how='outer'
)
print(_outer_result)
#   key1  key2  data  event1  event2
# 0   Oh  2000   0.0     4.0     5.0
# 0   Oh  2000   0.0     6.0     7.0
# 1   Oh  2001   1.0     8.0     9.0
# 2   Oh  2002   2.0    10.0    11.0
# 3   ne  2001   3.0     0.0     1.0
# 4   ne  2002   4.0     NaN     NaN
# 4   ne  2000   NaN     2.0     3.0

# ---------------------------------------------------------------------------
# Merging two frames on their indexes
# ---------------------------------------------------------------------------
left2 = pd.DataFrame(
    np.arange(6).reshape((3, 2)),
    index=['a', 'b', 'c'],
    columns=['Oh', 'Ne'],
)
print(left2)
#   Oh  Ne
# a   0   1
# b   2   3
# c   4   5

right2 = pd.DataFrame(
    [[6, 7],
     [8, 9],
     [10, 11],
     [12, 13]],
    index=['b', 'c', 'd', 'e'],
    columns=['Mi', 'Al'],
)
print(right2)
#    Mi  Al
# b   6   7
# c   8   9
# d  10  11
# e  12  13

# Both sides use their row index as the join key; how='outer' keeps every
# index label found in either frame.
_merged_on_index = left2.merge(
    right2, left_index=True, right_index=True, how='outer'
)
print(_merged_on_index)
#     Oh   Ne    Mi    Al
# a  0.0  1.0   NaN   NaN
# b  2.0  3.0   6.0   7.0
# c  4.0  5.0   8.0   9.0
# d  NaN  NaN  10.0  11.0
# e  NaN  NaN  12.0  13.0


# ---------------------------------------------------------------------------
# The join method: the same index-on-index outer join, spelled with join
# ---------------------------------------------------------------------------
_joined_outer = left2.join(right2, how='outer')
print(_joined_outer)
#     Oh   Ne    Mi    Al
# a  0.0  1.0   NaN   NaN
# b  2.0  3.0   6.0   7.0
# c  4.0  5.0   8.0   9.0
# d  NaN  NaN  10.0  11.0
# e  NaN  NaN  12.0  13.0

# ---------------------------------------------------------------------------
# Joining several frames at once
# ---------------------------------------------------------------------------
# A third frame, built from a nested list of rows.
another = pd.DataFrame(
    [[7, 8],
     [9, 10],
     [11, 12],
     [16, 17]],
    index=['a', 'c', 'e', 'f'],
    columns=['北京', '上海'],
)

# Passing a list joins all frames on their indexes.  The default is a left
# join, so only the index labels of left2 are kept.
_others = [right2, another]
print(left2.join(_others))
#    Oh  Ne   Mi   Al   北京    上海
# a   0   1  NaN  NaN  7.0   8.0
# b   2   3  6.0  7.0  NaN   NaN
# c   4   5  8.0  9.0  9.0  10.0

# how='outer' shows every index label from all three frames.
print(left2.join(_others, how='outer'))
#     Oh   Ne    Mi    Al    北京    上海
# a  0.0  1.0   NaN   NaN   7.0   8.0
# b  2.0  3.0   6.0   7.0   NaN   NaN
# c  4.0  5.0   8.0   9.0   9.0  10.0
# d  NaN  NaN  10.0  11.0   NaN   NaN
# e  NaN  NaN  12.0  13.0  11.0  12.0
# f  NaN  NaN   NaN   NaN  16.0  17.0




# 猜你喜欢

# 转载自blog.csdn.net/qq_38788128/article/details/80761621