# -*- coding: utf-8 -*-
import pandas as pd
from pandas import Series,DataFrame
import numpy as np
# ---------------------------------------------------------------------------
# Merging on an index
# ---------------------------------------------------------------------------
# NOTE(review): the sample outputs below were recorded by the original author;
# row order may differ between pandas versions -- confirm against the
# installed version.

# Left table: the join key is an ordinary column named 'key'.
left1 = DataFrame({'key': ['a', 'b', 'a', 'a', 'b', 'c'], 'value': range(6)})
print(left1)
#   key  value
# 0   a      0
# 1   b      1
# 2   a      2
# 3   a      3
# 4   b      4
# 5   c      5

# Right table: the join key is the row index ('a', 'b').
right1 = DataFrame({'group_val': [3.5, 7]}, index=['a', 'b'])
print(right1)
#    group_val
# a        3.5
# b        7.0

# Match the 'key' column of the left table against the index of the right
# table. pd.merge defaults to how='inner', so the 'c' row (no match in
# right1) is dropped; pass how='outer' to keep it.
inner_merged = pd.merge(left1, right1, left_on='key', right_index=True)
print(inner_merged)
#   key  value  group_val
# 0   a      0        3.5
# 2   a      2        3.5
# 3   a      3        3.5
# 1   b      1        7.0
# 4   b      4        7.0

# DataFrame.join with on='key' does the same column-to-index match, but
# defaults to a left join, so every row of left1 is kept. The result is bound
# to a name and printed so the output shown below is actually produced.
left_joined = left1.join(right1, on='key')
print(left_joined)
#   key  value  group_val
# 0   a      0        3.5
# 2   a      2        3.5
# 3   a      3        3.5
# 1   b      1        7.0
# 4   b      4        7.0
# 5   c      5        NaN

# Outer join: the union of keys from both sides (not the intersection).
outer_merged = pd.merge(left1, right1, left_on='key', right_index=True,
                        how='outer')
print(outer_merged)
#   key  value  group_val
# 0   a      0        3.5
# 2   a      2        3.5
# 3   a      3        3.5
# 1   b      1        7.0
# 4   b      4        7.0
# 5   c      5        NaN
# ---------------------------------------------------------------------------
# Merging against a hierarchical (two-level) index
# ---------------------------------------------------------------------------
# NOTE(review): the sample outputs below were recorded by the original author;
# row order may differ between pandas versions -- confirm against the
# installed version.

# Left table: the two join keys are ordinary columns.
lefth = DataFrame({
    'key1': ['Oh', 'Oh', 'Oh', 'ne', 'ne'],
    'key2': [2000, 2001, 2002, 2001, 2002],
    'data': np.arange(5),
})
print(lefth)
#   key1  key2  data
# 0   Oh  2000     0
# 1   Oh  2001     1
# 2   Oh  2002     2
# 3   ne  2001     3
# 4   ne  2002     4

# Right table: the same two keys form a two-level row index.
righth = DataFrame(
    np.arange(12).reshape(6, 2),
    index=[
        ['ne', 'ne', 'Oh', 'Oh', 'Oh', 'Oh'],
        [2001, 2000, 2000, 2000, 2001, 2002],
    ],
    columns=['event1', 'event2'],
)
print(righth)
#          event1  event2
# ne 2001       0       1
#    2000       2       3
# Oh 2000       4       5
#    2000       6       7
#    2001       8       9
#    2002      10      11

# Columns of the left table that line up, in order, with the index levels of
# the right table.
_join_keys = ['key1', 'key2']

# Key point: when the right side is matched on a multi-level index, the left
# side must name one column per level as a list. The default join is inner,
# so only key pairs present on both sides appear.
print(lefth.merge(righth, left_on=_join_keys, right_index=True))
#   key1  key2  data  event1  event2
# 0   Oh  2000     0       4       5
# 0   Oh  2000     0       6       7
# 1   Oh  2001     1       8       9
# 2   Oh  2002     2      10      11
# 3   ne  2001     3       0       1

# Outer join: the union of key pairs from both sides.
print(lefth.merge(righth, left_on=_join_keys, right_index=True, how='outer'))
#   key1  key2  data  event1  event2
# 0   Oh  2000   0.0     4.0     5.0
# 0   Oh  2000   0.0     6.0     7.0
# 1   Oh  2001   1.0     8.0     9.0
# 2   Oh  2002   2.0    10.0    11.0
# 3   ne  2001   3.0     0.0     1.0
# 4   ne  2002   4.0     NaN     NaN
# 4   ne  2000   NaN     2.0     3.0
# ---------------------------------------------------------------------------
# Merging two tables on both of their indexes
# ---------------------------------------------------------------------------
# NOTE(review): the sample outputs below were recorded by the original author;
# confirm against the installed pandas version.
left2 = DataFrame(
    np.arange(6).reshape(3, 2),
    index=['a', 'b', 'c'],
    columns=['Oh', 'Ne'],
)
print(left2)
#    Oh  Ne
# a   0   1
# b   2   3
# c   4   5

right2 = DataFrame(
    [[6, 7], [8, 9], [10, 11], [12, 13]],
    index=['b', 'c', 'd', 'e'],
    columns=['Mi', 'Al'],
)
print(right2)
#    Mi  Al
# b   6   7
# c   8   9
# d  10  11
# e  12  13

# Both sides are matched on their row index; how='outer' keeps every label
# that appears on either side.
print(left2.merge(right2, left_index=True, right_index=True, how='outer'))
#     Oh   Ne    Mi    Al
# a  0.0  1.0   NaN   NaN
# b  2.0  3.0   6.0   7.0
# c  4.0  5.0   8.0   9.0
# d  NaN  NaN  10.0  11.0
# e  NaN  NaN  12.0  13.0

# ---------------------------------------------------------------------------
# The join method
# ---------------------------------------------------------------------------
# DataFrame.join matches on the row index of both frames.
print(left2.join(right2, how='outer'))
#     Oh   Ne    Mi    Al
# a  0.0  1.0   NaN   NaN
# b  2.0  3.0   6.0   7.0
# c  4.0  5.0   8.0   9.0
# d  NaN  NaN  10.0  11.0
# e  NaN  NaN  12.0  13.0

# ---------------------------------------------------------------------------
# Joining several tables at once
# ---------------------------------------------------------------------------
# Another way to build a frame: from a nested list of rows.
another = DataFrame(
    [[7, 8], [9, 10], [11, 12], [16, 17]],
    index=['a', 'c', 'e', 'f'],
    columns=['北京', '上海'],
)

# Frames to attach to left2 in a single join call.
_others = [right2, another]

# Default (left) join: only the index labels of left2 are kept.
print(left2.join(_others))
#    Oh  Ne   Mi   Al   北京    上海
# a   0   1  NaN  NaN  7.0   8.0
# b   2   3  6.0  7.0  NaN   NaN
# c   4   5  8.0  9.0  9.0  10.0

# Outer join: keep every index label found in any of the three frames.
print(left2.join(_others, how='outer'))
#     Oh   Ne    Mi    Al    北京    上海
# a  0.0  1.0   NaN   NaN   7.0   8.0
# b  2.0  3.0   6.0   7.0   NaN   NaN
# c  4.0  5.0   8.0   9.0   9.0  10.0
# d  NaN  NaN  10.0  11.0   NaN   NaN
# e  NaN  NaN  12.0  13.0  11.0  12.0
# f  NaN  NaN   NaN   NaN  16.0  17.0