# Python data analysis, part 13: axis-wise concatenation of pandas matrices (concat explained)

# -*- coding: utf-8 -*-
import pandas as pd

from pandas import Series,DataFrame

import numpy as np
# Section: axis-wise concatenation with NumPy. The bare string below is the
# original section banner; as an expression statement it has no runtime effect.
'''

轴向连接
'''
# 3x4 integer grid holding 0..11; it is used as both operands below.
arr = np.arange(12).reshape(3, 4)
print(arr)

# axis=1 joins the two copies side by side, producing a 3x8 array.
operands = (arr, arr)
side_by_side = np.concatenate(operands, axis=1)
print(side_by_side)
# [[ 0  1  2  3  0  1  2  3]
#  [ 4  5  6  7  4  5  6  7]
#  [ 8  9 10 11  8  9 10 11]]

# Without an axis argument the copies are stacked one below the other (6x4).
stacked = np.concatenate(operands)
print(stacked)
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]
#  [ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]


# Series: axis-wise concatenation.
# Three two-element Series whose index labels do not overlap.
s1 = pd.Series([1, 2], index=list('ab'))
s2 = pd.Series([2, 3], index=list('cd'))
s3 = pd.Series([4, 5], index=list('ef'))
series_trio = [s1, s2, s3]

# Default axis (0): the Series are chained end to end into one longer Series.
chained = pd.concat(series_trio)
print(chained)
# a    1
# b    2
# c    2
# d    3
# e    4
# f    5
# dtype: int64

# axis=1: each Series becomes a column of a DataFrame; labels a Series does
# not have show up as NaN in its column.
as_columns = pd.concat(series_trio, axis=1)
print(as_columns)
#     0    1    2
# a  1.0  NaN  NaN
# b  2.0  NaN  NaN
# c  NaN  2.0  NaN
# d  NaN  3.0  NaN
# e  NaN  NaN  4.0
# f  NaN  NaN  5.0

# s4 shares labels a, b with s1 (values scaled by 5) and adds e, f from s3.
s4 = pd.concat([5 * s1, s3])
print(s4)
# a     5
# b    10
# e     4
# f     5
# dtype: int64

# Column-wise concat keeps the union of both indexes: e and f are missing
# from s1, so column 0 holds NaN there.
overlapping = [s1, s4]
index_union = pd.concat(overlapping, axis=1)
print(index_union)
#      0   1
# a  1.0   5
# b  2.0  10
# e  NaN   4
# f  NaN   5


# join='inner' keeps only the labels present in both inputs (a and b).
index_intersection = pd.concat(overlapping, axis=1, join='inner')
print(index_intersection)
#    0   1
# a  1   5
# b  2  10

# keys= adds an outer index level naming the piece each value came from,
# which yields a hierarchical index.
piece_names = ['one', 'two', 'three']
result = pd.concat([s1, s1, s3], keys=piece_names)
print(result)
# one    a    1
#        b    2
# two    a    1
#        b    2
# three  e    4
#        f    5
# dtype: int64


# unstack() moves the inner index level (a, b, e, f) into the columns.
wide = result.unstack()
print(wide)
#          a    b    e    f
# one    1.0  2.0  NaN  NaN
# two    1.0  2.0  NaN  NaN
# three  NaN  NaN  4.0  5.0


# The bare string below is the original banner for the next example; as an
# expression statement it has no runtime effect.
'''
合并以列开头
'''
# With axis=1 the keys become the column headers instead of an index level.
keyed_columns = pd.concat(series_trio, axis=1, keys=piece_names)
print(keyed_columns)
#    one  two  three
# a  1.0  NaN    NaN
# b  2.0  NaN    NaN
# c  NaN  2.0    NaN
# d  NaN  3.0    NaN
# e  NaN  NaN    4.0
# f  NaN  NaN    5.0



# Section: concatenating DataFrames. The bare string below is the original
# section banner; as an expression statement it has no runtime effect.
'''
DataFrame合并
'''
# 3x2 frame holding 0..5 with row labels a, b, c.
df1 = DataFrame(np.arange(6).reshape(3, 2), index=['a', 'b', 'c'], columns=['one', 'two'])
print(df1)
#   one  two
# a    0    1
# b    2    3
# c    4    5

# 2x2 frame holding 5..8 whose row labels (a, c) are a subset of df1's.
# NOTE: the column label 'fore' is kept exactly as written in the original
# (presumably meant to be 'four').
df2 = DataFrame(5 + np.arange(4).reshape(2, 2), index=['a', 'c'], columns=['three', 'fore'])

# Row-wise concat (default axis=0): the rows of df2 are appended below df1 and
# the columns are unioned, so cells a frame does not provide become NaN.
# sort=False is passed explicitly so the unioned columns keep first-seen order;
# when the argument is omitted the column order depends on the pandas version
# (older releases sorted the labels alphabetically: fore, one, three, two).
print(pd.concat([df1, df2], sort=False))
#    one  two  three  fore
# a  0.0  1.0    NaN   NaN
# b  2.0  3.0    NaN   NaN
# c  4.0  5.0    NaN   NaN
# a  NaN  NaN    5.0   6.0
# c  NaN  NaN    7.0   8.0

# Column-wise concat (axis=1): the frames are aligned on the row index and
# placed side by side; row b is absent from df2, hence NaN there.
print(pd.concat([df1, df2], axis=1))
#    one  two  three  fore
# a    0    1    5.0   6.0
# b    2    3    NaN   NaN
# c    4    5    7.0   8.0

# keys= adds an outer column level recording which frame each column came from.
print(pd.concat([df1, df2], axis=1, keys=['level1', 'level2']))
#   level1     level2
#      one two  three fore
# a      0   1    5.0  6.0
# b      2   3    NaN  NaN
# c      4   5    7.0  8.0

# names= labels the two column levels created above.
print(pd.concat([df1, df2], axis=1, keys=['level1', 'level2'], names=['upper', 'lower']))
# upper level1     level2
# lower    one two  three fore
# a          0   1    5.0  6.0
# b          2   3    NaN  NaN
# c          4   5    7.0  8.0

# Handling the row index when stacking frames.
# Two frames of random normal values; df2 has a subset of df1's columns in a
# different order. The numbers in the sample outputs below vary on every run.
df1 = pd.DataFrame(np.random.randn(3, 4), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.random.randn(2, 3), columns=['b', 'd', 'a'])
random_frames = [df1, df2]

# Plain concat keeps each frame's own row labels, so 0 and 1 appear twice;
# column c does not exist in df2 and is NaN for its rows.
labels_kept = pd.concat(random_frames)
print(labels_kept)
#       a         b         c         d
# 0 -0.333004 -0.897424 -1.217519  0.490918
# 1  1.021084 -0.425976 -0.486051  0.431409
# 2  0.380350 -1.534331  0.577973 -0.023094
# 0  0.529469 -1.242712       NaN  0.246182
# 1  0.359058 -2.106655       NaN -0.730740

# ignore_index=True discards the original labels and renumbers the rows 0..4.
renumbered = pd.concat(random_frames, ignore_index=True)
print(renumbered)
#    a         b         c         d
# 0 -0.333004 -0.897424 -1.217519  0.490918
# 1  1.021084 -0.425976 -0.486051  0.431409
# 2  0.380350 -1.534331  0.577973 -0.023094
# 3  0.529469 -1.242712       NaN  0.246182
# 4  0.359058 -2.106655       NaN -0.730740

# You may also like
#
# Reposted from blog.csdn.net/qq_38788128/article/details/80761654
# Today's recommendations