Python——pandas中的concat()函数和append()函数

一、concat()函数

  • 拼接多个Series
# 1、按列(axis=0,即沿行索引方向纵向堆叠)拼接多个Series,拼接后仍为Series格式
import pandas as pd
# s1 = pd.Series(['a', 'b', 'c'])
# print('s1: \n', s1)
# s2 = pd.Series(['A', 'B', 'C'])
# print('s2: \n', s2)
# # 默认按列(axis=0)拼接多个Series
# data1 = pd.concat([s1, s2])
# print('data1: \n', data1)
# print('data1_type: \n', type(data1))
# 结果为:
s1: 
 0    a
1    b
2    c
dtype: object
s2: 
 0    A
1    B
2    C
dtype: object
data1: 
 0    a
1    b
2    c
0    A
1    B
2    C
dtype: object
data1_type: 
 <class 'pandas.core.series.Series'>
# 2、按行(axis=1)拼接多个Series(即按索引对齐后横向拼接,每个Series成为一列),拼接后为DataFrame格式
# (1)索引相同时
# Example: concatenate two Series side by side when their indexes match.
# Both Series get the default integer index 0, 1, 2.
s1 = pd.Series(list('abc'))
s2 = pd.Series(list('ABC'))
print('s1: \n', s1)
print('s2: \n', s2)

# axis=1 aligns the inputs on their index and makes each Series one column,
# so the result is a DataFrame rather than a Series.
data2 = pd.concat(objs=[s1, s2], axis=1)
print('data2: \n', data2)
print('data2_type: \n', type(data2))
# 结果为:
s1: 
 0    a
1    b
2    c
dtype: object
s2: 
 0    A
1    B
2    C
dtype: object
data2: 
    0  1
0  a  A
1  b  B
2  c  C
data2_type: 
 <class 'pandas.core.frame.DataFrame'>

# (2)索引不同时
# s1 = pd.Series(['a', 'b', 'c'])
# s1.index = ['1', '2', '3']
# print('s1: \n', s1)
# s2 = pd.Series(['A', 'B', 'C'])
# s2.index = ['11', '22', '33']
# print('s2: \n', s2)
# data3 = pd.concat([s1, s2], axis=1)
# print('data3: \n', data3)
# 结果为:
s1: 
 1    a
2    b
3    c
dtype: object
s2: 
 11    A
22    B
33    C
dtype: object
data3: 
       0    1
1     a  NaN
11  NaN    A
2     b  NaN
22  NaN    B
3     c  NaN
33  NaN    C
# 3、当axis=0(按列拼接多个Series)时,keys和names可以发挥重要作用:keys为每个Series添加一层外层索引标签,names为各层索引命名
# s1 = pd.Series(['a', 'b', 'c'])
# print('s1: \n', s1)
# s2 = pd.Series(['A', 'B', 'C'])
# print('s2: \n', s2)
# data4 = pd.concat([s1, s2], keys=['s1', 's2'], names=['Series_name', 'Index'])
# print('data4: \n', data4)
# 结果为:
s1: 
 0    a
1    b
2    c
dtype: object
s2: 
 0    A
1    B
2    C
dtype: object
data4: 
 Series_name  Index
s1           0        a
             1        b
             2        c
s2           0        A
             1        B
             2        C
dtype: object
  • 拼接多个DataFrame
# 1、按列(axis=0)拼接多个DataFrame
# 特点:最终的DataFrame包含了所有的列名而且不重复
# (1)列的名称和个数都相同
# d1 = pd.DataFrame([['a', '1'], ['b', '2']], columns=['letter', 'number'])
# print('d1: \n', d1)
# d2 = pd.DataFrame([['c', '3'], ['d', '4']], columns=['letter', 'number'])
# print('d2: \n', d2)
# data1 = pd.concat([d1, d2])
# print('data1: \n', data1)
# 结果为:
d1: 
   letter number
0      a      1
1      b      2
d2: 
   letter number
0      c      3
1      d      4
data1: 
   letter number
0      a      1
1      b      2
0      c      3
1      d      4

# (2)列的名称或个数不相同
# d1 = pd.DataFrame([['a', '1'], ['b', '2']], columns=['letter', 'Number'])
# print('d1: \n', d1)
# d2 = pd.DataFrame([['c', '3'], ['d', '4']], columns=['letter', 'number'])
# print('d2: \n', d2)
# data2 = pd.concat([d1, d2])
# print('data2: \n', data2)
# 结果为(以下为旧版pandas的输出,列名会被排序;pandas 1.0及之后concat默认sort=False,列顺序为letter、Number、number):
d1: 
   letter Number
0      a      1
1      b      2
d2: 
   letter number
0      c      3
1      d      4
data2: 
   Number letter number
0      1      a    NaN
1      2      b    NaN
0    NaN      c      3
1    NaN      d      4

# (3)join默认为outer,当join='inner'时可以将存在NaN的列去掉,也就是只拼接多个DataFrame的交集列
# d1 = pd.DataFrame([['a', '1'], ['b', '2']], columns=['letter', 'Number'])
# print('d1: \n', d1)
# d2 = pd.DataFrame([['c', '3'], ['d', '4']], columns=['letter', 'number'])
# print('d2: \n', d2)
# data3 = pd.concat([d1, d2], join='inner')
# # 比较data2和data3
# print('data3: \n', data3)
# 结果为:
d1: 
   letter Number
0      a      1
1      b      2
d2: 
   letter number
0      c      3
1      d      4
data3: 
   letter
0      a
1      b
0      c
1      d

# 2、按行(axis=1,即按索引对齐后横向)拼接多个DataFrame
# (1)索引的名称都相同
# d1 = pd.DataFrame([['a', '1'], ['b', '2']], columns=['letter', 'number'], index=['1', '2'])
# print('d1: \n', d1)
# d2 = pd.DataFrame([['c', '3'], ['d', '4']], columns=['letter', 'number'], index=['1', '2'])
# print('d2: \n', d2)
# data4 = pd.concat([d1, d2], axis=1)
# print('data4: \n', data4)
# 结果为:
d1: 
   letter number
1      a      1
2      b      2
d2: 
   letter number
1      c      3
2      d      4
data4: 
   letter number letter number
1      a      1      c      3
2      b      2      d      4

# (2)索引的名称不相同
# d1 = pd.DataFrame([['a', '1'], ['b', '2']], columns=['letter', 'number'], index=['1', '2'])
# print('d1: \n', d1)
# d2 = pd.DataFrame([['c', '3'], ['d', '4']], columns=['letter', 'number'], index=['3', '4'])
# print('d2: \n', d2)
# data5 = pd.concat([d1, d2], axis=1)
# print('data5: \n', data5)
# 结果为:
d1: 
   letter number
1      a      1
2      b      2
d2: 
  letter number
3      c      3
4      d      4
data5: 
   letter number letter number
1      a      1    NaN    NaN
2      b      2    NaN    NaN
3    NaN    NaN      c      3
4    NaN    NaN      d      4

二、append()函数

# DataFrame1.append(DataFrame2)返回一个新的DataFrame,作用和pd.concat([DataFrame1, DataFrame2])相同
# 注意:DataFrame.append()自pandas 1.4起已被弃用,并在pandas 2.0中被移除;新代码请直接使用pd.concat()
# (1)列名个数和名称相同
# d1 = pd.DataFrame([['a', '1'], ['b', '2']], columns=['letter', 'number'], index=['1', '2'])
# print('d1: \n', d1)
# d2 = pd.DataFrame([['c', '3'], ['d', '4']], columns=['letter', 'number'], index=['1', '2'])
# print('d2: \n', d2)
# data1 = d1.append(d2)
# print('data1: \n', data1)
# 结果为:
d1: 
   letter number
1      a      1
2      b      2
d2: 
   letter number
1      c      3
2      d      4
data1: 
   letter number
1      a      1
2      b      2
1      c      3
2      d      4

# (2)列名个数或名称不相同
# d1 = pd.DataFrame([['a', '1', 'dog'], ['b', '2', 'cat']], columns=['letter', 'number', 'animal'], index=['1', '2'])
# print('d1: \n', d1)
# d2 = pd.DataFrame([['c', '3'], ['d', '4']], columns=['letter', 'number'], index=['1', '2'])
# print('d2: \n', d2)
# data2 = d1.append(d2)
# # data2 = pd.concat([d1, d2])
# print('data2: \n', data2)
# 结果为:
d1: 
   letter number animal
1      a      1    dog
2      b      2    cat
d2: 
   letter number
1      c      3
2      d      4
data2: 
   animal letter number
1    dog      a      1
2    cat      b      2
1    NaN      c      3
2    NaN      d      4

猜你喜欢

转载自blog.csdn.net/watermelon12138/article/details/87939332
今日推荐