# 1、按列(axis=0,即纵向首尾相接)拼接多个Series,拼接后仍为Series格式
import pandas as pd
# s1 = pd.Series(['a', 'b', 'c'])
# print('s1: \n', s1)
# s2 = pd.Series(['A', 'B', 'C'])
# print('s2: \n', s2)
# # 默认按列(axis=0)拼接多个Series
# data1 = pd.concat([s1, s2])
# print('data1: \n', data1)
# print('data1_type: \n', type(data1))
# 结果为:
# s1:
# 0    a
# 1    b
# 2    c
# dtype: object
# s2:
# 0    A
# 1    B
# 2    C
# dtype: object
# data1:
# 0    a
# 1    b
# 2    c
# 0    A
# 1    B
# 2    C
# dtype: object
# data1_type:
# <class 'pandas.core.series.Series'>
# 2. Concatenate multiple Series side by side (axis=1, aligned on the index);
#    the result is a DataFrame rather than a Series.
# (1) When the indexes are identical
s1 = pd.Series(['a', 'b', 'c'])
print('s1: \n', s1)
s2 = pd.Series(['A', 'B', 'C'])
print('s2: \n', s2)
# With axis=1 each input Series becomes one column of the result.
data2 = pd.concat([s1, s2], axis=1)
print('data2: \n', data2)
print('data2_type: \n', type(data2))
# Expected output (kept as comments so the script stays valid Python):
# s1:
# 0    a
# 1    b
# 2    c
# dtype: object
# s2:
# 0    A
# 1    B
# 2    C
# dtype: object
# data2:
#    0  1
# 0  a  A
# 1  b  B
# 2  c  C
# data2_type:
# <class 'pandas.core.frame.DataFrame'>
# (2)索引不同时
# s1 = pd.Series(['a', 'b', 'c'])
# s1.index = ['1', '2', '3']
# print('s1: \n', s1)
# s2 = pd.Series(['A', 'B', 'C'])
# s2.index = ['11', '22', '33']
# print('s2: \n', s2)
# data3 = pd.concat([s1, s2], axis=1)
# print('data3: \n', data3)
# 结果为:
# s1:
# 1    a
# 2    b
# 3    c
# dtype: object
# s2:
# 11    A
# 22    B
# 33    C
# dtype: object
# data3:
#       0    1
# 1     a  NaN
# 11  NaN    A
# 2     b  NaN
# 22  NaN    B
# 3     c  NaN
# 33  NaN    C
# 3、当axis=0(按列拼接多个Series)时,keys和names可以发挥重要作用:keys为每个Series添加外层索引标签,names为各层索引命名
# s1 = pd.Series(['a', 'b', 'c'])
# print('s1: \n', s1)
# s2 = pd.Series(['A', 'B', 'C'])
# print('s2: \n', s2)
# data4 = pd.concat([s1, s2], keys=['s1', 's2'], names=['Series_name', 'Index'])
# print('data4: \n', data4)
# 结果为:
# s1:
# 0    a
# 1    b
# 2    c
# dtype: object
# s2:
# 0    A
# 1    B
# 2    C
# dtype: object
# data4:
# Series_name  Index
# s1           0        a
#              1        b
#              2        c
# s2           0        A
#              1        B
#              2        C
# dtype: object
# 拼接多个DataFrame
# 1、按列(axis=0,即纵向)拼接多个DataFrame
# 特点:最终的DataFrame包含了所有的列名而且不重复
# (1)列的名称和个数都相同
# d1 = pd.DataFrame([['a', '1'], ['b', '2']], columns=['letter', 'number'])
# print('d1: \n', d1)
# d2 = pd.DataFrame([['c', '3'], ['d', '4']], columns=['letter', 'number'])
# print('d2: \n', d2)
# data1 = pd.concat([d1, d2])
# print('data1: \n', data1)
# 结果为:
# d1:
#   letter number
# 0      a      1
# 1      b      2
# d2:
#   letter number
# 0      c      3
# 1      d      4
# data1:
#   letter number
# 0      a      1
# 1      b      2
# 0      c      3
# 1      d      4
# (2)列的名称或个数不相同
# d1 = pd.DataFrame([['a', '1'], ['b', '2']], columns=['letter', 'Number'])
# print('d1: \n', d1)
# d2 = pd.DataFrame([['c', '3'], ['d', '4']], columns=['letter', 'number'])
# print('d2: \n', d2)
# data2 = pd.concat([d1, d2])
# print('data2: \n', data2)
# 结果为:
# d1:
#   letter Number
# 0      a      1
# 1      b      2
# d2:
#   letter number
# 0      c      3
# 1      d      4
# data2:
#   Number letter number
# 0      1      a    NaN
# 1      2      b    NaN
# 0    NaN      c      3
# 1    NaN      d      4
# (3)join默认为outer,当join='inner'时可以将存在NaN的列去掉,也就是只拼接多个DataFrame的交集列
# d1 = pd.DataFrame([['a', '1'], ['b', '2']], columns=['letter', 'Number'])
# print('d1: \n', d1)
# d2 = pd.DataFrame([['c', '3'], ['d', '4']], columns=['letter', 'number'])
# print('d2: \n', d2)
# data3 = pd.concat([d1, d2], join='inner')
# # 比较data2和data3
# print('data3: \n', data3)
# 结果为:
# d1:
#   letter Number
# 0      a      1
# 1      b      2
# d2:
#   letter number
# 0      c      3
# 1      d      4
# data3:
#   letter
# 0      a
# 1      b
# 0      c
# 1      d
# 2、按行(axis=1,即按索引对齐后横向)拼接多个DataFrame
# (1)索引的名称都相同
# d1 = pd.DataFrame([['a', '1'], ['b', '2']], columns=['letter', 'number'], index=['1', '2'])
# print('d1: \n', d1)
# d2 = pd.DataFrame([['c', '3'], ['d', '4']], columns=['letter', 'number'], index=['1', '2'])
# print('d2: \n', d2)
# data4 = pd.concat([d1, d2], axis=1)
# print('data4: \n', data4)
# 结果为:
# d1:
#   letter number
# 1      a      1
# 2      b      2
# d2:
#   letter number
# 1      c      3
# 2      d      4
# data4:
#   letter number letter number
# 1      a      1      c      3
# 2      b      2      d      4
# (2)索引的名称不相同
# d1 = pd.DataFrame([['a', '1'], ['b', '2']], columns=['letter', 'number'], index=['1', '2'])
# print('d1: \n', d1)
# d2 = pd.DataFrame([['c', '3'], ['d', '4']], columns=['letter', 'number'], index=['3', '4'])
# print('d2: \n', d2)
# data5 = pd.concat([d1, d2], axis=1)
# print('data5: \n', data5)
# 结果为:
# d1:
#   letter number
# 1      a      1
# 2      b      2
# d2:
#   letter number
# 3      c      3
# 4      d      4
# data5:
#   letter number letter number
# 1      a      1    NaN    NaN
# 2      b      2    NaN    NaN
# 3    NaN    NaN      c      3
# 4    NaN    NaN      d      4
# 二、append()函数
# DataFrame1.append(DataFrame2)返回一个新的DataFrame,作用和pd.concat([DataFrame1, DataFrame2])相同
# 注意:DataFrame.append()自pandas 1.4起已弃用,并在pandas 2.0中被移除;新代码请改用pd.concat()
# (1)列名个数和名称相同
# d1 = pd.DataFrame([['a', '1'], ['b', '2']], columns=['letter', 'number'], index=['1', '2'])
# print('d1: \n', d1)
# d2 = pd.DataFrame([['c', '3'], ['d', '4']], columns=['letter', 'number'], index=['1', '2'])
# print('d2: \n', d2)
# data1 = d1.append(d2)
# print('data1: \n', data1)
# 结果为:
# d1:
#   letter number
# 1      a      1
# 2      b      2
# d2:
#   letter number
# 1      c      3
# 2      d      4
# data1:
#   letter number
# 1      a      1
# 2      b      2
# 1      c      3
# 2      d      4
# (2)列名个数或名称不相同
# d1 = pd.DataFrame([['a', '1', 'dog'], ['b', '2', 'cat']], columns=['letter', 'number', 'animal'], index=['1', '2'])
# print('d1: \n', d1)
# d2 = pd.DataFrame([['c', '3'], ['d', '4']], columns=['letter', 'number'], index=['1', '2'])
# print('d2: \n', d2)
# data2 = d1.append(d2)
# # data2 = pd.concat([d1, d2])
# print('data2: \n', data2)
# 结果为:
# d1:
#   letter number animal
# 1      a      1    dog
# 2      b      2    cat
# d2:
#   letter number
# 1      c      3
# 2      d      4
# data2:
#   animal letter number
# 1    dog      a      1
# 2    cat      b      2
# 1    NaN      c      3
# 2    NaN      d      4