import pandas as pd

# Load the 2016 and 2017 stock datasets, using the Symbol column as the
# row index of each DataFrame.
stocks_2016 = pd.read_csv('data/stocks_2016.csv', index_col='Symbol')
stocks_2017 = pd.read_csv('data/stocks_2017.csv', index_col='Symbol')
# Bare expression: shows the 2016 table when run as a notebook cell.
stocks_2016
|
Shares |
Low |
High |
Symbol |
|
|
|
AAPL |
80 |
95 |
110 |
TSLA |
50 |
80 |
130 |
WMT |
40 |
55 |
70 |
# Bare expression: shows the 2017 table when run as a notebook cell.
stocks_2017
|
Shares |
Low |
High |
Symbol |
|
|
|
AAPL |
50 |
120 |
140 |
GE |
100 |
30 |
40 |
IBM |
87 |
75 |
95 |
SLB |
20 |
55 |
85 |
TXN |
500 |
15 |
23 |
TSLA |
100 |
100 |
300 |
# 将两个DataFrame放入一个列表中,再用pandas的concat方法把它们按行(纵向)连接起来
# Gather both yearly DataFrames in one list, then stack them row-wise.
s_list = [
    stocks_2016,
    stocks_2017,
]
pd.concat(objs=s_list)
|
Shares |
Low |
High |
Symbol |
|
|
|
AAPL |
80 |
95 |
110 |
TSLA |
50 |
80 |
130 |
WMT |
40 |
55 |
70 |
AAPL |
50 |
120 |
140 |
GE |
100 |
30 |
40 |
IBM |
87 |
75 |
95 |
SLB |
20 |
55 |
85 |
TXN |
500 |
15 |
23 |
TSLA |
100 |
100 |
300 |
# The keys argument labels each input DataFrame; the labels become the
# outermost level of the row index, so the result has a MultiIndex.
# The names argument assigns a name to each index level.
pd.concat(s_list, keys=['2016', '2017'], names=['Year', 'Symbol'])
|
|
Shares |
Low |
High |
Year |
Symbol |
|
|
|
2016 |
AAPL |
80 |
95 |
110 |
TSLA |
50 |
80 |
130 |
WMT |
40 |
55 |
70 |
2017 |
AAPL |
50 |
120 |
140 |
GE |
100 |
30 |
40 |
IBM |
87 |
75 |
95 |
SLB |
20 |
55 |
85 |
TXN |
500 |
15 |
23 |
TSLA |
100 |
100 |
300 |
# Concatenation can also run horizontally: set axis to 'columns' (or 1).
pd.concat(
    s_list,
    axis='columns',
    keys=['2016', '2017'],
    names=['Year', None],
)
Year |
2016 |
2017 |
|
Shares |
Low |
High |
Shares |
Low |
High |
AAPL |
80.0 |
95.0 |
110.0 |
50.0 |
120.0 |
140.0 |
TSLA |
50.0 |
80.0 |
130.0 |
100.0 |
100.0 |
300.0 |
WMT |
40.0 |
55.0 |
70.0 |
NaN |
NaN |
NaN |
GE |
NaN |
NaN |
NaN |
100.0 |
30.0 |
40.0 |
IBM |
NaN |
NaN |
NaN |
87.0 |
75.0 |
95.0 |
SLB |
NaN |
NaN |
NaN |
20.0 |
55.0 |
85.0 |
TXN |
NaN |
NaN |
NaN |
500.0 |
15.0 |
23.0 |
# concat performs an outer join by default, keeping every row from each
# DataFrame. Passing join='inner' keeps only the index labels that all
# inputs share.
pd.concat(
    s_list,
    axis='columns',
    join='inner',
    keys=['2016', '2017'],
    names=['Year', None],
)
Year |
2016 |
2017 |
|
Shares |
Low |
High |
Shares |
Low |
High |
Symbol |
|
|
|
|
|
|
AAPL |
80 |
95 |
110 |
50 |
120 |
140 |
TSLA |
50 |
80 |
130 |
100 |
100 |
300 |
# 使用append拼接DataFrame
# DataFrame.append was a thin convenience wrapper that called pd.concat
# internally. It was deprecated in pandas 1.4 and removed in pandas 2.0,
# so call pd.concat directly to stack the two frames row-wise.
pd.concat([stocks_2016, stocks_2017])
|
Shares |
Low |
High |
Symbol |
|
|
|
AAPL |
80 |
95 |
110 |
TSLA |
50 |
80 |
130 |
WMT |
40 |
55 |
70 |
AAPL |
50 |
120 |
140 |
GE |
100 |
30 |
40 |
IBM |
87 |
75 |
95 |
SLB |
20 |
55 |
85 |
TXN |
500 |
15 |
23 |
TSLA |
100 |
100 |
300 |
# DataFrame.copy() returns a new DataFrame. With deep=True (the default)
# the data and index are copied, so changes to the copy do not affect
# stocks_2016.
stocks_2015 = stocks_2016.copy(deep=True)
# Bare expression: shows the copied table when run as a notebook cell.
stocks_2015
|
Shares |
Low |
High |
Symbol |
|
|
|
AAPL |
80 |
95 |
110 |
TSLA |
50 |
80 |
130 |
WMT |
40 |
55 |
70 |