机器学习之pandas 索引笔记

 1 import pandas as pd
 2 import numpy as np
 3 
 4 s = pd.Series(np.random.rand(5), index=list('abcde'))
 5 # 创建 Series，其中 index=list('abcde') 为每个元素指定行索引标签
 6 s.index.name='alpha'  # 为行索引添加名称标签
 7 
 8 df = pd.DataFrame(np.random.randn(4,3), columns=['one','two','three'])
 9 # 创建DataFrame，其中columns=['one','two','three']表示为每一列添加索引
10 df.index.name = 'row'  # 为行索引添加名称标签
11 df.columns.name = 'col'  # 为列索引添加名称标签

 1 import pandas as pd
 2 import numpy as np
 3 
 4 s = pd.Series(np.arange(6), index=list('abcbda'))
 5 # 创建一个带有重复索引的Series
 6 
 7 s['a']  # 取出索引标签为 a 的所有值（索引重复时会返回多个值）
 8 s.index.is_unique  # 判断s中每个索引是否独一无二
 9 s.index.unique()  # 找出s中不重复的索引
10 
11 s.groupby(s.index).sum()  # 按索引标签分组，对每组求和
12 s.groupby(s.index).mean()  # 按索引标签分组，对每组求均值
13 s.groupby(s.index).first()  # 按索引标签分组，取每组的第一项

 1 import pandas as pd
 2 import numpy as np
 3 
 4 a = [['a','a','a','b','b','c','c'],[1,2,3,1,2,2,3]]
 5 t = list(zip(*a))
 6 index = pd.MultiIndex.from_tuples(t,names=['level1','level2'])
 7 s = pd.Series(np.random.rand(7),index=index)
 8 # 输出 s
 9 level1  level2
10 a       1         0.029233
11         2         0.539508
12         3         0.502217
13 b       1         0.536222
14         2         0.217398
15 c       2         0.551864
16         3         0.596248
17 
18 s['b'] 
19 # 输出
20 level2
21 1    0.536222
22 2    0.217398
23 dtype: float64
24 
25 s['b':'c']
26 # 输出
27 level1  level2
28 b       1         0.536222
29         2         0.217398
30 c       2         0.551864
31         3         0.596248
32 dtype: float64
33 
34 s[['a','c']]
35 # 输出
36 level1  level2
37 a       1         0.029233
38         2         0.539508
39         3         0.502217
40 c       2         0.551864
41         3         0.596248
42 dtype: float64
43 
44 s[:,2]
45 # 输出
46 level1
47 a    0.539508
48 b    0.217398
49 c    0.551864
50 dtype: float64

机器学习之pandas 索引笔记

猜你喜欢