[Visualización del análisis de datos] Reindexación de series y marcos de datos

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

Reindex de serie

s1 =  Series([1,2,3,4], index=['A','B','C','D'])
s1
A    1
B    2
C    3
D    4
dtype: int64
 # reindex新的Series索引 (查看参数快捷键shift+tab)
s1.reindex(index=['A','B','C','D','E'])
A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
dtype: float64
# reindex并给空值索引赋值
s1.reindex(index=['A','B','C','D','E'],fill_value=10)
A     1
B     2
C     3
D     4
E    10
dtype: int64
# 测试reindex的赋值方式
s2 = Series(['A','B','C'],index=[1,5,10])
s2
1     A
5     B
10    C
dtype: object
# 重新索引
s2.reindex(index=range(15))
0     NaN
1       A
2     NaN
3     NaN
4     NaN
5       B
6     NaN
7     NaN
8     NaN
9     NaN
10      C
11    NaN
12    NaN
13    NaN
14    NaN
dtype: object
# 使用填充方法(ffill:重新索引后产生的空值按区间用前一个有效值向后填充)
s2.reindex(index=range(15),method='ffill')
0     NaN
1       A
2       A
3       A
4       A
5       B
6       B
7       B
8       B
9       B
10      C
11      C
12      C
13      C
14      C
dtype: object

DataFrame reindex

# 故意漏掉C
df1 = DataFrame(np.random.rand(25).reshape([5,5]), index=['A','B','D','E','F'], columns=['c1','c2','c3','c4','c5'])
df1

c1 c2 c3 c4 c5
A 0.916976 0.277428 0.487468 0.392332 0.906246
B 0.112718 0.000009 0.958650 0.890877 0.640683
D 0.715841 0.857899 0.834954 0.134856 0.982175
E 0.375207 0.925308 0.734072 0.583107 0.677676
F 0.627784 0.818094 0.636362 0.417960 0.063043
# reindex恢复C-重新索引行
df1.reindex(['A','B','C','D','E','F'])
c1 c2 c3 c4 c5
A 0.916976 0.277428 0.487468 0.392332 0.906246
B 0.112718 0.000009 0.958650 0.890877 0.640683
C NaN NaN NaN NaN NaN
D 0.715841 0.857899 0.834954 0.134856 0.982175
E 0.375207 0.925308 0.734072 0.583107 0.677676
F 0.627784 0.818094 0.636362 0.417960 0.063043
# reindex-重新索引列
df1.reindex(columns=['c1','c2','c3','c4','c5','c6'])
c1 c2 c3 c4 c5 c6
A 0.916976 0.277428 0.487468 0.392332 0.906246 NaN
B 0.112718 0.000009 0.958650 0.890877 0.640683 NaN
D 0.715841 0.857899 0.834954 0.134856 0.982175 NaN
E 0.375207 0.925308 0.734072 0.583107 0.677676 NaN
F 0.627784 0.818094 0.636362 0.417960 0.063043 NaN
 # reindex-行列同时重排(新增)
df1.reindex(index=['A','B','C','D','E','F'] ,columns=['c1','c2','c3','c4','c5','c6'])
c1 c2 c3 c4 c5 c6
A 0.916976 0.277428 0.487468 0.392332 0.906246 NaN
B 0.112718 0.000009 0.958650 0.890877 0.640683 NaN
C NaN NaN NaN NaN NaN NaN
D 0.715841 0.857899 0.834954 0.134856 0.982175 NaN
E 0.375207 0.925308 0.734072 0.583107 0.677676 NaN
F 0.627784 0.818094 0.636362 0.417960 0.063043 NaN
# Series(减少)
s1
A    1
B    2
C    3
D    4
dtype: int64
s1.reindex(index=['A','B'])
A    1
B    2
dtype: int64
# reindex-行列同时重排(减少)
df1
c1 c2 c3 c4 c5
A 0.916976 0.277428 0.487468 0.392332 0.906246
B 0.112718 0.000009 0.958650 0.890877 0.640683
D 0.715841 0.857899 0.834954 0.134856 0.982175
E 0.375207 0.925308 0.734072 0.583107 0.677676
F 0.627784 0.818094 0.636362 0.417960 0.063043
df1.reindex(['A','B'])
c1 c2 c3 c4 c5
A 0.916976 0.277428 0.487468 0.392332 0.906246
B 0.112718 0.000009 0.958650 0.890877 0.640683
# 删掉的方法
s1.drop('A')
B    2
C    3
D    4
dtype: int64
# 0行1列
df1.drop('A',axis=0)
c1 c2 c3 c4 c5
B 0.112718 0.000009 0.958650 0.890877 0.640683
D 0.715841 0.857899 0.834954 0.134856 0.982175
E 0.375207 0.925308 0.734072 0.583107 0.677676
F 0.627784 0.818094 0.636362 0.417960 0.063043
df1.drop('c1',axis=1)
c2 c3 c4 c5
A 0.277428 0.487468 0.392332 0.906246
B 0.000009 0.958650 0.890877 0.640683
D 0.857899 0.834954 0.134856 0.982175
E 0.925308 0.734072 0.583107 0.677676
F 0.818094 0.636362 0.417960 0.063043
234 artículos originales publicados · Me gusta 164 · Visitas 140,000+

Supongo que te gusta

Origin blog.csdn.net/weixin_43469680/article/details/105599586
Recomendado
Clasificación