pandas_cookbook学习(一)

版权声明:本文版权归 https://blog.csdn.net/thfyshz 所有,原文链接:https://blog.csdn.net/thfyshz/article/details/83692129

必要库的导入:

import pandas as pd
import numpy as np
import functools

1 Idioms(习语)

df = pd.DataFrame(
   ...:      {'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df

	AAA	BBB	CCC
0	4	10	100
1	5	20	50
2	6	30	-30
3	7	40	-50

1.1 if-then(满足条件则赋值)

#对满足条件的行赋值,会直接修改原dataframe
df.loc[df.BBB>25, 'CCC'] = 0; df
	AAA	BBB	CCC
0	4	10	100
1	5	20	50
2	6	30	0
3	7	40	0

df.loc[df.AAA>5, ['BBB', 'CCC']] = -1; df
	AAA	BBB	CCC
0	4	10	100
1	5	20	50
2	6	-1	-1
3	7	-1	-1

#用布尔掩码控制替换:掩码为True的位置保留原值,为False的位置替换为-1000
df_mask = pd.DataFrame({'AAA' : [True] * 4, 'BBB' : [False] * 4,'CCC' : [True,False] * 2})
df.where(df_mask,-1000)
	AAA	BBB	CCC
0	4	-1000	100
1	5	-1000	-1000
2	6	-1000	-1
3	7	-1000	-1000

df = pd.DataFrame(
   ...:      {'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
	AAA	BBB	CCC
0	4	10	100
1	5	20	50
2	6	30	-30
3	7	40	-50
#根据另一列是否满足条件来设定新列的值
df['logic'] = np.where(df['AAA'] > 5,'high','low'); df
	AAA	BBB	CCC	logic
0	4	10	100	low
1	5	20	50	low
2	6	30	-30	high
3	7	40	-50	high

1.2 Splitting(拆分)

df = pd.DataFrame(
   ...:      {'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
	AAA	BBB	CCC
0	4	10	100
1	5	20	50
2	6	30	-30
3	7	40	-50

#按条件筛选出子集,不会改变原dataframe
dflow = df[df.AAA <= 5]; dflow
	AAA	BBB	CCC
0	4	10	100
1	5	20	50

dfhigh = df[df.AAA > 5]; dfhigh
	AAA	BBB	CCC
2	6	30	-30
3	7	40	-50

1.3 Building Criteria(由个人制定选择数据的标准)

df = pd.DataFrame(
   ....:      {'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
	AAA	BBB	CCC
0	4	10	100
1	5	20	50
2	6	30	-30
3	7	40	-50

#用&和|分别表示“与”、“或”来组合条件,每个条件需用括号括起来
newseries = df.loc[(df['BBB'] < 25) & (df['CCC'] >= -40), 'AAA']; newseries
0    4
1    5
Name: AAA, dtype: int64
df.loc[(df['BBB'] > 25) | (df['CCC'] >= 75), 'AAA'] = 0.1; df
	AAA	BBB	CCC
0	0.1	10	100
1	5.0	20	50
2	0.1	30	-30
3	0.1	40	-50

df
	AAA	BBB	CCC
0	0.1	10	100
1	5.0	20	50
2	0.1	30	-30
3	0.1	40	-50
#argsort的用法:返回按值从小到大排序后各元素的位置,这里用来按CCC与aValue的接近程度对行排序
aValue = 43.0
df.loc[(df.CCC-aValue).abs().argsort()]
	AAA	BBB	CCC
1	5.0	20	50
0	0.1	10	100
2	0.1	30	-30
3	0.1	40	-50

df = pd.DataFrame(
   ....:      {'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
	AAA	BBB	CCC
0	4	10	100
1	5	20	50
2	6	30	-30
3	7	40	-50
#挑选出满足以下所有条件的行:
Crit1 = df.AAA <= 5.5
Crit2 = df.BBB == 10.0
Crit3 = df.CCC > -40.0
CritList = [Crit1,Crit2,Crit3]
AllCrit = functools.reduce(lambda x,y: x & y, CritList)
df[AllCrit]
	AAA	BBB	CCC
0	4	10	100

猜你喜欢

转载自blog.csdn.net/thfyshz/article/details/83692129