pandas高级操作

import numpy as np
import pandas as pd
from pandas import DataFrame,Series

替换操作

替换操作可以同步作用于Series和DataFrame中
单值替换
- 普通替换：替换所有符合要求的元素:to_replace=15,value='e'
- 按列指定单值替换： to_replace={列标签: 被替换的值}, value=新值（例如 to_replace={4:0}, value=666 只会把列标签为4的那一列中的0替换成666）
多值替换
- 列表替换: to_replace=[] value=[]
- 字典替换（推荐） to_replace={to_replace:value,to_replace:value}

df = DataFrame(data=np.random.randint(0,100,size=(8,7)))
df

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	0	8	56	16	36	26	23
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

df.replace(to_replace=0,value='zero')

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	zero	8	56	16	36	26	23
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

df.replace(to_replace={0:'aaa',22:222666})

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	aaa	8	56	16	36	26	23
3	77	88	222666	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

df.iloc[2] = [0,0,0,0,0,0,0]
df

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	0	0	0	0	0	0	0
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

df.replace(to_replace={0:666})

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	666	666	666	666	666	666	666
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

#将指定列中的0替换成666
df.replace(to_replace={4:0},value=666)

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	0	0	0	0	666	0	0
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

映射操作

map是Series的一个方法
概念：创建一个映射关系列表，把values元素和一个特定的标签或者字符串绑定（给一个元素值提供不同的表现形式）
创建一个df，两列分别是姓名和薪资，然后给其名字起对应的英文名

dic = {
    'name':['张三','李四','王老五'],
    'salary':[22222,7777,11111]
}
df = DataFrame(data=dic)
df

	name	salary
0	张三	22222
1	李四	7777
2	王老五	11111

#创建一个df，两列分别是姓名和薪资，然后给其名字起对应的英文名
dic = {
    '张三':'tom',
    '李四':'jay',
    '王老五':'jerry'
}#映射关系表
df['e_name'] = df['name'].map(dic)
df

	name	salary	e_name
0	张三	22222	tom
1	李四	7777	jay
2	王老五	11111	jerry

运算工具

超过3000部分的钱缴纳50%的税，计算每个人的税后薪资

def after_sal(s, threshold=3000, rate=0.5):
    """Return the after-tax salary for the pre-tax salary ``s``.

    Only the part of the salary above ``threshold`` is taxed, at ``rate``.
    A salary at or below the threshold is returned untaxed; the previous
    formula applied a negative tax (i.e. a raise) in that case.

    Args:
        s: Pre-tax salary (a number; array-likes are handled elementwise).
        threshold: Tax-free allowance; defaults to 3000.
        rate: Tax rate applied to the amount above ``threshold``; defaults
            to 0.5.

    Returns:
        The salary remaining after tax.
    """
    # Clamp at zero so salaries below the threshold are not "refunded".
    taxable = np.maximum(s - threshold, 0)
    return s - taxable * rate

df['after_sal'] = df['salary'].map(after_sal)
df

	name	salary	e_name	after_sal
0	张三	22222	tom	12611.0
1	李四	7777	jay	5388.5
2	王老五	11111	jerry	7055.5

apply操作：df的一个运算工具
- apply是df的一个方法
- 作用：可以将df中行或者列数据进行某种形式的运算操作。

def func(s):
    """Print the sum of the Series handed over by ``DataFrame.apply``.

    Nothing is returned, so ``apply`` collects ``None`` for every label.
    """
    print(s.sum())
df.apply(func,axis=0)

张三李四王老五
41110
tomjayjerry
25055.0

name         None
salary       None
e_name       None
after_sal    None
dtype: object

映射索引

使用rename()函数替换行索引和列索引
参数介绍：
- index 替换行索引
- columns 替换列索引

df4 = DataFrame({'color':['white','gray','purple','blue','green'],'value':np.random.randint(10,size = 5)})
df4

	color	value
0	white	7
1	gray	7
2	purple	4
3	blue	0
4	green	2

# Mapping table for the row index: old label -> new label
new_index = {0: 'first', 1: 'two', 2: 'three', 3: 'four', 4: 'five'}
# Mapping table for the column index: old name -> new name
new_col = {'color': 'cc', 'value': 'vv'}
# Pass both mappings by keyword: pandas >= 1.0 raises TypeError when a
# positional mapper is combined with index=/columns=.
df4.rename(index=new_index, columns=new_col)

	cc	vv
first	white	7
two	gray	7
three	purple	4
four	blue	0
five	green	2

排序实现的随机抽样

take()
np.random.permutation()

np.random.permutation(10)

array([8, 2, 6, 7, 3, 9, 1, 5, 0, 4])

df = DataFrame(data=np.random.randint(0,100,size=(100,3)),columns=['A','B',"C"])
# Shuffle the 3 columns, then the 100 rows, by taking them in a randomly
# permuted positional order; the trailing [:50] keeps the first 50 shuffled
# rows as the random sample.
df.take(np.random.permutation(3),axis=1).take(np.random.permutation(100),axis=0)[:50]

以下是部分数据：

	A	B	C
7	91	5	14
74	19	27	18
29	55	76	42
43	40	92	84
27	64	23	36
99	16	17	33
94	4	84	69
97	84	52	63
0	81	21	46
88	82	7	73
51	30	96	62
93	30	15	41
39	27	88	5
4	47	14	90
46	26	86	70
67	13	92	79
40	92	23	51

#如何将df的数据进行打乱操作

数据的分类处理

数据分类处理的核心：
- groupby()函数
- groups属性查看分组情况

df = DataFrame({'item':['Apple','Banana','Orange','Banana','Orange','Apple'],
                'price':[4,3,3,2.5,4,2],
               'color':['red','yellow','yellow','green','green','green'],
               'weight':[12,20,50,30,20,44]})
df

	color	item	price	weight
0	red	Apple	4.0	12
1	yellow	Banana	3.0	20
2	yellow	Orange	3.0	50
3	green	Banana	2.5	30
4	green	Orange	4.0	20
5	green	Apple	2.0	44

#计算出每一种水果的平均价格
df.groupby(by='item')

<pandas.core.groupby.DataFrameGroupBy object at 0x000001B6E3D4A0B8>

df.groupby(by='item').groups

{'Apple': Int64Index([0, 5], dtype='int64'),
 'Banana': Int64Index([1, 3], dtype='int64'),
 'Orange': Int64Index([2, 4], dtype='int64')}

# numeric_only=True skips the string column 'color'; pandas >= 2.0 raises
# TypeError when mean() is applied to non-numeric columns.
df.groupby(by='item').mean(numeric_only=True)

	price	weight
item
Apple	3.00	28
Banana	2.75	25
Orange	3.50	35

# numeric_only=True skips the string column 'color'; pandas >= 2.0 raises
# TypeError when mean() is applied to non-numeric columns.
df.groupby(by='item').mean(numeric_only=True)['price']

item
Apple     3.00
Banana    2.75
Orange    3.50
Name: price, dtype: float64

#推荐
df.groupby(by='item')['price'].mean()

item
Apple     3.00
Banana    2.75
Orange    3.50
Name: price, dtype: float64

#计算每种颜色水果的平均重量
df.groupby(by='color')['weight'].mean()

color
green     31.333333
red       12.000000
yellow    35.000000
Name: weight, dtype: float64

#将每种水果的平均价格数据汇总到原数据中
mean_price_series = df.groupby(by='item')['price'].mean()
mean_price_series

item
Apple     3.00
Banana    2.75
Orange    3.50
Name: price, dtype: float64

dic = mean_price_series.to_dict()
dic

{'Apple': 3.0, 'Banana': 2.75, 'Orange': 3.5}

# Assigning mean_price_series directly (df['mean_price'] = mean_price_series)
# is not used here: that Series is indexed by item name, while df uses the
# default 0..5 row index, so the labels would not line up.
# Instead, look up each row's item in the {item: mean price} dict.
df['mean_price'] = df['item'].map(dic)
df

	color	item	price	weight	mean_price
0	red	Apple	4.0	12	3.00
1	yellow	Banana	3.0	20	2.75
2	yellow	Orange	3.0	50	3.50
3	green	Banana	2.5	30	2.75
4	green	Orange	4.0	20	3.50
5	green	Apple	2.0	44	3.00

高级数据聚合

使用groupby分组后，也可以使用transform和apply提供自定义函数实现更多的运算
df.groupby('item')['price'].sum() <==> df.groupby('item')['price'].apply(sum)
transform和apply都会进行运算，在transform或者apply中传入函数即可
transform和apply也可以传入一个lambda表达式

df.groupby(by='item')['price'].sum()

item
Apple     6.0
Banana    5.5
Orange    7.0
Name: price, dtype: float64

def abc(s):
    """Return the input ``s`` unchanged (identity function)."""
    return s

df.groupby(by='item')['price'].abc() #error

---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

<ipython-input-70-dc3538655de8> in <module>()
----> 1 df.groupby(by='item')['price'].abc()



C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\groupby.py in __getattr__(self, attr)
    674 
    675         raise AttributeError("%r object has no attribute %r" %
--> 676                              (type(self).__name__, attr))
    677 
    678     plot = property(GroupByPlot)



AttributeError: 'SeriesGroupBy' object has no attribute 'abc'

df.groupby(by='item')['price'].transform(abc)#right

0    4.0
1    3.0
2    3.0
3    2.5
4    4.0
5    2.0
Name: price, dtype: float64

#自定义一个求均值的函数，作用到分组结果中
def my_mean(s):
    """Return the arithmetic mean of the values in ``s``.

    Args:
        s: An iterable of numbers exposing a ``size`` attribute, e.g. the
            pandas Series passed in by ``groupby(...).transform``.

    Returns:
        The mean of the values, or ``nan`` for an empty input (instead of
        raising ``ZeroDivisionError``).
    """
    if s.size == 0:
        return float('nan')
    # Use the builtin sum() rather than shadowing it with a local accumulator.
    return sum(s) / s.size
df.groupby(by='item')['price'].transform(my_mean)

0    3.00
1    2.75
2    3.50
3    2.75
4    3.50
5    3.00
Name: price, dtype: float64

#忽略
def my_mean(s):
    """Return the arithmetic mean of the values in ``s``.

    Args:
        s: An iterable of numbers exposing a ``size`` attribute, e.g. the
            pandas Series passed in by ``groupby(...).apply``.

    Returns:
        The mean of the values, or ``nan`` for an empty input (instead of
        raising ``ZeroDivisionError``).
    """
    if s.size == 0:
        return float('nan')
    # Use the builtin sum() rather than shadowing it with a local accumulator.
    return sum(s) / s.size
df.groupby(by='item')['price'].apply(my_mean)

# 可以看出apply传出的数据是按照分组显示的，而transform输出的数据还是按照索引来显示的

item
Apple     3.00
Banana    2.75
Orange    3.50
Name: price, dtype: float64

数据加载

读取type-.txt文件数据

import pandas as pd
from pandas import DataFrame,Series
data=pd.read_csv('./data/type-.txt')
data

	你好-我好-他也好
0	也许-大概-有可能
1	然而-未必-不见得

将文件中每一个词作为元素存放在DataFrame中

data=pd.read_csv('./data/type-.txt',sep='-',header=None)
data

# sep是指定以什么来分割，这里是以'-'来分割数据
# header=None 表示的是这个文件没有头标题，从第一行开始读

	0	1	2
0	你好	我好	他也好
1	也许	大概	有可能
2	然而	未必	不见得

读取数据库中的数据

#连接数据库，获取连接对象
import sqlite3 as sqlite3
conn=sqlite3.connect('./data/weather_2012.sqlite')

#读取库表中的数据值
sql_df=pd.read_sql('select * from weather_2012',conn)
sql_df

	index	Date/Time	Temp (C)	Dew Point Temp (C)	Rel Hum (%)	Wind Spd (km/h)	Visibility (km)	Stn Press (kPa)	Weather
0	0.0	2012-01-01 00:00:00	-1.8	-3.9	86.0	4.0	8.0	101.24	Fog
1	1.0	2012-01-01 01:00:00	-1.8	-3.7	87.0	4.0	8.0	101.24	Fog
2	2.0	2012-01-01 02:00:00	-1.8	-3.4	89.0	7.0	4.0	101.26	Freezing Drizzle,Fog
3	3.0	2012-01-01 03:00:00	-1.5	-3.2	88.0	6.0	4.0	101.27	Freezing Drizzle,Fog
4	4.0	2012-01-01 04:00:00	-1.5	-3.3	88.0	7.0	4.8	101.23	Fog
5	5.0	2012-01-01 05:00:00	-1.4	-3.3	87.0	9.0	6.4	101.27	Fog
6	6.0	2012-01-01 06:00:00	-1.5	-3.1	89.0	7.0	6.4	101.29	Fog
7	7.0	2012-01-01 07:00:00	-1.4	-3.6	85.0	7.0	8.0	101.26	Fog
8	8.0	2012-01-01 08:00:00	-1.4	-3.6	85.0	9.0	8.0	101.23	Fog
9	9.0	2012-01-01 09:00:00	-1.3	-3.1	88.0	15.0	4.0	101.20	Fog
10	10.0	2012-01-01 10:00:00	-1.0	-2.3	91.0	9.0	1.2	101.15	Fog
11	11.0	2012-01-01 11:00:00	-0.5	-2.1	89.0	7.0	4.0	100.98	Fog
12	12.0	2012-01-01 12:00:00	-0.2	-2.0	88.0	9.0	4.8	100.79	Fog
13	13.0	2012-01-01 13:00:00	0.2	-1.7	87.0	13.0	4.8	100.58	Fog
14	14.0	2012-01-01 14:00:00	0.8	-1.1	87.0	20.0	4.8	100.31	Fog
15	15.0	2012-01-01 15:00:00	1.8	-0.4	85.0	22.0	6.4	100.07	Fog
16	16.0	2012-01-01 16:00:00	2.6	-0.2	82.0	13.0	12.9	99.93	Mostly Cloudy
17	17.0	2012-01-01 17:00:00	3.0	0.0	81.0	13.0	16.1	99.81	Cloudy
18	18.0	2012-01-01 18:00:00	3.8	1.0	82.0	15.0	12.9	99.74	Rain
...	...	...	...	...	...	...	...	...	...

	color	item	price	weight	mean_price
0	red	Apple	4.0	12	3.00
1	yellow	Banana	3.0	20	2.75
2	yellow	Orange	3.0	50	3.50
3	green	Banana	2.5	30	2.75
4	green	Orange	4.0	20	3.50
5	green	Apple	2.0	44	3.00

#将一个df中的数据值写入存储到db
df.to_sql('fruit',conn)

pd.read_sql('select * from fruit',conn)

	index	color	item	price	weight	mean_price
0	0	red	Apple	4.0	12	3.00
1	1	yellow	Banana	3.0	20	2.75
2	2	yellow	Orange	3.0	50	3.50
3	3	green	Banana	2.5	30	2.75
4	4	green	Orange	4.0	20	3.50
5	5	green	Apple	2.0	44	3.00

透视表

透视表是一种可以对数据动态排布并且分类汇总的表格格式。或许大多数人都在Excel使用过数据透视表，也体会到它的强大功能，而在pandas中它被称作pivot_table。
透视表的优点：
- 灵活性高，可以随意定制你的分析计算要求
- 脉络清晰易于理解数据
- 操作性强，报表神器

import pandas as pd
import numpy as np
df = pd.read_csv('./data/lanqiusai.csv')
df.head()

	对手	胜负	主客场	命中	投篮数	投篮命中率	3分命中率	篮板	助攻	得分
0	勇士	胜	客	10	23	0.435	0.444	6	11	27
1	国王	胜	客	8	21	0.381	0.286	3	9	27
2	小牛	胜	主	10	19	0.526	0.462	3	7	29
3	灰熊	负	主	8	20	0.400	0.250	5	8	22
4	76人	胜	客	10	20	0.500	0.250	3	13	27

pivot_table有四个最重要的参数index、values、columns、aggfunc

index参数：分类汇总的分类条件
- 每个pivot_table必须拥有一个index。如果想查看哈登对阵每个队伍的得分则需要对每一个队伍进行分类并计算其各类得分的平均值：

df.pivot_table(index='对手')#默认分类汇总使用的聚合函数是mean

	3分命中率	助攻	命中	得分	投篮命中率	投篮数	篮板
对手
76人	0.33950	10.00	9.0	28.00	0.4405	20.5	3.5
勇士	0.44400	11.00	10.0	27.00	0.4350	23.0	6.0
国王	0.28600	9.00	8.0	27.00	0.3810	21.0	3.0
太阳	0.54500	7.00	12.0	48.00	0.5450	22.0	2.0
小牛	0.46200	7.00	10.0	29.00	0.5260	19.0	3.0
尼克斯	0.36900	9.50	10.5	34.00	0.4175	25.0	3.5
开拓者	0.57100	3.00	16.0	48.00	0.5520	29.0	8.0
掘金	0.14300	9.00	6.0	21.00	0.3750	16.0	8.0
步行者	0.29150	12.50	8.5	27.50	0.3965	21.5	6.5
湖人	0.44400	9.00	13.0	36.00	0.5910	22.0	4.0
灰熊	0.35025	7.75	8.5	27.25	0.4015	21.0	4.5
爵士	0.60400	8.00	13.5	42.50	0.5905	22.0	3.5
猛龙	0.27300	11.00	8.0	38.00	0.3200	25.0	6.0
篮网	0.61500	8.00	13.0	37.00	0.6500	20.0	10.0
老鹰	0.54500	11.00	8.0	29.00	0.5330	15.0	3.0
骑士	0.42900	13.00	8.0	35.00	0.3810	21.0	11.0
鹈鹕	0.40000	17.00	8.0	26.00	0.5000	16.0	1.0
黄蜂	0.40000	11.00	8.0	27.00	0.4440	18.0	10.0

想看看对阵同一对手在不同主客场下的数据，分类条件为对手和主客场

df.pivot_table(index=['对手','主客场'])

		3分命中率	助攻	命中	得分	投篮命中率	投篮数	篮板
对手	主客场
76人	主	0.4290	7.0	8.0	29.0	0.381	21.0	4.0
76人	客	0.2500	13.0	10.0	27.0	0.500	20.0	3.0
勇士	客	0.4440	11.0	10.0	27.0	0.435	23.0	6.0
国王	客	0.2860	9.0	8.0	27.0	0.381	21.0	3.0
太阳	客	0.5450	7.0	12.0	48.0	0.545	22.0	2.0
小牛	主	0.4620	7.0	10.0	29.0	0.526	19.0	3.0
尼克斯	主	0.3850	10.0	12.0	37.0	0.444	27.0	2.0
尼克斯	客	0.3530	9.0	9.0	31.0	0.391	23.0	5.0
开拓者	客	0.5710	3.0	16.0	48.0	0.552	29.0	8.0
掘金	主	0.1430	9.0	6.0	21.0	0.375	16.0	8.0
步行者	主	0.3330	10.0	8.0	29.0	0.364	22.0	8.0
步行者	客	0.2500	15.0	9.0	26.0	0.429	21.0	5.0
湖人	客	0.4440	9.0	13.0	36.0	0.591	22.0	4.0
灰熊	主	0.3395	8.0	9.5	30.0	0.420	22.5	4.5
灰熊	客	0.3610	7.5	7.5	24.5	0.383	19.5	4.5
爵士	主	0.8750	13.0	19.0	56.0	0.760	25.0	2.0
爵士	客	0.3330	3.0	8.0	29.0	0.421	19.0	5.0
猛龙	主	0.2730	11.0	8.0	38.0	0.320	25.0	6.0
篮网	主	0.6150	8.0	13.0	37.0	0.650	20.0	10.0
老鹰	客	0.5450	11.0	8.0	29.0	0.533	15.0	3.0
骑士	主	0.4290	13.0	8.0	35.0	0.381	21.0	11.0
鹈鹕	主	0.4000	17.0	8.0	26.0	0.500	16.0	1.0
黄蜂	客	0.4000	11.0	8.0	27.0	0.444	18.0	10.0

values参数：需要对计算的数据进行筛选
- 如果我们只需要哈登在主客场和不同胜负情况下的得分、篮板与助攻三项数据：

df.pivot_table(index=['主客场','胜负'],values=['得分','篮板','助攻'])

		助攻	得分	篮板
主客场	胜负
主	胜	10.555556	34.222222	5.444444
主	负	8.666667	29.666667	5.000000
客	胜	9.000000	32.000000	4.916667
客	负	8.000000	20.000000	4.000000

Aggfunc参数：设置我们对数据聚合时进行的函数操作
- 当我们未设置aggfunc时，它默认aggfunc='mean'计算均值。
还想获得james harden在主客场和不同胜负情况下的总得分、总篮板、总助攻时：

df.pivot_table(index=['主客场','胜负'],values=['得分','篮板','助攻'],aggfunc='sum')

		助攻	得分	篮板
主客场	胜负
主	胜	95	308	49
主	负	26	89	15
客	胜	108	384	59
客	负	8	20	4

#还想获得james harden在主客场和不同胜负情况下的平均得分、总篮板、最小助攻时
df.pivot_table(index=['主客场','胜负'],aggfunc={'得分':'mean','篮板':'sum','助攻':'min'})

		助攻	得分	篮板
主客场	胜负
主	胜	7	34.222222	49
主	负	7	29.666667	15
客	胜	3	32.000000	59
客	负	8	20.000000	4

Columns:可以设置列层次字段
- 对values字段进行分类

#获取所有队主客场的总得分
df.pivot_table(index='主客场',values='得分',aggfunc='sum')

	得分
主客场
主	397
客	404

#获取每个队主客场的总得分（在总得分的基础上又进行了对手的分类）
df.pivot_table(index='主客场',values='得分',aggfunc='sum',columns='对手').fillna(value=0)
# columns参数指定列方向的分组字段：这里把'对手'的每个取值展开成单独的一列
# 为了更好的显示，最后将空值替换成0

对手	76人	勇士	国王	太阳	小牛	尼克斯	开拓者	掘金	步行者	湖人	灰熊	爵士	猛龙	篮网	老鹰	骑士	鹈鹕	黄蜂
主客场
主	29.0	0.0	0.0	0.0	29.0	37.0	0.0	21.0	29.0	0.0	60.0	56.0	38.0	37.0	0.0	35.0	26.0	0.0
客	27.0	27.0	27.0	48.0	0.0	31.0	48.0	0.0	26.0	36.0	49.0	29.0	0.0	0.0	29.0	0.0	0.0	27.0

交叉表

是一种用于计算分组频数的特殊透视表,对数据进行汇总
pd.crosstab(index,columns)
- index:分组数据，交叉表的行索引
- columns:交叉表的列索引

df = DataFrame({'sex':['man','man','women','women','man','women','man','women','women'],
               'age':[15,23,25,17,35,57,24,31,22],
               'smoke':[True,False,False,True,True,False,False,True,False],
               'height':[168,179,181,166,173,178,188,190,160]})
df

	age	height	sex	smoke
0	15	168	man	True
1	23	179	man	False
2	25	181	women	False
3	17	166	women	True
4	35	173	man	True
5	57	178	women	False
6	24	188	man	False
7	31	190	women	True
8	22	160	women	False

求出各个性别抽烟的人数

pd.crosstab(df.smoke,df.sex)

sex	man	women
smoke
False	2	3
True	2	2

求出各个年龄段抽烟人情况

pd.crosstab(df.age,df.smoke)

smoke	False	True
age
15	0	1
17	0	1
22	1	0
23	1	0
24	1	0
25	1	0
31	0	1
35	0	1
57	1	0

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	0	8	56	16	36	26	23
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	0	0	0	0	0	0	0
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	666	666	666	666	666	666	666
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	0	0	0	0	666	0	0
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

	A	B	C
7	91	5	14
74	19	27	18
29	55	76	42
43	40	92	84
27	64	23	36
99	16	17	33
94	4	84	69
97	84	52	63
0	81	21	46
88	82	7	73
51	30	96	62
93	30	15	41
39	27	88	5
4	47	14	90
46	26	86	70
67	13	92	79
40	92	23	51

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	0	8	56	16	36	26	23
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	0	0	0	0	0	0	0
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	666	666	666	666	666	666	666
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	0	0	0	0	666	0	0
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

	A	B	C
7	91	5	14
74	19	27	18
29	55	76	42
43	40	92	84
27	64	23	36
99	16	17	33
94	4	84	69
97	84	52	63
0	81	21	46
88	82	7	73
51	30	96	62
93	30	15	41
39	27	88	5
4	47	14	90
46	26	86	70
67	13	92	79
40	92	23	51

pandas高级操作

pandas高级操作

替换操作

映射操作

运算工具

映射索引

排序实现的随机抽样

数据的分类处理

高级数据聚合

数据加载

透视表

pivot_table有四个最重要的参数index、values、columns、aggfunc

交叉表

猜你喜欢

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	0	8	56	16	36	26	23
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	0	0	0	0	0	0	0
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	666	666	666	666	666	666	666
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

	0	1	2	3	4	5	6
0	34	55	9	28	65	38	13
1	34	61	58	93	35	10	65
2	0	0	0	0	666	0	0
3	77	88	22	98	83	82	21
4	99	43	17	54	87	77	82
5	35	41	33	86	10	16	35
6	27	87	93	32	47	40	69
7	62	63	48	94	52	21	99

	A	B	C
7	91	5	14
74	19	27	18
29	55	76	42
43	40	92	84
27	64	23	36
99	16	17	33
94	4	84	69
97	84	52	63
0	81	21	46
88	82	7	73
51	30	96	62
93	30	15	41
39	27	88	5
4	47	14	90
46	26	86	70
67	13	92	79
40	92	23	51