【数据分析可视化】透视表

前提要对数据信息之间的关系要了解

import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# 读入excel文件
df = pd.read_excel('/Users/bennyrhys/Desktop/数据分析可视化-数据集/homework/sales-funnel.xlsx')
df
Account Name Rep Manager Product Quantity Price Status
0 714466 Trantow-Barrows Craig Booker Debra Henley CPU 1 30000 presented
1 714466 Trantow-Barrows Craig Booker Debra Henley Software 1 10000 presented
2 714466 Trantow-Barrows Craig Booker Debra Henley Maintenance 2 5000 pending
3 737550 Fritsch, Russel and Anderson Craig Booker Debra Henley CPU 1 35000 declined
4 146832 Kiehn-Spinka Daniel Hilton Debra Henley CPU 2 65000 won
5 218895 Kulas Inc Daniel Hilton Debra Henley CPU 2 40000 pending
6 218895 Kulas Inc Daniel Hilton Debra Henley Software 1 10000 presented
7 412290 Jerde-Hilpert John Smith Debra Henley Maintenance 2 5000 pending
8 740150 Barton LLC John Smith Debra Henley CPU 1 35000 declined
9 141962 Herman LLC Cedric Moss Fred Anderson CPU 2 65000 won
10 163416 Purdy-Kunde Cedric Moss Fred Anderson CPU 1 30000 presented
11 239344 Stokes LLC Cedric Moss Fred Anderson Maintenance 1 5000 pending
12 239344 Stokes LLC Cedric Moss Fred Anderson Software 1 10000 presented
13 307599 Kassulke, Ondricka and Metz Wendy Yule Fred Anderson Maintenance 3 7000 won
14 688981 Keeling LLC Wendy Yule Fred Anderson CPU 5 100000 won
15 729833 Koepp Ltd Wendy Yule Fred Anderson CPU 2 65000 declined
16 729833 Koepp Ltd Wendy Yule Fred Anderson Monitor 2 5000 presented
# 透视表-去重复(聚合默认 agg平均值)
pd.pivot_table(df, index=['Name'])
Account Price Quantity
Name
Barton LLC 740150 35000 1.000000
Fritsch, Russel and Anderson 737550 35000 1.000000
Herman LLC 141962 65000 2.000000
Jerde-Hilpert 412290 5000 2.000000
Kassulke, Ondricka and Metz 307599 7000 3.000000
Keeling LLC 688981 100000 5.000000
Kiehn-Spinka 146832 65000 2.000000
Koepp Ltd 729833 35000 2.000000
Kulas Inc 218895 25000 1.500000
Purdy-Kunde 163416 30000 1.000000
Stokes LLC 239344 7500 1.000000
Trantow-Barrows 714466 15000 1.333333
# 透视表-去重复(聚合默认 agg-sum)
pd.pivot_table(df, index=['Name'], aggfunc='sum')
Account Price Quantity
Name
Barton LLC 740150 35000 1
Fritsch, Russel and Anderson 737550 35000 1
Herman LLC 141962 65000 2
Jerde-Hilpert 412290 5000 2
Kassulke, Ondricka and Metz 307599 7000 3
Keeling LLC 688981 100000 5
Kiehn-Spinka 146832 65000 2
Koepp Ltd 1459666 70000 4
Kulas Inc 437790 50000 3
Purdy-Kunde 163416 30000 1
Stokes LLC 478688 15000 2
Trantow-Barrows 2143398 45000 4
# 透视表-多列索引(基本不变)
pd.pivot_table(df, index=['Name','Rep','Manager'])
Account Price Quantity
Name Rep Manager
Barton LLC John Smith Debra Henley 740150 35000 1.000000
Fritsch, Russel and Anderson Craig Booker Debra Henley 737550 35000 1.000000
Herman LLC Cedric Moss Fred Anderson 141962 65000 2.000000
Jerde-Hilpert John Smith Debra Henley 412290 5000 2.000000
Kassulke, Ondricka and Metz Wendy Yule Fred Anderson 307599 7000 3.000000
Keeling LLC Wendy Yule Fred Anderson 688981 100000 5.000000
Kiehn-Spinka Daniel Hilton Debra Henley 146832 65000 2.000000
Koepp Ltd Wendy Yule Fred Anderson 729833 35000 2.000000
Kulas Inc Daniel Hilton Debra Henley 218895 25000 1.500000
Purdy-Kunde Cedric Moss Fred Anderson 163416 30000 1.000000
Stokes LLC Cedric Moss Fred Anderson 239344 7500 1.000000
Trantow-Barrows Craig Booker Debra Henley 714466 15000 1.333333
# 透视表-包含关系
pd.pivot_table(df, index=['Manager','Rep'])
Account Price Quantity
Manager Rep
Debra Henley Craig Booker 720237.0 20000.000000 1.250000
Daniel Hilton 194874.0 38333.333333 1.666667
John Smith 576220.0 20000.000000 1.500000
Fred Anderson Cedric Moss 196016.5 27500.000000 1.250000
Wendy Yule 614061.5 44250.000000 3.000000
# 透视表指定 values默认是None
pd.pivot_table(df, index=['Manager','Rep'], values=['Price','Quantity'])
Price Quantity
Manager Rep
Debra Henley Craig Booker 20000.000000 1.250000
Daniel Hilton 38333.333333 1.666667
John Smith 20000.000000 1.500000
Fred Anderson Cedric Moss 27500.000000 1.250000
Wendy Yule 44250.000000 3.000000
# 透视表-col
pd.pivot_table(df, index=['Manager','Rep'], values=['Price','Quantity'], columns=['Product'])
Price Quantity
Product CPU Maintenance Monitor Software CPU Maintenance Monitor Software
Manager Rep
Debra Henley Craig Booker 32500.0 5000.0 NaN 10000.0 1.0 2.0 NaN 1.0
Daniel Hilton 52500.0 NaN NaN 10000.0 2.0 NaN NaN 1.0
John Smith 35000.0 5000.0 NaN NaN 1.0 2.0 NaN NaN
Fred Anderson Cedric Moss 47500.0 5000.0 NaN 10000.0 1.5 1.0 NaN 1.0
Wendy Yule 82500.0 7000.0 5000.0 NaN 3.5 3.0 2.0 NaN
# 透视表-填充nan
pd.pivot_table(df, index=['Manager','Rep'], values=['Price','Quantity'], columns=['Product'],fill_value=0)
Price Quantity
Product CPU Maintenance Monitor Software CPU Maintenance Monitor Software
Manager Rep
Debra Henley Craig Booker 32500 5000 0 10000 1.0 2 0 1
Daniel Hilton 52500 0 0 10000 2.0 0 0 1
John Smith 35000 5000 0 0 1.0 2 0 0
Fred Anderson Cedric Moss 47500 5000 0 10000 1.5 1 0 1
Wendy Yule 82500 7000 5000 0 3.5 3 2 0
原创文章 257 获赞 187 访问量 16万+

猜你喜欢

转载自blog.csdn.net/weixin_43469680/article/details/105665566