【数据分析&数据挖掘】pandas交叉表&透视表

一、交叉表

 1 import pandas as pd
 2 import numpy as np
 3 
 4 # 加载数据
 5 detail = pd.read_excel("../day05/meal_order_detail.xlsx")
 6 # print("detail: \n", detail)
 7 print("detail的列名称: \n", detail.columns)
 8 
 9 # 构建交叉表
10 # index ——指定某列进行行分组
11 # columns ——指定某列进行列分组
12 # 如果不传其他参数,只传index 与columns, 那么交叉表构建 index 与 columns的相对数量
13 
14 res = pd.crosstab(
15     index=detail.loc[:, "dishes_name"],
16     columns=detail.loc[:, "amounts"],
17 )
18 print("res: \n", res)
19 
20 # 以index这一列为行分组,以columns这一列为列分组,对values进行统计aggfunc指标
21 # values 与 aggfunc必须同时存在
22 # 只能按照单列进行行分组,只能按照按照单列进行列分组,只能统计某一列的aggfunc指标
23 res = pd.crosstab(
24     index=detail.loc[:, "dishes_name"],
25     columns=detail.loc[:, "amounts"],
26     values=detail.loc[:, "counts"],
27     aggfunc=np.max
28 )
29 
30 print("res: \n", res)

二、透视表

 1 import pandas as pd
 2 import numpy as np
 3 
 4 # 创建一个dataframe
 5 df = pd.DataFrame(
 6     data={
 7         "cls_id": ["A", "B", "C", "A", "B", "C", "A", "B", "C", "A", "B","C"],
 8         "group_id": [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2],
 9         "name": ["xixi", "haha", "taotao", "huihui", "ranran", "Island", "Tree" ,"bamao", "simao", "hanhan", "qimao", "sanmao"],
10         "score": [92, 93, 39, 89, 90.5, 80, 91, 92, 65, 73, 34.5, 56],
11         "height": [165, 166, 167, 168, 152, 193, 192, 190, 173, 172, 170, 169]
12     },
13 )
14 
15 res = df.groupby(by="cls_id")[["score", "height"]].mean()
16 print("groupby-res: \n", res)
17 
18 # data ——df
19 # index 指定行分组 ——可以按照多列进行行分组
20 # values ——aggfunc 所针对的主体,可以是多列
21 # aggfunc ——默认为均值
22 
23 # 创建透视表
24 res = pd.pivot_table(
25     data=df,
26     # index="cls_id",  # 按照行或列分组
27     # columns="cls_id",
28     columns=["cls_id", "group_id"],
29     values=["score", "height"],
30     aggfunc="mean",
31     margins=True,
32     margins_name="xixi"
33 )
34 
35 print("res: \n", res)
36 
37 print("~"*60)
38 
39 # 加载数据
40 detail = pd.read_excel("../day05/meal_order_detail.xlsx")
41 # print("detail: \n", detail)
42 print("detail的列名称: \n", detail.columns)
43 
44 res = pd.pivot_table(
45     data=detail,
46     # index="order_id",
47     # index="cost",
48     columns="dishes_id",
49     # values="amounts",
50     values="cost",
51     aggfunc="max",
52     fill_value=-1  # 将全为空的数据进行填充
53 )
54 
55 print("res: \n", res)

猜你喜欢

转载自www.cnblogs.com/Tree0108/p/12116059.html