分箱(binning):把连续的数值按区间划分到若干个“箱子”里,可以形象地理解为按大小给苹果分级、装箱。
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Draw 20 random integer scores from the half-open range [35, 100).
score_list = np.random.randint(low=35, high=100, size=20)
score_list
array([93, 35, 83, 44, 56, 62, 37, 86, 44, 82, 49, 91, 49, 82, 53, 89, 47,
56, 38, 86])
# Bin edges; pd.cut builds right-closed intervals from consecutive edges:
# (0, 59], (59, 70], (70, 80], (80, 100].
bins = [0, 59, 70, 80, 100]
# Map every score to the interval it falls into.
score_cut = pd.cut(x=score_list, bins=bins)
score_cut
[(80, 100], (0, 59], (80, 100], (0, 59], (0, 59], ..., (80, 100], (0, 59], (0, 59], (0, 59], (80, 100]]
Length: 20
Categories (4, interval[int64]): [(0, 59] < (59, 70] < (70, 80] < (80, 100]]
# Count how many scores landed in each bin, most frequent bin first.
# The top-level pd.value_counts() is deprecated since pandas 2.1; wrapping the
# Categorical in a Series and calling .value_counts() gives the same result.
pd.Series(score_cut).value_counts()
(0, 59] 11
(80, 100] 8
(59, 70] 1
(70, 80] 0
dtype: int64
# Build a one-column frame holding the scores (default integer index).
df = DataFrame({'score': score_list})
df
|    | score |
|----|-------|
| 0  | 93 |
| 1  | 35 |
| 2  | 83 |
| 3  | 44 |
| 4  | 56 |
| 5  | 62 |
| 6  | 37 |
| 7  | 86 |
| 8  | 44 |
| 9  | 82 |
| 10 | 49 |
| 11 | 91 |
| 12 | 49 |
| 13 | 82 |
| 14 | 53 |
| 15 | 89 |
| 16 | 47 |
| 17 | 56 |
| 18 | 38 |
| 19 | 86 |
# Give every row a random 3-character student id made of ASCII letters and
# digits. pd.util.testing.rands() was deprecated in pandas 1.0 and removed in
# pandas 2.0, so the ids are generated with NumPy instead.
import string

_id_chars = list(string.ascii_letters + string.digits)
# One id per existing row, so the column length always matches the frame.
df['student'] = [''.join(np.random.choice(_id_chars, 3)) for _ in range(len(df))]
df
|    | score | student |
|----|-------|---------|
| 0  | 93 | 8c1 |
| 1  | 35 | cHy |
| 2  | 83 | 6xy |
| 3  | 44 | 6gY |
| 4  | 56 | tc5 |
| 5  | 62 | r5T |
| 6  | 37 | 3z3 |
| 7  | 86 | vsy |
| 8  | 44 | F6h |
| 9  | 82 | hgC |
| 10 | 49 | xA9 |
| 11 | 91 | iLZ |
| 12 | 49 | BVK |
| 13 | 82 | E9C |
| 14 | 53 | rbE |
| 15 | 89 | hSL |
| 16 | 47 | AIt |
| 17 | 56 | Gdk |
| 18 | 38 | AFX |
| 19 | 86 | JhU |
# Preview the interval each row's score falls into; the result is only
# displayed here, not stored.
pd.cut(x=df['score'], bins=bins)
0 (80, 100]
1 (0, 59]
2 (80, 100]
3 (0, 59]
4 (0, 59]
5 (59, 70]
6 (0, 59]
7 (80, 100]
8 (0, 59]
9 (80, 100]
10 (0, 59]
11 (80, 100]
12 (0, 59]
13 (80, 100]
14 (0, 59]
15 (80, 100]
16 (0, 59]
17 (0, 59]
18 (0, 59]
19 (80, 100]
Name: score, dtype: category
Categories (4, interval[int64]): [(0, 59] < (59, 70] < (70, 80] < (80, 100]]
# Store a readable grade per row: the four intervals defined by `bins` are
# labelled, in ascending order, 'low', 'ok', 'good' and 'great'.
df['Categories'] = pd.cut(
    x=df['score'],
    bins=bins,
    labels=['low', 'ok', 'good', 'great'],
)
df
|    | score | student | Categories |
|----|-------|---------|------------|
| 0  | 93 | 8c1 | great |
| 1  | 35 | cHy | low |
| 2  | 83 | 6xy | great |
| 3  | 44 | 6gY | low |
| 4  | 56 | tc5 | low |
| 5  | 62 | r5T | ok |
| 6  | 37 | 3z3 | low |
| 7  | 86 | vsy | great |
| 8  | 44 | F6h | low |
| 9  | 82 | hgC | great |
| 10 | 49 | xA9 | low |
| 11 | 91 | iLZ | great |
| 12 | 49 | BVK | low |
| 13 | 82 | E9C | great |
| 14 | 53 | rbE | low |
| 15 | 89 | hSL | great |
| 16 | 47 | AIt | low |
| 17 | 56 | Gdk | low |
| 18 | 38 | AFX | low |
| 19 | 86 | JhU | great |