# Target encoding (mean encoding): replace each category with the mean of the target for that category

# importing libraries
import pandas as pd

# Example dataset: a categorical subject label paired with a binary target.
data = dict(
    SubjectName=['s1', 's2', 's3', 's1', 's4', 's3', 's2', 's1', 's2', 's4', 's1'],
    Target=[1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0],
)

# One row per observation; the columns are the two keys of `data`.
df = pd.DataFrame(data)

print(df)
     SubjectName  Target
0    s1    1
1    s2    0
2    s3    1
3    s1    1
4    s4    1
5    s3    0
6    s2    0
7    s1    1
8    s2    1
9    s4    1
10    s1    0
# Target (mean) encoding of the SubjectName column.
# Group once and reuse the grouped Target column instead of rebuilding
# the same groupby for every statement.
target_by_subject = df.groupby(['SubjectName'])['Target']

# Number of rows per subject. Bound to a name because a bare expression's
# value is discarded when this runs as a script.
subject_counts = target_by_subject.count()

# Mean of Target per subject; these are the encoded values.
subject_means = target_by_subject.mean()

# Lookup table: subject label -> mean Target for that label.
Mean_encoded_subject = subject_means.to_dict()

# Overwrite each subject label with its mean Target (the original labels are
# replaced in place, so SubjectName becomes a float column).
df['SubjectName'] = df['SubjectName'].map(Mean_encoded_subject)
print(df)
    SubjectName    Target
0    0.750000    1
1    0.333333    0
2    0.500000    1
3    0.750000    1
4    1.000000    1
5    0.500000    0
6    0.333333    0
7    0.750000    1
8    0.333333    1
9    1.000000    1
10    0.750000    0

Guess you like

Origin blog.csdn.net/weixin_47532216/article/details/121376699