Data processing: converting string-valued DataFrame columns to numeric codes

from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import StratifiedKFold,KFold
12529562-29698e6c96abfa4c.png
sad26.png
# Distinct values found in the 'workclass' column of df.
workclass = df['workclass'].unique()
# Bare expression: presumably a notebook cell, so the array is displayed here.
workclass
12529562-9eadd8f0261f3ba8.png
123.png
# Position of the first entry in `workclass` equal to 'State-gov'.
np.argwhere(workclass=='State-gov')[0][0]

>>>0

def conver(x):
    """Return the integer code for the workclass value ``x``.

    The code is the position of the first entry of the module-level
    ``workclass`` array that equals ``x``. Indexing raises ``IndexError``
    when no entry matches.
    """
    matches = np.argwhere(workclass == x)
    return matches[0, 0]


# Replace every workclass label in X with its integer code.
X['workclass'] = X['workclass'].map(conver)
12529562-de8f78ff15c93b3a.png
234.png
# Column labels of X (bare expression; displayed when run interactively).
X.columns
12529562-d6c3a79a542e81ed.png
qwe17.png
# Remaining string-valued columns to convert to integer codes.
clos = ['marital_status', 'occupation',
        'relationship', 'race', 'sex', 'native_country']

# A value's code is its position in the column's unique() result. The
# value -> code dict is built once per column, so each row costs a single
# hash lookup instead of a scan over all unique values.
for clo in clos:
    u = X[clo].unique()
    codes = {value: code for code, value in enumerate(u)}
    X[clo] = X[clo].map(codes)
12529562-1df441145a7f2d3b.png
12313.png
# Evaluate a 5-nearest-neighbours classifier with 20-fold stratified CV.
sKfold = StratifiedKFold(n_splits=20)

knn = KNeighborsClassifier(n_neighbors=5)

# split() yields positional row indices. X is indexed positionally via .iloc,
# so y must be too: a plain ndarray guarantees that whether y is a Series
# with a non-default index or already an array.
y_values = np.asarray(y)

scores = []
for train, test in sKfold.split(X, y):
    knn.fit(X.iloc[train], y_values[train])
    scores.append(knn.score(X.iloc[test], y_values[test]))

# Mean accuracy across folds (bare expression; displayed when run interactively).
np.mean(scores)

>>>0.800651505347638

Guess you like

Origin blog.csdn.net/weixin_33872566/article/details/90810237