Fitting a model on sklearn one-hot encoded features together with the original features

from sklearn.preprocessing import OneHotEncoder
# Toy frame: a numeric column (f1), a string-valued column (f2) and a 0/1 label (y).
ddd2 = pd.DataFrame({
    
    'f1': [1, 2, 3, 4, 5, 6], 'f2': ['你', '我', '他', '你', '我', '她'], 'y': [1, 1, 1, 0, 0, 0]})
ddd2

# One-hot encode f2; reshape(-1, 1) presents the column to the encoder as a
# single-column 2-D array.
one = OneHotEncoder()
# NOTE(review): the encoder result is stored directly in the f2 column. The
# indexapply step further down calls .toarray() on each cell and then selects
# a row by index, so each cell presumably holds the whole encoded matrix at
# this point -- confirm against the pandas version in use.
ddd2['f2'] = one.fit_transform(ddd2['f2'].values.reshape(-1, 1))
ddd2


# Copy the row index into a temporary column so the row-wise apply below can
# pass each row's index to indexapply.
ddd2['index'] = ddd2.index
def indexapply(index, x):
    """Return row ``index`` of ``x`` as a dense float64 array.

    Args:
        index: Row position to select from the densified matrix.
        x: Object exposing ``toarray()`` (e.g. a scipy sparse matrix).

    Returns:
        The selected row of ``x.toarray()``, cast to ``float64``.
    """
    # Densify first, then cast, then pick out the requested row.
    dense = x.toarray().astype(np.float64)
    return dense[index, :]

# Row by row, replace the f2 cell with whatever indexapply selects for that
# row's index value.
ddd2['f2'] = ddd2.apply(lambda row: indexapply(row['index'], row['f2']), axis=1)
# The helper column has served its purpose; remove it again.
ddd2 = ddd2.drop('index', axis=1)
ddd2

(Image: the resulting DataFrame, in which each `f2` cell holds that row's one-hot vector.)

from sklearn.ensemble import GradientBoostingClassifier
# Features are every column except the last one; the label is the last column.
x2, y2 = ddd2.iloc[:, :-1].values, ddd2.iloc[:, -1].values
print(x2)
print(y2)

def flatten_x_cols(lst):
    """Flatten rows of the form ``[scalar, vector]`` into flat lists.

    Each row contributes its first entry followed by every element of its
    second entry, so ``[1, [0., 1.]]`` becomes ``[1, 0., 1.]``. Only the first
    two entries of a row are read; anything after them is ignored.

    Args:
        lst: Iterable of indexable rows whose second entry is itself iterable.

    Returns:
        A new list containing one flat list per input row, in input order.
    """
    # Iterating the rows directly (instead of by position) also accepts inputs
    # that have no len(), such as generators.
    return [[row[0], *row[1]] for row in lst]

# Expand every [f1, one-hot vector] row into a single flat list of numbers so
# the estimator receives a plain 2-D feature table.
x2 = flatten_x_cols(x2)


# Fit a gradient boosting classifier with default settings on the flattened features.
model = GradientBoostingClassifier()
model.fit(x2, y2)
Output:

[[1 array([0., 1., 0., 0.])]
 [2 array([0., 0., 0., 1.])]
 [3 array([1., 0., 0., 0.])]
 [4 array([0., 1., 0., 0.])]
 [5 array([0., 0., 0., 1.])]
 [6 array([0., 0., 1., 0.])]]
[1 1 1 0 0 0]
GradientBoostingClassifier()

Related: splicing sklearn one-hot encoding results back into the original DataFrame
https://blog.csdn.net/qq_42363032/article/details/121377220

You may also like

Source: blog.csdn.net/qq_42363032/article/details/123406747