各种机器学习模型模板代码

将各模型的 ROC AUC 结果汇总到一张表格里

使用 time 模块记录计算时间(注意:time.clock 已在 Python 3.8 中移除,新版本应使用 time.perf_counter)

使用 cross_val_score 做 k 折交叉验证

用 DataFrame 记录各模型的得分与耗时表格

# Benchmark several scikit-learn classifiers with 3-fold cross-validation,
# scoring each by ROC AUC and recording how long the cross-validation call
# took. Results are gathered into `result_df`, sorted by mean score
# (best first).


def _timed_cv_auc(estimator, X, y, cv=3):
    """Run ROC-AUC cross-validation on *estimator* and time the call.

    Args:
        estimator: Classifier instance handed to ``cross_val_score``.
        X: Feature data handed to ``cross_val_score``.
        y: Target labels handed to ``cross_val_score``.
        cv: Value forwarded as ``cross_val_score``'s ``cv`` argument.

    Returns:
        A ``(scores, elapsed_seconds)`` tuple: the array returned by
        ``cross_val_score`` and the elapsed time of that call.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed (wall-clock) time. Use
    # time.process_time() instead if CPU time is what you want.
    started = time.perf_counter()
    scores = cross_val_score(estimator, X, y, cv=cv, scoring='roc_auc')
    return scores, time.perf_counter() - started


# Elapsed seconds per classifier, in the same order as the 'Classifiers'
# column built below.
t_diff = []

# Logistic regression
log_reg = LogisticRegression()
log_scores, elapsed = _timed_cv_auc(log_reg, X_train, y_train)
t_diff.append(elapsed)
log_reg_mean = log_scores.mean()

# Support vector machine
svc_clf = SVC()
svc_scores, elapsed = _timed_cv_auc(svc_clf, X_train, y_train)
t_diff.append(elapsed)
svc_mean = svc_scores.mean()

# k-nearest neighbours
knn_clf = KNeighborsClassifier()
knn_scores, elapsed = _timed_cv_auc(knn_clf, X_train, y_train)
t_diff.append(elapsed)
knn_mean = knn_scores.mean()

# Decision tree
tree_clf = tree.DecisionTreeClassifier()
tree_scores, elapsed = _timed_cv_auc(tree_clf, X_train, y_train)
t_diff.append(elapsed)
tree_mean = tree_scores.mean()

# Gradient boosting
grad_clf = GradientBoostingClassifier()
grad_scores, elapsed = _timed_cv_auc(grad_clf, X_train, y_train)
t_diff.append(elapsed)
grad_mean = grad_scores.mean()

# Random forest
rand_clf = RandomForestClassifier()
rand_scores, elapsed = _timed_cv_auc(rand_clf, X_train, y_train)
t_diff.append(elapsed)
rand_mean = rand_scores.mean()

# Neural network (multi-layer perceptron)
neural_clf = MLPClassifier(alpha=0.01)
neural_scores, elapsed = _timed_cv_auc(neural_clf, X_train, y_train)
t_diff.append(elapsed)
neural_mean = neural_scores.mean()

# Naive Bayes
nav_clf = GaussianNB()
nav_scores, elapsed = _timed_cv_auc(nav_clf, X_train, y_train)
t_diff.append(elapsed)
# Use the Naive Bayes scores here; the original mistakenly averaged
# neural_scores, duplicating the MLP result in the Naive Bayes row.
nav_mean = nav_scores.mean()

# One row per classifier; the three lists must stay in the same order.
d = {
    'Classifiers': [
        'Logistic Reg.', 'SVC', 'KNN', 'Dec Tree',
        'Grad B CLF', 'Rand FC', 'Neural Classifier', 'Naives Bayes',
    ],
    'Crossval Mean Scores': [
        log_reg_mean, svc_mean, knn_mean, tree_mean,
        grad_mean, rand_mean, neural_mean, nav_mean,
    ],
    'time': t_diff,
}

result_df = pd.DataFrame(d)
result_df = result_df.sort_values(by=['Crossval Mean Scores'], ascending=False)
# Bare expression so a notebook cell displays the table.
result_df

Guess you like

Origin blog.csdn.net/weixin_45955767/article/details/119861528