任务四(订正)

模型评估
对构建的七个模型进行评估
# Display name -> fitted classifier to evaluate.
# All entries except LightGBM take ``best_estimator_`` from a search object;
# ``lgbm`` is used directly.
models = {
    '随机森林': forest_grid.best_estimator_,
    'GBDT': gbdt.best_estimator_,
    'XGBoost': xgb.best_estimator_,
    'LightGBM': lgbm,
    '逻辑回归': log_grid.best_estimator_,
    'SVM': svc_grid.best_estimator_,
    '决策树': tree_grid.best_estimator_,
}

# Metric name -> list of formatted "train / test" score strings.
# The evaluation loop appends one entry per model, in the iteration order of
# ``models``, so each list lines up row-for-row with the model names.
assessments = {
    'Accuracy': [],
    'Precision': [],
    'Recall': [],
    'F1-score': [],
    'AUC': [],
}
def plot_roc_curve(fpr, tpr, label=None):
    """Draw one ROC curve on the current matplotlib figure.

    Args:
        fpr: False-positive-rate values, used as the x coordinates.
        tpr: True-positive-rate values, used as the y coordinates.
        label: Optional legend entry for this curve.
    """
    plt.plot(fpr, tpr, label=label)
    # Black dashed y = x diagonal as a visual reference line.
    plt.plot([0, 1], [0, 1], 'k--')
    # Clamp both axes to [0, 1].
    plt.axis([0, 1, 0, 1])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend()
    plt.tight_layout()
# Score every model on both the training and the test split, append one
# formatted "train / test" string per metric to ``assessments``, and draw a
# per-model figure with the train and test ROC curves.
for name, model in models.items():
    # Hard class predictions, used by accuracy / precision / recall / F1.
    test_pre = model.predict(X_test)
    train_pre = model.predict(X_train)
    # Second column of predict_proba, used for the ROC curve and AUC.
    # NOTE(review): presumably the positive-class probability — this assumes
    # a binary problem and that every model exposes predict_proba; confirm.
    test_proba = model.predict_proba(X_test)[:, 1]
    train_proba = model.predict_proba(X_train)[:, 1]

    # scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is
    # symmetric, but precision and recall are not: passing the predictions
    # first swaps the two scores.
    acc_test = accuracy_score(y_test, test_pre) * 100
    acc_train = accuracy_score(y_train, train_pre) * 100
    accuracy = '训练集:%.2f%%;测试集:%.2f%%' % (acc_train, acc_test)
    assessments['Accuracy'].append(accuracy)

    pre_test = precision_score(y_test, test_pre) * 100
    pre_train = precision_score(y_train, train_pre) * 100
    precision = '训练集:%.2f%%;测试集:%.2f%%' % (pre_train, pre_test)
    assessments['Precision'].append(precision)

    rec_test = recall_score(y_test, test_pre) * 100
    rec_train = recall_score(y_train, train_pre) * 100
    recall = '训练集:%.2f%%;测试集:%.2f%%' % (rec_train, rec_test)
    assessments['Recall'].append(recall)

    f1_test = f1_score(y_test, test_pre) * 100
    f1_train = f1_score(y_train, train_pre) * 100
    f1 = '训练集:%.2f%%;测试集:%.2f%%' % (f1_train, f1_test)
    assessments['F1-score'].append(f1)

    # One new figure per model, overlaying the test and train ROC curves.
    fig = plt.figure(figsize=(8, 6))
    fpr, tpr, thresholds = roc_curve(y_test, test_proba)
    plot_roc_curve(fpr, tpr, label='测试集')
    fpr, tpr, thresholds = roc_curve(y_train, train_proba)
    plot_roc_curve(fpr, tpr, label='训练集')
    plt.title(name)

    auc_test = roc_auc_score(y_test, test_proba) * 100
    auc_train = roc_auc_score(y_train, train_proba) * 100
    # NOTE(review): the name ``auc`` would shadow sklearn.metrics.auc if that
    # function is imported elsewhere in this file — confirm.
    auc = '训练集:%.2f%%;测试集:%.2f%%' % (auc_train, auc_test)
    assessments['AUC'].append(auc)

# Single figure overlaying the test-set ROC curve of every model.
fig = plt.figure(figsize=(8, 6))
for name, model in models.items():
    # Second predict_proba column is used as the score for the ROC curve.
    proba = model.predict_proba(X_test)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_test, proba)
    plot_roc_curve(fpr, tpr, label=name)
# Single figure overlaying the training-set ROC curve of every model.
fig = plt.figure(figsize=(8, 6))
for name, model in models.items():
    # Second predict_proba column is used as the score for the ROC curve.
    proba = model.predict_proba(X_train)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_train, proba)
    plot_roc_curve(fpr, tpr, label=name)
# Tabulate the collected scores: one row per model (labelled with the keys of
# ``models``), one column per metric in ``assessments``.
ass_df = pd.DataFrame(data=assessments, index=list(models))
# Bare expression so a notebook cell displays the table.
ass_df
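(注:下表为原始运行输出。按原代码的调用方式,Precision 与 Recall 是在 y_true、y_pred 参数位置颠倒的情况下计算的,因此这两列的实际含义互换:Precision 列实为召回率,Recall 列实为精确率。)
AUC Accuracy F1-score Precision Recall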
随机森林 训练集:90.82%;测试集:79.88% 训练集:84.33%;测试集:79.60% 训练集:58.07%;测试集:46.69% 训练集:43.59%;测试集:34.68% 训练集:86.96%;测试集:71.43%
GBDT 训练集:87.87%;测试集:79.15% 训练集:84.26%;测试集:78.78% 训练集:57.17%;测试集:44.01% 训练集:42.18%;测试集:32.37% 训练集:88.68%;测试集:68.71%
XGBoost 训练集:90.41%;测试集:79.28% 训练集:85.06%;测试集:79.23% 训练集:63.03%;测试集:49.18% 训练集:51.15%;测试集:39.02% 训练集:82.10%;测试集:66.50%
LightGBM 训练集:86.70%;测试集:79.53% 训练集:82.41%;测试集:78.93% 训练集:49.77%;测试集:41.41% 训练集:35.00%;测试集:28.90% 训练集:86.12%;测试集:72.99%
逻辑回归 训练集:76.33%;测试集:78.34% 训练集:78.77%;测试集:78.70% 训练集:37.68%;测试集:38.89% 训练集:25.77%;测试集:26.30% 训练集:70.03%;测试集:74.59%
SVM 训练集:80.23%;测试集:74.26% 训练集:80.82%;测试集:77.96% 训练集:43.14%;测试集:34.80% 训练集:29.23%;测试集:22.83% 训练集:82.31%;测试集:73.15%
决策树 训练集:76.63%;测试集:74.19% 训练集:79.29%;测试集:77.14% 训练集:46.41%;测试集:43.46% 训练集:36.03%;测试集:34.10% 训练集:65.20%;测试集:59.90%

猜你喜欢

转载自blog.csdn.net/weixin_41741008/article/details/88361769