数据挖掘项目--模型评估

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score,roc_curve, auc
import matplotlib.pyplot as plt
%matplotlib inline

def model_metrics(clf, X_train, X_test, y_train, y_test):
    # 预测
    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)
    
    y_train_proba = clf.predict_proba(X_train)[:,1]
    y_test_proba = clf.predict_proba(X_test)[:,1]
    
    # 准确率
    print('[准确率]', end = ' ')
    print('训练集:', '%.4f'%accuracy_score(y_train, y_train_pred), end = ' ')
    print('测试集:', '%.4f'%accuracy_score(y_test, y_test_pred))
    
    # 精准率
    print('[精准率]', end = ' ')
    print('训练集:', '%.4f'%precision_score(y_train, y_train_pred), end = ' ')
    print('测试集:', '%.4f'%precision_score(y_test, y_test_pred))

    # 召回率
    print('[召回率]', end = ' ')
    print('训练集:', '%.4f'%recall_score(y_train, y_train_pred), end = ' ')
    print('测试集:', '%.4f'%recall_score(y_test, y_test_pred))
    
    # f1-score
    print('[f1-score]', end = ' ')
    print('训练集:', '%.4f'%f1_score(y_train, y_train_pred), end = ' ')
    print('测试集:', '%.4f'%f1_score(y_test, y_test_pred))
    
    # auc取值:用roc_auc_score或auc
    print('[auc值]', end = ' ')
    print('训练集:', '%.4f'%roc_auc_score(y_train, y_train_proba), end = ' ')
    print('测试集:', '%.4f'%roc_auc_score(y_test, y_test_proba))
    
    # roc曲线
    fpr_train, tpr_train, thresholds_train = roc_curve(y_train, y_train_proba, pos_label = 1)
    fpr_test, tpr_test, thresholds_test = roc_curve(y_test, y_test_proba, pos_label = 1)
    
    label = ["Train - AUC:{:.4f}".format(auc(fpr_train, tpr_train)), 
             "Test - AUC:{:.4f}".format(auc(fpr_test, tpr_test))]
    plt.plot(fpr_train,tpr_train)
    plt.plot(fpr_test,tpr_test)
    plt.plot([0, 1], [0, 1], 'd--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(label, loc = 4)
    plt.title("ROC curve")
 

# 逻辑回归
model_metrics(lr, X_train, X_test, y_train, y_test)
# 线性SVM
model_metrics(svm_linear, X_train, X_test, y_train, y_test)
# 多项式SVM
model_metrics(svm_poly, X_train, X_test, y_train, y_test)
# 高斯核SVM
model_metrics(svm_rbf, X_train, X_test, y_train, y_test)
# sigmoid-SVM
model_metrics(svm_sigmoid, X_train, X_test, y_train, y_test)
# 决策树
model_metrics(dt, X_train, X_test, y_train, y_test)
# XGBoost
model_metrics(xgb, X_train, X_test, y_train, y_test)
# lightGBM
model_metrics(lgb, X_train, X_test, y_train, y_test)
 

猜你喜欢

转载自blog.csdn.net/l422380631/article/details/88392487
今日推荐