数据分析师养成之路之 Python 篇:(画 ROC 曲线(AUC)和混淆矩阵)

画 ROC 曲线(并计算 AUC)和混淆矩阵
代码如下:

准备数据,构造模型:

from sklearn import datasets

import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn import linear_model
from sklearn.metrics import roc_curve,auc
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import itertools

# Load the scikit-learn breast-cancer dataset and split it into the
# feature matrix X and the label vector y.
breast_cancer=datasets.load_breast_cancer()
X,y=breast_cancer.data,breast_cancer.target

# Logistic-regression classifier with default settings, evaluated with a
# 5-fold stratified splitter.
lmr=linear_model.LogisticRegression()
cv=StratifiedKFold(n_splits=5)

开始画图:ROC 曲线(AUC)和混淆矩阵(计数型、比例型)

def paintRoc(y_true,y_preb):
    """Plot a ROC curve with its AUC and display the figure.

    Args:
        y_true: Ground-truth labels, forwarded unchanged to ``roc_curve``.
        y_preb: 2-D array of per-class scores; only column 1 is used as the
            score passed to ``roc_curve`` (presumably the positive-class
            probability from ``predict_proba`` -- confirm against the caller).

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    scores = y_preb[:, 1]
    false_pos_rate, true_pos_rate, _unused_thresholds = roc_curve(y_true, scores)
    roc_auc = auc(false_pos_rate, true_pos_rate)

    # The ROC curve is drawn first so it is listed first in the legend.
    curve_label = 'Roc(AUC=%0.2f)' % (roc_auc)
    plt.plot(false_pos_rate, true_pos_rate,
             lw=5, alpha=0.8, color='r', label=curve_label)

    # Dashed diagonal: the reference line of a random classifier.
    diagonal = [0, 1]
    plt.plot(diagonal, diagonal,
             linestyle='--', lw=2, color='r', label='Luck', alpha=.8)

    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.title('ROC_auc(AUC=%0.2f)' % (roc_auc))
    plt.legend(loc="lower right")
    plt.show()

def paintConfusion_digit(lmr_matrix,classes):
    """Show a confusion matrix as a heat map annotated with integer counts.

    Args:
        lmr_matrix: 2-D integer array; rows are labelled 'True label' and
            columns 'Pre label'. Entries are formatted with ``'d'``, so they
            must be integers.
        classes: Collection of class labels used as tick labels on both axes;
            its length sets the number of ticks.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    plt.imshow(lmr_matrix, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('confusion matrix')
    plt.colorbar()

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)
    plt.xlabel('Pre label')
    plt.ylabel('True label')

    # Cells above half of the largest entry get black text, the rest red.
    half_max = lmr_matrix.max() / 2.
    n_rows = lmr_matrix.shape[0]
    n_cols = lmr_matrix.shape[1]
    for row in range(n_rows):
        for col in range(n_cols):
            cell = lmr_matrix[row, col]
            if cell > half_max:
                text_color = "black"
            else:
                text_color = "red"
            # x is the column index and y the row index in image coordinates.
            plt.text(col, row, format(cell, 'd'),
                     horizontalalignment="center",
                     color=text_color)

    plt.tight_layout()
    plt.show()

def paintConfusion_float(lmr_matrix,classes):
    """Show a row-normalized confusion matrix as an annotated heat map.

    Each row is divided by its total, so every cell shows the fraction of
    samples of that true class assigned to each predicted class. The matrix
    is normalized *before* it is drawn, so the cell colors and the colorbar
    use the same scale as the printed numbers.

    Args:
        lmr_matrix: 2-D array of confusion counts; rows are labelled
            'True label' and columns 'Pre label'.
        classes: Collection of class labels used as tick labels on both axes;
            its length sets the number of ticks.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    counts = np.asarray(lmr_matrix, dtype=float)
    row_totals = counts.sum(axis=1, keepdims=True)
    # A row with no samples would divide by zero and give NaN; leave such
    # rows as all zeros instead.
    ratios = np.divide(counts, row_totals,
                       out=np.zeros_like(counts),
                       where=row_totals != 0)

    plt.imshow(ratios, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('confusion matrix')
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    plt.xlabel('Pre label')
    plt.ylabel('True label')

    fmt = '.2f'
    # Cells above half of the largest ratio get black text, the rest red.
    thresh = ratios.max() / 2.
    for i, j in itertools.product(range(ratios.shape[0]), range(ratios.shape[1])):
        # x is the column index and y the row index in image coordinates.
        plt.text(j, i, format(ratios[i, j], fmt),
                 horizontalalignment="center",
                 color="black" if ratios[i, j] > thresh else "red")
    plt.tight_layout()
    plt.show()

下面调用上面定义的函数:

# Sorted class labels, computed once. The same labels are passed to
# confusion_matrix and used as tick labels, so rows, columns and ticks always
# line up in every fold.
class_labels=np.unique(y)

for train,test in cv.split(X,y):
    # Fit once per fold and reuse the fitted model for both the class
    # probabilities (ROC curve) and the hard predictions (confusion matrix).
    lmr.fit(X[train],y[train])
    probas_=lmr.predict_proba(X[test])
    y_pre=lmr.predict(X[test])

    paintRoc(y[test],probas_)
    print('\n')

    lmr_matrix=confusion_matrix(y[test],y_pre,labels=class_labels)
    paintConfusion_float(lmr_matrix,class_labels)
    print('\n')

数据分析师养成之路之 Python 篇:(画 ROC 曲线(AUC)和混淆矩阵)

猜你喜欢