Computing AUROC for multi-label classification in Python

import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import hamming_loss
from sklearn.metrics import roc_auc_score

# np.set_printoptions(threshold=np.inf)  # uncomment to print arrays in full
class Metric(object):
    def __init__(self, output, label):
        self.output = output  # predicted score matrix (num_instances x num_labels)
        self.label = label    # ground-truth label matrix (num_instances x num_labels)

    def accuracy_subset(self, thresh=0.5):
        # Subset accuracy: an instance counts only if all its labels match.
        y_pred = np.where(self.output > thresh, 1, 0)
        y_true = self.label
        return accuracy_score(y_true, y_pred)

    def accuracy_mean(self, thresh=0.5):
        # Mean accuracy over all instance-label pairs.
        y_pred = np.where(self.output > thresh, 1, 0)
        y_true = self.label
        return np.mean(np.equal(y_true, y_pred))
    
    def accuracy_multiclass(self):
        # Treats the task as single-label multi-class via argmax.
        y_pred = self.output
        y_true = self.label
        return accuracy_score(np.argmax(y_true, 1), np.argmax(y_pred, 1))
    
    def micfscore(self, thresh=0.5, average='micro'):
        # Micro-averaged F1; note that y_true must be the first argument.
        y_pred = np.where(self.output > thresh, 1, 0)
        y_true = self.label
        return f1_score(y_true, y_pred, average=average)

    def macfscore(self, thresh=0.5, average='macro'):
        # Macro-averaged F1.
        y_pred = np.where(self.output > thresh, 1, 0)
        y_true = self.label
        return f1_score(y_true, y_pred, average=average)
    
    def hamming_distance(self, thresh=0.5):
        # Hamming loss: fraction of wrongly predicted instance-label pairs.
        y_pred = np.where(self.output > thresh, 1, 0)
        y_true = self.label
        return hamming_loss(y_true, y_pred)
    
    def fscore_class(self, average='micro'):
        y_pred = self.output
        y_true = self.label
        return f1_score(np.argmax(y_true, 1), np.argmax(y_pred, 1), average=average)
    
    def auROC(self):
        # Per-label AUROC via sklearn, averaged over labels. The original
        # version read the global `label`, reused the accumulator variable,
        # and divided by col+1; all three bugs are fixed here.
        y_pred = self.output
        y_true = self.label
        row, col = y_true.shape
        temp = []
        for i in range(col):
            ROC = roc_auc_score(y_true[:, i], y_pred[:, i])
            print("label %d AUROC: %f" % (i, ROC))
            temp.append(ROC)
        return sum(temp) / col
    
    def MacroAUC(self):
        y_pred = self.output  # num_instance x num_label
        y_true = self.label   # num_instance x num_label
        num_instance, num_class = y_pred.shape
        count = np.zeros((num_class, 1))           # positive-over-negative pair counts
        num_P_instance = np.zeros((num_class, 1))  # positives per label
        num_N_instance = np.zeros((num_class, 1))  # negatives per label
        auc = np.zeros((num_class, 1))             # AUC for each label
        num_invalid_label = 0
        for i in range(num_class):
            num_P_instance[i, 0] = (y_true[:, i] == 1).sum()
            num_N_instance[i, 0] = num_instance - num_P_instance[i, 0]
            # Exclude labels on which all instances are positive or all are
            # negative; AUC is undefined when either count is zero.
            if num_P_instance[i, 0] == 0 or num_N_instance[i, 0] == 0:
                auc[i, 0] = 0
                num_invalid_label += 1
            else:
                P_outputs = y_pred[y_true[:, i] == 1, i]  # scores of positives
                N_outputs = y_pred[y_true[:, i] == 0, i]  # scores of negatives
                # Pairwise comparison (Mann-Whitney U): a tie counts as 0.5.
                for m in range(int(num_P_instance[i, 0])):
                    for n in range(int(num_N_instance[i, 0])):
                        if P_outputs[m] > N_outputs[n]:
                            count[i, 0] += 1
                        elif P_outputs[m] == N_outputs[n]:
                            count[i, 0] += 0.5
                auc[i, 0] = count[i, 0] / (num_P_instance[i, 0] * num_N_instance[i, 0])
        # Average only over labels where AUC is defined.
        macroAUC = auc.sum() / (num_class - num_invalid_label)
        return float(macroAUC), auc
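
# Aside (not in the original post): the double loop in MacroAUC compares every
# positive/negative pair, which is O(P*N) per label. The same AUC follows from
# rank sums (the Mann-Whitney U statistic) in O((P+N) log(P+N)); average ranks
# for ties reproduce the 0.5 credit given above. A sketch:
from scipy.stats import rankdata

def auc_rank(pos_scores, neg_scores):
    scores = np.concatenate([pos_scores, neg_scores])
    ranks = rankdata(scores)                # ties receive average ranks
    P, N = len(pos_scores), len(neg_scores)
    U = ranks[:P].sum() - P * (P + 1) / 2   # rank-sum form of the U statistic
    return U / (P * N)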
                
if __name__ == '__main__':
    # Example data: predicted scores (binary here, for simplicity) and true
    # labels, both of shape (num_instances, num_labels).
    output = np.array([[1, 0, 0, 0, 1],
                       [1, 1, 0, 1, 0],
                       [0, 1, 0, 0, 1],
                       [1, 0, 1, 0, 1],
                       [1, 0, 1, 1, 1],
                       [1, 1, 0, 0, 1]])

    label = np.array([[1, 0, 1, 0, 1],
                      [1, 1, 0, 1, 0],
                      [0, 1, 0, 0, 1],
                      [0, 1, 0, 0, 1],
                      [0, 0, 1, 0, 1],
                      [1, 1, 0, 0, 1]])

    myMetric = Metric(output, label)

    AUROC = myMetric.auROC()
    AUROC1, auc = myMetric.MacroAUC()
    print("AUROC: ", AUROC)
    print("MacroAUC: ", AUROC1)
    print("per-label AUC: ", auc)

    

The auROC method is built on sklearn's roc_auc_score, while MacroAUC is written from scratch; the original post showed the printed output here as a screenshot.

Curiously, the two methods printed identical per-label AUC values but different averages (the original author left this unresolved and invited discussion in the comments). The cause is an averaging bug in the original auROC, fixed above: the loop variable ROC still held the last label's AUC when the accumulation loop added all the stored values to it, and the total was then divided by col+1 instead of col, so the last label was counted twice and the denominator was off by one. With the fix, the two methods agree whenever every label has both positive and negative instances. (The code above includes sample data: the prediction and ground-truth matrices are both in R^{k×c}, where k is the number of samples and c the total number of predicted classes; you can copy the code and run it.)
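As a sanity check, the same macro average can be obtained directly from sklearn; a minimal sketch, assuming binary 0/1 label columns (the helper name safe_macro_auc is illustrative, not from the original post):

from sklearn.metrics import roc_auc_score
import numpy as np

def safe_macro_auc(y_true, y_score):
    # Skip labels whose column is all 0s or all 1s (AUC undefined there),
    # mirroring how MacroAUC excludes invalid labels from the average.
    aucs = [roc_auc_score(y_true[:, i], y_score[:, i])
            for i in range(y_true.shape[1])
            if 0 < y_true[:, i].sum() < y_true.shape[0]]
    return float(np.mean(aucs))

When no label column is degenerate, this coincides with roc_auc_score(y_true, y_score, average='macro').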

Reposted from blog.csdn.net/qq_39463175/article/details/108454709