Original motivation: my evaluation-metric code had become messy and scattered, so I decided to write a proper class for multi-label learning (multi-label classification) evaluation metrics that implements the commonly used metrics in one place, so I don't have to hunt for them all over again later, which wastes time and effort, and so I have them handy for future reference.
Metrics implemented so far (to be updated over time):
1. Accuracy - accuracy
2. F1 score - fscore
3. Hamming loss - hamming_distance
4. AUROC
5. AUPRC (many blogs do not cover this one)
6. Average precision - avgPrecision
Intuitively, precision is the classifier's ability not to label a negative sample as positive, and recall is its ability to find all of the positive samples. For both, higher is better.
Some short personal notes on precision and recall (both discussed against a multi-class background); the examples below are best read alongside a confusion matrix:
Precision: literally, I want the positive predictions to be as accurate as possible; low precision means the model's positive predictions cannot be trusted. As an extreme example, suppose the model predicts 99 out of 100 samples as positive, but in reality only 5 of them are. Then prec = 5/99, which is very small: the model is imprecise because it labels many negative samples as positive.
Recall: literally, "recall" as in a product recall; we hope as few products as possible have to be recalled. For example, a company develops product A and believes it is good (the model predicts positive), but consumers find that many units are bad (actually negative). Those units have to be recalled, repaired, and shipped again. The higher the recall metric, the smaller the fraction of products the company actually has to recall, roughly 1 - recall. A short worked example is given after this list.
7. Average recall - avgRecall
8. Coverage - coverage
9. Ranking loss - ranking_loss
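As a quick sanity check of the extreme example above (99 predicted positives, only 5 truly positive), here is a minimal sketch using scikit-learn; the array contents are made up to match that scenario and are not taken from the class below:

import numpy as np
from sklearn.metrics import precision_score, recall_score

# hypothetical binary problem: 100 samples, the model flags 99 of them as positive
y_pred = np.ones(100, dtype=int)
y_pred[0] = 0                      # only one sample predicted negative
y_true = np.zeros(100, dtype=int)
y_true[1:6] = 1                    # only 5 samples are truly positive (all among the predicted positives)

print(precision_score(y_true, y_pred))  # 5/99 ~= 0.05 -> very imprecise
print(recall_score(y_true, y_pred))     # 5/5 = 1.0 -> all positives were found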
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 6 20:38:38 2020
@author ylyang
"""
#from sklearn import datasets
#import torch
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import hamming_loss
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score, recall_score,auc
# np.set_printoptions(threshold='nan')
class Metric(object):
    def __init__(self, output, label):
        self.output = output  # predicted score/label matrix (num_instance x num_label)
        self.label = label    # ground-truth label matrix (num_instance x num_label)

    def accuracy_subset(self, thresh=0.5):
        # subset accuracy: an instance counts as correct only if all its labels match
        y_pred = self.output
        y_true = self.label
        y_pred = np.where(y_pred > thresh, 1, 0)
        accuracy = accuracy_score(y_true, y_pred)
        return accuracy

    def accuracy_mean(self, thresh=0.5):
        # element-wise accuracy averaged over all instance/label pairs
        y_pred = self.output
        y_true = self.label
        y_pred = np.where(y_pred > thresh, 1, 0)
        accuracy = np.mean(np.equal(y_true, y_pred))
        return accuracy

    def accuracy_multiclass(self):
        # accuracy when treating each row as a single-label (multi-class) prediction
        y_pred = self.output
        y_true = self.label
        accuracy = accuracy_score(np.argmax(y_true, 1), np.argmax(y_pred, 1))
        return accuracy
    def micfscore(self, thresh=0.5, type='micro'):
        # micro-averaged F1 score
        y_pred = self.output
        y_true = self.label
        y_pred = np.where(y_pred > thresh, 1, 0)
        return f1_score(y_true, y_pred, average=type)

    def macfscore(self, thresh=0.5, type='macro'):
        # macro-averaged F1 score
        y_pred = self.output
        y_true = self.label
        y_pred = np.where(y_pred > thresh, 1, 0)
        return f1_score(y_true, y_pred, average=type)

    def hamming_distance(self, thresh=0.5):
        # Hamming loss: fraction of wrongly predicted labels
        y_pred = self.output
        y_true = self.label
        y_pred = np.where(y_pred > thresh, 1, 0)
        return hamming_loss(y_true, y_pred)

    def fscore_class(self, type='micro'):
        # F1 score after reducing each row to its argmax class
        y_pred = self.output
        y_true = self.label
        return f1_score(np.argmax(y_true, 1), np.argmax(y_pred, 1), average=type)
    def auROC(self):
        # macro AUROC: per-label ROC AUC averaged over all labels
        y_pred = self.output
        y_true = self.label
        row, col = y_true.shape
        temp = []
        ROC = 0
        for i in range(col):
            single_ROC = roc_auc_score(y_true[:, i], y_pred[:, i], average='macro', sample_weight=None)
            # print("%d th AUROC: %f" % (i, single_ROC))
            temp.append(single_ROC)
            ROC += single_ROC
        return ROC / col
    def MacroAUC(self):
        # macro AUC computed by pairwise comparison of positive/negative scores per label
        y_pred = self.output  # num_instance * num_label
        y_true = self.label   # num_instance * num_label
        num_instance, num_class = y_pred.shape
        count = np.zeros((num_class, 1))           # pairs where the positive instance scores above the negative one
        num_P_instance = np.zeros((num_class, 1))  # number of positive instances for every label
        num_N_instance = np.zeros((num_class, 1))  # number of negative instances for every label
        AUC = np.zeros((num_class, 1))             # AUC for each label
        count_invalid_label = 0                    # labels skipped because all instances are positive or all negative
        for i in range(num_class):  # the i-th label
            num_P_instance[i, 0] = sum(y_true[:, i] == 1)
            num_N_instance[i, 0] = num_instance - num_P_instance[i, 0]
            # exclude labels on which all instances are positive or all negative,
            # i.e. num_P_instance[i,0] or num_N_instance[i,0] is zero
            if num_P_instance[i, 0] == 0 or num_N_instance[i, 0] == 0:
                AUC[i, 0] = 0
                count_invalid_label += 1
            else:
                temp_P_Outputs = np.zeros((int(num_P_instance[i, 0]), num_class))
                temp_N_Outputs = np.zeros((int(num_N_instance[i, 0]), num_class))
                temp_P_Outputs[:, i] = y_pred[y_true[:, i] == 1, i]
                temp_N_Outputs[:, i] = y_pred[y_true[:, i] == 0, i]
                for m in range(int(num_P_instance[i, 0])):
                    for n in range(int(num_N_instance[i, 0])):
                        if temp_P_Outputs[m, i] > temp_N_Outputs[n, i]:
                            count[i, 0] += 1
                        elif temp_P_Outputs[m, i] == temp_N_Outputs[n, i]:
                            count[i, 0] += 0.5
                AUC[i, 0] = count[i, 0] / (num_P_instance[i, 0] * num_N_instance[i, 0])
        macroAUC1 = sum(AUC) / (num_class - count_invalid_label)
        return float(macroAUC1), AUC
    def avgPrecision(self):
        # precision averaged over instances (example-based precision)
        y_pred = self.output
        y_true = self.label
        num_instance, num_class = y_pred.shape
        precision_value = 0
        precisions = []
        for i in range(num_instance):
            p = precision_score(y_true[i, :], y_pred[i, :])
            precisions.append(p)
            precision_value += p
        # print(precision_value)
        pre_list = np.array([1.0] + precisions + [0.0])  # endpoints added for the AUPRC curve
        # print(pre_list)
        return float(precision_value / num_instance), pre_list

    def avgRecall(self):
        # recall averaged over instances (example-based recall)
        y_pred = self.output
        y_true = self.label
        num_instance, num_class = y_pred.shape
        recall_value = 0
        recalls = []
        for i in range(num_instance):
            r = recall_score(y_true[i, :], y_pred[i, :])
            recalls.append(r)
            recall_value += r
        rec_list = np.array([0.0] + recalls + [1.0])  # endpoints added for the AUPRC curve
        sorting_indices = np.argsort(rec_list)
        # print(rec_list)
        return float(recall_value / num_instance), rec_list, sorting_indices

    def getAUPRC(self):
        # AUPRC built from the per-instance precision/recall lists above
        avgPrecision, precisions = self.avgPrecision()
        avgRecall, recalls, sorting_indices = self.avgRecall()
        # auc() requires x to be monotonic, hence the sorting by recall
        auprc = auc(recalls[sorting_indices], precisions[sorting_indices])
        return auprc
    def cal_single_label_micro_auc(self, x, y):
        # AUC for a single label via rank counting
        idx = np.argsort(x)  # sort scores in ascending order
        y = y[idx]
        m = 0    # positives seen so far
        n = 0    # negatives seen so far
        auc = 0
        for i in range(x.shape[0]):
            if y[i] == 1:
                m += 1
                auc += n  # every negative ranked below this positive is a correctly ordered pair
            if y[i] == 0:
                n += 1
        auc /= (m * n)
        return auc

    def get_micro_auc(self):
        """
        :param x: the predicted outputs of the classifier; the output of the ith instance for the jth class is stored in x(i,j)
        :param y: the actual labels of the instances; if the ith instance belongs to the jth class, y(i,j)=1, otherwise y(i,j)=0
        :return: the micro AUC
        """
        x = self.output
        y = self.label
        n, d = x.shape
        if x.shape[0] != y.shape[0]:
            print("num of instances for output and ground truth is different!!")
        if x.shape[1] != y.shape[1]:
            print("dim of output and ground truth is different!!")
        x = x.reshape(n * d)
        y = y.reshape(n * d)
        auc = self.cal_single_label_micro_auc(x, y)
        return auc
    def cal_single_instance_coverage(self, x, y):
        # depth from the top of the ranking needed to cover all relevant labels of one instance
        idx = np.argsort(x)  # sort scores in ascending order
        y = y[idx]
        loc = x.shape[0]
        for i in range(x.shape[0]):
            if y[i] == 1:  # lowest-ranked relevant label
                loc -= i
                break
        return loc

    def get_coverage(self):
        """
        :param x: the predicted outputs of the classifier; the output of the ith instance for the jth class is stored in x(i,j)
        :param y: the actual labels of the test instances; if the ith instance belongs to the jth class, y(i,j)=1, otherwise y(i,j)=0
        :return: the coverage
        """
        x = self.output
        y = self.label
        n, d = x.shape
        if x.shape[0] != y.shape[0]:
            print("num of instances for output and ground truth is different!!")
        if x.shape[1] != y.shape[1]:
            print("dim of output and ground truth is different!!")
        cover = 0
        for i in range(n):
            cover += self.cal_single_instance_coverage(x[i], y[i])
        cover = cover / n - 1
        return cover
    def cal_single_instance_ranking_loss(self, x, y):
        # fraction of (relevant, irrelevant) label pairs that are ordered incorrectly for one instance
        idx = np.argsort(x)  # sort scores in ascending order
        y = y[idx]
        m = 0   # relevant labels seen so far (scored lower)
        n = 0   # irrelevant labels
        rl = 0
        for i in range(x.shape[0]):
            if y[i] == 1:
                m += 1
            if y[i] == 0:
                rl += m  # every relevant label ranked below this irrelevant one is a mis-ordered pair
                n += 1
        rl /= (m * n)
        return rl

    def get_ranking_loss(self):
        """
        :param x: the predicted outputs of the classifier; the output of the ith instance for the jth class is stored in x(i,j)
        :param y: the actual labels of the test instances; if the ith instance belongs to the jth class, y(i,j)=1, otherwise y(i,j)=0
        :return: the ranking loss
        """
        x = self.output
        y = self.label
        n, d = x.shape
        if x.shape[0] != y.shape[0]:
            print("num of instances for output and ground truth is different!!")
        if x.shape[1] != y.shape[1]:
            print("dim of output and ground truth is different!!")
        m = 0
        rank_loss = 0
        for i in range(n):
            s = np.sum(y[i])
            if s in range(1, d):  # skip instances whose labels are all relevant or all irrelevant
                rank_loss += self.cal_single_instance_ranking_loss(x[i], y[i])
                m += 1
        rank_loss /= m
        return rank_loss
if __name__ == '__main__':
    # 6 rows x 5 columns: 6 samples, 5 class labels
    output = np.array([[1, 0, 0, 0, 1],
                       [1, 1, 0, 1, 0],
                       [0, 1, 0, 0, 1],
                       [1, 0, 1, 0, 1],
                       [1, 0, 1, 1, 1],
                       [1, 1, 0, 0, 1]
                       ])
    label = np.array([[1, 0, 1, 0, 1],
                      [1, 1, 0, 1, 0],
                      [0, 1, 0, 0, 1],
                      [0, 1, 0, 0, 1],
                      [0, 0, 1, 0, 1],
                      [1, 1, 0, 0, 1]
                      ])
    myMetric = Metric(output, label)
    # Macrof1 = myMetric.fscore_class()
    ham = myMetric.hamming_distance()
    Microf1 = myMetric.micfscore()
    Macrof1 = myMetric.macfscore()
    AUROC = myMetric.auROC()
    MacroAUROC1, AUC_list = myMetric.MacroAUC()
    avgPrecision, precisions = myMetric.avgPrecision()
    avgRecall, recalls, sorting_indices = myMetric.avgRecall()
    auprc = myMetric.getAUPRC()
    micro_auc = myMetric.get_micro_auc()
    coverage = myMetric.get_coverage()
    ranking_loss = myMetric.get_ranking_loss()
    # print(Macrof1)
    print("ham:", ham)
    print("Microf1:", Microf1)
    print("Macrof1:", Macrof1)
    print("AUROC: ", AUROC)
    print("MacroAUC: ", MacroAUROC1)
    # print("AUC per label: ", AUC_list)
    print("avgPrecision: ", avgPrecision)
    print("avgRecall: ", avgRecall)
    print("AUPRC: ", auprc)
    print("get_micro_auc _from_KDD2018M3DS:", micro_auc)
    print("get_coverage _from_KDD2018M3DS:", coverage)
    print("get_ranking_loss _from_KDD2018M3DS:", ranking_loss)
    # iris = datasets.load_iris()
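As a side note, the macro AUROC computed above by the pairwise-counting MacroAUC method can be cross-checked against scikit-learn, which accepts multi-label indicator matrices directly. A minimal sketch, meant to be appended at the end of the __main__ block above where output and label are defined (the values should agree with auROC()/MacroAUC() when no label is skipped):

    from sklearn.metrics import roc_auc_score
    print(roc_auc_score(label, output, average='macro'))  # cross-check for auROC() / MacroAUC()
    print(roc_auc_score(label, output, average='micro'))  # comparable to get_micro_auc()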
The AUPRC calculation above follows another author's reference.
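For comparison, scikit-learn can also produce a precision-recall curve and its area directly from scores. Below is a minimal sketch of the more common micro-averaged AUPRC, assuming the label and score matrices are simply flattened; note this is not the same construction as getAUPRC above, so the numbers may differ:

    from sklearn.metrics import precision_recall_curve, auc, average_precision_score

    y_true_flat = label.ravel()     # flatten the ground-truth matrix
    y_score_flat = output.ravel()   # flatten the predicted scores
    precision, recall, _ = precision_recall_curve(y_true_flat, y_score_flat)
    print(auc(recall, precision))                              # area under the PR curve
    print(average_precision_score(y_true_flat, y_score_flat))  # sklearn's average precision, a related summary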
P.S.: my ability is limited and there may be mistakes; feel free to point them out in the comments below, and I will revise this from time to time.