绘制confusion matrix的R实现

3.使用yardstick包中的conf_mat() 和autoplot() 函数

在评估分类器效果时，除了给出 sensitivity、specificity、F1 score 等指标外，还需要将 confusion matrix 图示出来，以便更直观地呈现结果。下面以一组随机生成的示例数据为例：

# Simulated two-class labels. Both columns are built as factors sharing the
# same level set: since R 4.0 data.frame() no longer converts strings to
# factors, while caret::confusionMatrix() and yardstick::conf_mat() used
# further down both require factor inputs.
set.seed(123)
class_levels <- c("False", "True")
data <- data.frame(
  Actual = factor(sample(c("True", "False"), 100, replace = TRUE),
                  levels = class_levels),
  Prediction = factor(sample(c("True", "False"), 100, replace = TRUE),
                      levels = class_levels)
)
# Rows are predictions, columns are the actual classes.
table(data$Prediction, data$Actual)

输出结果为

       False True
 False    23   31
 True     20   26

此为confusion matrix的表格，如何图示呢？以下提供几种图示方法：

1.自写函数

先用caret包中的confusionMatrix函数构建matrix

library(caret)
# confusionMatrix() needs factors with identical levels; coerce explicitly so
# the call also works when the columns are plain character vectors.
class_levels <- sort(unique(c(as.character(data$Prediction),
                              as.character(data$Actual))))
result_matrix <- confusionMatrix(
  data = factor(data$Prediction, levels = class_levels),
  reference = factor(data$Actual, levels = class_levels)
)
# Print the matrix together with its statistics.
result_matrix

输出结果为:

Confusion Matrix and Statistics

          Reference
Prediction False True
     False    23   31
     True     20   26
                                         
               Accuracy : 0.49           
                 95% CI : (0.3886, 0.592)
    No Information Rate : 0.57           
    P-Value [Acc > NIR] : 0.9564         
                                         
                  Kappa : -0.0087        
                                         
 Mcnemar's Test P-Value : 0.1614         
                                         
            Sensitivity : 0.5349         
            Specificity : 0.4561         
         Pos Pred Value : 0.4259         
         Neg Pred Value : 0.5652         
             Prevalence : 0.4300         
         Detection Rate : 0.2300         
   Detection Prevalence : 0.5400         
      Balanced Accuracy : 0.4955         
                                         
       'Positive' Class : False

自写函数

# Draw a 2x2 confusion matrix plus summary statistics with base graphics.
#
# Args:
#   cm: an object with the elements `table` (2x2 table with dimnames, rows =
#       predicted class, columns = actual class), `byClass` and `overall`
#       (named numeric vectors), e.g. the result of caret::confusionMatrix()
#       for a two-class problem.
#
# Returns:
#   `cm`, invisibly. The function is called for its plotting side effect.
draw_confusion_matrix <- function(cm) {
  counts <- as.numeric(cm$table)
  if (length(counts) != 4) {
    stop("draw_confusion_matrix() needs a 2x2 confusion matrix", call. = FALSE)
  }

  # Take the class labels from the table itself instead of hard-coding them,
  # so the picture always matches the data being plotted.
  predicted_labels <- rownames(cm$table)
  actual_labels <- colnames(cm$table)

  # layout() and par() change global graphics state; put it back on exit.
  old_par <- par(no.readonly = TRUE)
  on.exit(par(old_par), add = TRUE)

  layout(matrix(c(1, 1, 2)))
  par(mar = c(2, 2, 2, 2))
  plot(c(100, 345), c(300, 450), type = "n", xlab = "", ylab = "",
       xaxt = "n", yaxt = "n")
  title("CONFUSION MATRIX", cex.main = 2)

  # create the matrix: columns are actual classes, rows are predicted classes;
  # the diagonal (agreement) cells share one colour, the off-diagonal the other
  rect(150, 430, 240, 370, col = "#3F97D0")
  text(195, 435, actual_labels[1], cex = 1.2)
  rect(250, 430, 340, 370, col = "#F7AD50")
  text(295, 435, actual_labels[2], cex = 1.2)
  text(125, 370, "Predicted", cex = 1.3, srt = 90, font = 2)
  text(245, 450, "Actual", cex = 1.3, font = 2)
  rect(150, 305, 240, 365, col = "#F7AD50")
  rect(250, 305, 340, 365, col = "#3F97D0")
  text(140, 400, predicted_labels[1], cex = 1.2, srt = 90)
  text(140, 335, predicted_labels[2], cex = 1.2, srt = 90)

  # add in the cm results; as.numeric() walks the table column by column, so
  # the first two counts belong to the left column and the last two to the right
  text(c(195, 195, 295, 295), c(400, 335, 400, 335), counts,
       cex = 1.6, font = 2, col = "white")

  # add in the specifics: elements 1, 2, 5, 6 and 7 of cm$byClass
  plot(c(100, 0), c(100, 0), type = "n", xlab = "", ylab = "",
       main = "DETAILS", xaxt = "n", yaxt = "n")
  class_idx <- c(1, 2, 5, 6, 7)
  class_x <- c(10, 30, 50, 70, 90)
  text(class_x, 85, names(cm$byClass)[class_idx], cex = 1.2, font = 2)
  text(class_x, 70, round(as.numeric(cm$byClass[class_idx]), 3), cex = 1.2)

  # add in the accuracy information: the first two elements of cm$overall
  overall_x <- c(30, 70)
  text(overall_x, 35, names(cm$overall)[1:2], cex = 1.5, font = 2)
  text(overall_x, 20, round(as.numeric(cm$overall[1:2]), 3), cex = 1.4)

  invisible(cm)
}

使用绘图函数

# Plot the caret result stored in result_matrix with the helper function.
draw_confusion_matrix(result_matrix)

2.使用内置函数fourfoldplot

# Cross-tabulate actual (rows) against predicted (columns). Naming the two
# dimensions lets fourfoldplot() label which axis is which.
ctable <- table(data$Actual, data$Prediction, dnn = c("Actual", "Prediction"))
# conf.level = 0 suppresses the confidence rings; margin = 1 standardises
# the row margins only.
fourfoldplot(ctable, color = c("#CC6666", "#99CC99"),
             conf.level = 0, margin = 1, main = "Confusion Matrix")

3.使用yardstick包中的`conf_mat()` 和`autoplot()` 函数

library(yardstick)
library(ggplot2)


# conf_mat() requires factor truth/estimate columns with the same levels, so
# coerce a copy of the data first (data.frame() keeps strings as character
# since R 4.0).
class_levels <- sort(unique(c(as.character(data$Actual),
                              as.character(data$Prediction))))
cm_data <- data
cm_data$Actual <- factor(cm_data$Actual, levels = class_levels)
cm_data$Prediction <- factor(cm_data$Prediction, levels = class_levels)

# The confusion matrix from a single assessment set (i.e. fold)
cm <- conf_mat(cm_data, truth = Actual, estimate = Prediction)

# Heatmap of the counts with a custom light-to-dark blue fill gradient.
autoplot(cm, type = "heatmap") +
  scale_fill_gradient(low = "#D6EAF8", high = "#2E86C1")

4.多类别confusion matrix

# data: score iris$Species against a random shuffle of itself to obtain a
# three-class confusion matrix. It is computed once and then printed, so the
# matrix shown on the console is the same one that is plotted below.
cm <- confusionMatrix(iris$Species, sample(iris$Species))
cm

# Named prior vector, one entry per species; not used by the code below.
newPrior <- c(.05, .8, .15)
names(newPrior) <- levels(iris$Species)

# extract the confusion matrix values as data.frame
cm_d <- as.data.frame(cm$table)
# confusion matrix statistics as data.frame (single column `cm.overall`)
cm_st <- data.frame(cm$overall)
# round the values
cm_st$cm.overall <- round(cm_st$cm.overall, 2)

# here we also have the rounded percentage values (share of all observations)
cm_p <- as.data.frame(prop.table(cm$table))
cm_d$Perc <- round(cm_p$Freq * 100, 2)

library(ggplot2)     # to plot
library(gridExtra)   # to put more
library(grid)        # plot together

# plotting the matrix: one tile per (Prediction, Reference) pair, annotated
# with the count and the percentage
cm_d_p <- ggplot(data = cm_d,
                 aes(x = Prediction, y = Reference, fill = Freq)) +
  geom_tile() +
  geom_text(aes(label = paste("", Freq, ",", Perc, "%")),
            color = "red", size = 8) +
  theme_light() +
  # guides(fill = FALSE) is deprecated in ggplot2; "none" hides the legend
  guides(fill = "none")

# plotting the stats as a table grob
cm_st_p <- tableGrob(cm_st)

# all together: heatmap and statistics table side by side under one title
grid.arrange(cm_d_p, cm_st_p, nrow = 1, ncol = 2,
             top = textGrob("Confusion Matrix and Statistics",
                            gp = gpar(fontsize = 25, font = 1)))

Reference：

R how to visualize confusion matrix using the caret package - Stack Overflow