A function to draw a heatmap with multiple mutation types within a gene


  Our common gene mutation heatmap is one gene, one grid, one mutation type, but in fact, in the same patient, the same gene often has multiple mutation types, so traditional heat map drawing tools cannot satisfy us drawing needs. In response to research needs, I wrote a heat map drawing function myself, which internally calls image to draw heat maps, barplot draws histograms, and data.table is used for data processing. For the problem of how multiple mutation types in a gene are represented, this function first uses image to draw a preliminary heat map, and then uses points to add the second mutation and the third mutation in order in the form of squares. At the same time, the position of the block is slightly moved and the size is slightly reduced to achieve a better display effect. At most, four mutations can be represented on one heat map grid.
  The function is as follows, you need to install and load data.table 1.10.4, load RColorBrewer

my_heatmap <- function(vr, pal = c("#F2F2F2",colorRampPalette(c("blue", "white", "red"))(5)[c(1,2)],"#F2F2F2",colorRampPalette(c("blue", "white", "red"))(5)[c(4,5)],brewer.pal(n = 8, name ="Accent")[c(1,4,6,8,2,3,5,7)],"#E31A1C","#6A3D9A"),type = c("DEL","LOSS","NEUTRAL","GAIN","AMPL","nonsynonymous SNV","synonymous SNV","intronic","stopgain","nonframeshift deletion","splicing", "frameshift deletion","UTR3","frameshift insertion","UTR5"),
                       order_gene = T, order_patient = T, hist_plot = T, legend_dist = 0.4, col_text_cex = 1, sub_gene= NULL,heatmap_mar = c(5,17,1,2), heatmap_oma=c(0.2,0.2,0.2,0.2),heatmap_mex=0.5, legend_mar = c(1,0,4,1),xlab_adj=1, order_omit=c("NoMut","NEUTRAL"))
{
  if((length(pal) - length(type)) !=1 ){stop("Pal must be one longer than type, because first one pal is col for no mutation")}
  if(!is.null(sub_gene)){
    pal_dt <- data.table(pal, type=c("NoMut",type))
    vr <- vr[Gene %in% sub_gene,]
    type <- pal_dt[type %in% unique(vr$Type),type]
    pal <- c(pal[1],pal_dt[type, on="type"][,pal])
  }else{
    pal_dt <- data.table(pal, type=c("NoMut",type))
    type <- pal_dt[type %in% unique(vr$Type),type]
    pal <- c(pal[1],pal_dt[type, on="type"][,pal])
  }
  dt <- unique(vr[,.(Gene,Type,Patient)])
  if(is.null(type)){type <- data.table(table(vr$Type))[order(-N),V1]}
  dt$Type <- factor(dt$Type, levels = type)
  gene <- dt[!Type %in% order_omit,.(N=length(unique(Patient))),by=Gene][order(N),Gene]
  dt$Gene <- factor(dt$Gene, levels = gene)
  patient <- data.table(table(vr[!Type %in% order_omit,]$Patient))[order(-N),V1]
  dt$Patient <- factor(dt$Patient, levels = c(patient, setdiff(unique(dt$Patient),patient)))
  
  
  if(order_gene & !order_patient){setkey(dt, "Gene")}
  if(!order_gene & order_patient){setkey(dt, "Patient")}
  if(order_gene & order_patient){setkey(dt, "Gene","Patient","Type")}
  
  n <- length(unique(dt$Type))
  
  dt$Gene_Patients <- paste(dt$Gene, dt$Patient)
  dt_inf <- dt[,.N,by=.(Gene, Patient)]
  max_mut_num <- max(dt_inf$N)
  dt[,Mut_num:=seq_len(.N),by=.(Patient,Gene)]

  #main plot, heatmap using first mutation type
  dt1 <- copy(dt)
  dt1[Mut_num !=1, Type:=NA]
  dc <- data.frame(dcast(dt1, Patient ~ Gene, value.var = "Type", fun.aggregate = function(x)(x[!is.na(x)][1])))
  rownames(dc)<- dc[,1]
  data_matrix<-data.matrix(dc[,-1])
  data_matrix[is.na(data_matrix)] <- 0
  pal=pal
  breaks<-seq(-1,10,1)
  if(!hist_plot){
    layout(matrix(data=c(1,2), nrow=1, ncol=2), widths=c(8,2), heights=c(1,1))
    par(mar=heatmap_mar, oma=heatmap_oma, mex=heatmap_mex)
  }else if(hist_plot){
    layout(matrix(c(2,4,1,3),2,2,byrow=TRUE), widths=c(3,1), 
           heights=c(1,3), TRUE)
    par(mar=heatmap_mar)
  }
  
  
  image(x=1:nrow(data_matrix),y=1:ncol(data_matrix),
        z=data_matrix,xlab="",ylab="",breaks=breaks,
        col=pal[1:11],axes=FALSE)
  
  
  #sub plot, points using other mutation type
  add_plot <- function(dt, i){
    dt1 <- copy(dt)
    dt1[Mut_num != i, Type:=NA]
    dc <- data.frame(dcast(dt1, Patient ~ Gene, value.var = "Type", fun.aggregate = function(x){ifelse(length(x) >1,x[!is.na(x)][1],factor(NA))}))
    rownames(dc)<- dc[,1]
    data_matrix <- data.matrix(dc[,-1])
    xy <- which(data_matrix !=0, arr.ind = T)
    #apply(xy, 1, function(x)points(x[1], x[2],pch=15, cex=2.5 -0.5*i, col=pal[data_matrix[x[1],x[2]]+1]))
    apply(xy, 1, function(x)points(x[1]-0.6+i*0.25, x[2],pch=15, cex=1.2 - i*0.08, col=pal[data_matrix[x[1],x[2]]+1]))
  }
  
  ploti <- data.frame(i=2:max_mut_num)
  apply(ploti, 1, function(i){print(add_plot(dt, i))})
  
  text(x=1:nrow(data_matrix)+0.1, y=par("usr")[1] - xlab_adj, 
       srt = 90, adj = 0.5, labels = rownames(data_matrix), 
       xpd = TRUE, cex=col_text_cex)
  axis(2,at=1:ncol(data_matrix),labels=colnames(data_matrix),
       col="white",las=1, cex.lab=0.1)
  abline(h=c(1:ncol(data_matrix))+0.5,v=c(1:nrow(data_matrix))+0.5,
         col="white",lwd=2,xpd=F)
  #title("Correlation between genes",line=8,adj=0)
  
  if(hist_plot){
    #bar plot
    par(mar=c(0,2+0.5,3,heatmap_mar[4]-0.9))
    patient_dt <- dt[,.N,by=.(Patient,Type)]
    mt <- data.frame(dcast(patient_dt, Type ~ Patient, value.var = "N"))
    data_matrix <- data.matrix(mt[,-1])
    rownames(data_matrix) <- mt[,1]
    tryCatch(data_matrix <- data_matrix[setdiff(type, order_omit), patient], error = function(e){print("type argument or your patient name format(include "-" and so on )")})
    data_matrix[is.na(data_matrix)] <- 0
    omit_idx <- NULL
    for(i in order_omit){omit_idx <- c(omit_idx,1+which(type == i))}
    barplot(data_matrix, col=pal[-c(1,omit_idx)],space=0,border = "white",axes=T,xlab="",ann=F, xaxt="n")
    
    par(mar=c( heatmap_mar[1]-2 , 0.8, heatmap_mar[3]+2.2, 3),las=1)
    gene_dt <- dt[,.N,by=.(Gene,Type)]
    mt <- data.frame(dcast(gene_dt, Type ~ Gene, value.var = "N"))
    data_matrix <- data.matrix(mt[,-1])
    rownames(data_matrix) <- mt[,1]
    gene <- gsub("ATM,", "ATM.", gene)
    tryCatch(data_matrix <- data_matrix[setdiff(type, order_omit), gene], error = function(e){print("type argument or check your gene name format(please not include "-" and so on)")})
    data_matrix[is.na(data_matrix)] <- 0
    barplot(data_matrix, col=pal[-c(1,omit_idx)],space=0,border = "white",axes=T,xlab="", ann=F, horiz = T, yaxt="n")
    
  }
  
  #add legend   
  par(mar=legend_mar)
  plot(3, 8,  axes=F, ann=F, type="n")
  ploti <- data.frame(i=1:length(type))
  if(!hist_plot){
    tmp <- apply(ploti, 1, function(i){print(points(2, 10+(length(type)-i)*legend_dist, pch=15, cex=2, col=pal[i+1]))})
    tmp <- apply(ploti, 1, function(i){print(text(3, 10+(length(type)-i)*legend_dist, labels = type[i],pch=15, cex=1, col="black"))})
  }
  if(hist_plot){
    tmp <- apply(ploti, 1, function(i){print(points(2, 5+(length(type)-i)*legend_dist, pch=15, cex=0.9, col=pal[i+1]))})
    tmp <- apply(ploti, 1, function(i){print(text(2.8, 5+(length(type)-i)*legend_dist, labels = type[i],pch=15, cex=0.9, col="black"))})  
  }
  
}

Description :
  Draw a heat map that can display multiple mutation types at the same time for a gene, enter a data table data frame with three columns, the column names are Gene, Type, and Patient, and output a heat map, which can also be added above and to the right of the heat map. Histogram of mutations.
usage:

my_heatmap(vr, pal = c("#F2F2F2",colorRampPalette(c("blue", "white", "red"))(5)[c(1,2)],"#F2F2F2",colorRampPalette(c("blue", "white", "red"))(5)[c(4,5)],brewer.pal(n = 8, name ="Accent")[c(1,4,6,8,2,3,5,7)],"#E31A1C","#6A3D9A"),type = c("DEL","LOSS","NEUTRAL","GAIN","AMPL","nonsynonymous SNV","synonymous SNV","intronic","stopgain","nonframeshift deletion","splicing", "frameshift deletion","UTR3","frameshift insertion","UTR5"),order_gene = T, order_patient = T, hist_plot = T, legend_dist = 0.4, col_text_cex = 1,xlab_adj=1, sub_gene= NULL,heatmap_mar = c(5,17,1,2), heatmap_oma=c(0.2,0.2,0.2,0.2),heatmap_mex=0.5, legend_mar = c(1,0,4,1), order_omit=c("NEUTRAL"))

Parameters :
vr : data frame containing mutation data, with three columns in total, the column names are Gene, Type, Patient;
pal : color palette, vector, which needs to be customized according to the number of mutation types in the data frame, which needs to be more than mutation types A color is used as the background color, and the background color is first;
type : the mutation type corresponding to the color palette, the vector, type must be equal to or more than all types that appear in the data; the default uses four types of mutation types of copy number Plus copy number neutrality plus all mutation types in annovar; the length is 1 less than the color palette when custom setting;
order_gene : default T, sort genes according to the number of patients with mutations ;
order_patient : default T, order patients according to Sort the number of mutated genes;
hist_plot : default T, add the corresponding histogram to the top and right;
legend_dist : default 0.4, adjust the distance between legends, generally need to adjust by yourself;
col_text_cex : adjust the size of the patient name, Default 1;
xlab_adj : Adjust the distance between the patient name and the heatmap;
sub_gene : Only select some genes for drawing, you need to give a vector of gene names, and the gene needs to exist in the data, default NULL
heatmap_mar : mar parameter, adjust the heatmap Edge length of front, back, left and right, default c(5,17,1,2)
heatmap_oma: oma parameter, adjust the length of the outer edges of the front, back, left and right sides of the heat map, default c(0.2,0.2,0.2,0.2)
mex : adjust the mex parameter of the heat map, used to describe the coordinates of the edge of the drawing, default 0.5
legend_mar : the mar parameter of legend , adjust the position of the legend, default c(1,0,4,1)
order_omit : mutation types to ignore when sorting, these mutation types will also be filtered in the histogram, default c("NEUTRAL"), if it does not exist" NEUTRAL" This mutation type can also keep the default parameters.

Details :

  • Before running, you need to load data.table1.10.4, RColorBrewer;
  • If you want to draw a heatmap with a histogram, because the size of the image is too large, you need to use the pdf function and give enough width and length;
  • The mutation type annovar annotated is used by default;
  • Because there are too many factors that affect the alignment of the heatmap and histogram when drawing, it is difficult to achieve better results by adjusting the corresponding parameters of mar, mex, and oma. Therefore, it is recommended to quickly draw a rough outline, and then use inkscape or adobe for typesetting alignment.
  • If the points of the mutation type in the heatmap are too small, the width and length of the pdf file can be reduced.

Example of use :

#without hist plot
pdf("~/project/PE/fromws02/PE/cnv_plot/heatmap_cnv_mut.pdf", height=12, width = 12)
my_heatmap(vr, heatmap_mar = c(17,17,1,2),hist_plot = F, legend_dist=0.1, xlab_adj = 1.2, order_patient = T, order_gene = T)
dev.off()

#with hist plot
pdf("~/project/PE/fromws02/PE/cnv_plot/heatmap_hist_cnv_mut.pdf", height=12, width = 12)
my_heatmap(vr, heatmap_mar = c(17,7,1,2),hist_plot = T, legend_dist=0.3, xlab_adj = 1.2, order_patient = T, order_gene = T)
dev.off()

#only a few gene
pdf("~/project/PE/fromws02/PE/cnv_plot/Assoc_CN1.pdf", height=2,width = 14)
my_heatmap(vr, heatmap_mar = c(7,17,1,2), sub_gene = c("CDKN2A", "GNAQ", "NOTCH1", "RB1", "SMAD4", "ABL1"),hist_plot = F,legend_dist=0.2, xlab_adj = 0.9, order_omit = "NEUTRAL")
dev.off()

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325692419&siteId=291194637