enrichments富集分析 findallmarkers的结果进行富集分析 小鼠

.libPaths()

#数据太大转到Linux服务器处理
.libPaths(c( "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2","/home/data/refdir/Rlib/",  "/usr/local/lib/R/library"))


getwd()

dir.create("./enrichments")
setwd("/home/data/t040413/silicosis/fibroblast_myofibroblast/enrichments/")

#######差异分析
###########################33
library(clusterProfiler)
library(Seurat)
library(SeuratWrappers)
library(ggplot2)
library(dplyr)
library(stringr)
library(openxlsx)
library(org.Mm.eg.db)
library("reshape")
library("RColorBrewer")

#
#remotes::install_github('satijalab/seurat-wrappers')
#
#usethis::create_github_token()
#usethis::edit_r_environ()

getwd()
file="/home/data/t040413/silicosis/fibroblast_myofibroblast/enrichments/0.69"
dir.create(file)
setwd(file)
getwd()

?loadWorkbook
#library(xlsx)  #比openxlsx读取文件慢多了!!!!!
##再三检查文件路径!!!!!!!!!!!!
markers = read.table("/home/data/t040413/silicosis/fibroblast_myofibroblast/markers_after_reclustername.txt",header = T)
head(markers)
#View(markers)
names(table(markers$cluster))
#markers$"Myofibroblast/vascular smooth muscle cell"<-"myo-fib"

#markers$cluster[markers$cluster=="Myofibroblast/vascular smooth muscle cell"]="fib-myo"  #把第17个cluster的非法名字改成正常名字,因为给导出的xlsx命名时使用了custer的名字
names(table(markers$cluster))

k <- keys(org.Mm.eg.db, keytype = "ENTREZID")
gene.list <- select(org.Mm.eg.db, keys = k, columns = c("SYMBOL", "ENSEMBL"), keytype = "ENTREZID")## 73306     3
head(gene.list)

#自建函数   输入go富集条目  和文件的名称 就返回相应的pdf文件和xlsx文件
clusterProfilerOut = function(enrichRes,outfile){
  enrichRes$function.gene.num = unlist(strsplit(enrichRes[,"BgRatio"],"/"))[seq(1,length(unlist(strsplit(enrichRes[,"BgRatio"],"/"))),2)]
  enrichRes$GeneRatio = enrichRes$Count / as.numeric(enrichRes$function.gene.num) #计算generatio
  write.xlsx(enrichRes,paste0(outfile, ".xlsx"),col.names=T,row.names=F)
  
  if (dim(enrichRes)[1]>=10) {enrichRes.use = enrichRes[1:10,]} else{enrichRes.use = enrichRes[1:dim(enrichRes)[1],]}#显示的富集条目不超过10条
  enrichRes.use = enrichRes.use[order(enrichRes.use$GeneRatio, decreasing=T),] #按照generatio降序显示
  enrichRes.use$Description = factor(enrichRes.use$Description,levels=rev(enrichRes.use$Description))
  gap = (max(enrichRes.use$GeneRatio) - min(enrichRes.use$GeneRatio))/5
  
  #制作并输出pdf
  pdf(paste0(outfile, ".pdf"))
  p = ggplot(enrichRes.use,mapping = aes(x=GeneRatio,y=Description))+
    geom_point(aes(size=Count,color=p.adjust))+theme_bw()+ 
    scale_color_gradient(low = "blue", high = "red")+
    scale_x_continuous(expand = c(gap, gap))+ylab(NULL)+ 
    scale_y_discrete(labels=function(x) str_wrap(x, width=60))
  print(p)
  dev.off()
}  



getwd() #此时手动把all_cluster_markers.xlsx文件复制到工作目录下
path=getwd()


#dir(path)

files=dir(path)
files #得到工作目录下的所有文件名
getwd()


'''
path="G:/silicosis/sicosis/silicosis_ST/yll/0214/harmony_cluster/d_all/r=0.6_11clusters_0.69fc"
dir.create(path)
setwd(path)
'''
getwd()
head(markers)
#library(xlsx)

for (i in files) {
  #i="all_cluster_markers.xlsx"
  input = read.table(paste(path, i, sep = "/"),header = T)#读取xlsx文件  注意有的时候需要加row.names 有的时候不需要加
  head(input)
  # input$cluster[input$cluster=="Myofibroblast/vascular smooth muscle cell"]="fib-myo"  #把第17个cluster的非法名字改成正常名字,因为给导出的xlsx命名时使用了custer的名字
  
  input$symbol<-input$gene
  head(input)
  input$FeatureName.entrez = gene.list[match(input$symbol, gene.list$SYMBOL),"ENTREZID"]#在input文件的基础上添加symbol与entrezid相对应的列FeatureName.entrez
  head(input)
  #input$change=ifelse(input$avg_log2FC>0,"up","down")
  input.sub<-input[input$p_val_adj<0.001,]  #取出所有p值小于0.001的
  print(head(input.sub))
  dim(input.sub)
  
  input.sub = na.omit(input[,c("cluster","FeatureName.entrez","avg_log2FC","p_val_adj")])
  head(input.sub)
  colnames(input.sub) = c("cluster","gene", "log2FC", "PValue")
  print(head(input.sub))
  subset_data=input.sub
  head(subset_data)
  
  
  cluster_number<-names(table(subset_data$cluster)) #获取文件中的cluster个数
  length(cluster_number)
  cluster_number
  str(cluster_number)
  
  names(table(subset_data$cluster))[1]
  subset_data[subset_data$cluster=="0",]
  
  
  ##注意文件中cluster的命名 最好不要出现斜杠|等特殊字符   容易出错
  #注意p值是否需要约束?
  #注意是否需要正负号表示上下调?
  #for (cluster_num in 0:(length(cluster_number)-1)) { #对于cluster为数字的
  
  ##只看上调的基因 ,注意修改名字!!!!!!!!!!!!! 
  subset_data = subset(input.sub, log2FC>0.69&PValue<0.05)
  head(subset_data)
  cluster_number<-names(table(subset_data$cluster)) #获取文件中的cluster个数
  cluster_number
  for (cluster_num in cluster_number) {     ##对于cluster为字符的
    
    
    ego=enrichGO(gene=subset_data[subset_data$cluster==cluster_num,]$gene, #!!!!!!全都要改!!!!!!!!!!!!!!!!!!!泉沟要改
                 keyType = "ENTREZID", 
                 OrgDb = org.Mm.eg.db, 
                 ont = "ALL", readable = TRUE,
                 pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH")
    res = ego[ego$p.adjust<0.05,]
    print(dim(res))
    print(table(res$ONTOLOGY))
    
    if(nrow(res) != 0){#go,使用自定义的函数画图 ,注意命名时候把i的 文件属性名去掉
      clusterProfilerOut(res, paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_up_enrichGO"))
    }
    
    ego.KEGG = enrichKEGG(gene=subset_data[subset_data$cluster==cluster_num,]$gene, 
                          organism = "mmu",keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH", qvalueCutoff = 1)
    res = ego.KEGG[ego.KEGG$p.adjust<0.05,]
    print(dim(res)) 
    
    if(nrow(res) !=0){#kegg
      clusterProfilerOut(res,paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_up_enrichKEGG"))
    }
    
    
    
    save(ego,ego.KEGG,file = paste0(cluster_num,"_enrichments_up.Rdata"))
    
  }
  
  
  
  
  
  
  ##只看下调的基因 ,注意修改名字!!!!!!!!!!!!! 
  subset_data = subset(input.sub, log2FC<(-0.69)&PValue<0.05)
  head(subset_data)
  cluster_number<-names(table(subset_data$cluster)) #获取文件中的cluster个数
  cluster_number
  for (cluster_num in cluster_number) {     ##对于cluster为字符的
    
    
    ego=enrichGO(gene=subset_data[subset_data$cluster==cluster_num,]$gene, #!!!!!!全都要改!!!!!!!!!!!!!!!!!!!泉沟要改
                 keyType = "ENTREZID", 
                 OrgDb = org.Mm.eg.db, 
                 ont = "ALL", readable = TRUE,
                 pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH")
    res = ego[ego$p.adjust<0.05,]
    print(dim(res))
    print(table(res$ONTOLOGY))
    
    if(nrow(res) != 0){#go,使用自定义的函数画图 ,注意命名时候把i的 文件属性名去掉
      clusterProfilerOut(res, paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_down_enrichGO"))
    }
    
    ego.KEGG = enrichKEGG(gene=subset_data[subset_data$cluster==cluster_num,]$gene, 
                          organism = "mmu",keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH", qvalueCutoff = 1)
    res = ego.KEGG[ego.KEGG$p.adjust<0.05,]
    print(dim(res)) 
    
    if(nrow(res) !=0){#kegg
      clusterProfilerOut(res,paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_down_enrichKEGG"))
    }
    
  }
  
  
  
  
  
  ##不区分上下调 #################################  
  subset_data = subset(input.sub, abs(log2FC)>0.69&PValue<0.05)
  head(subset_data)
  cluster_number<-names(table(subset_data$cluster)) #获取文件中的cluster个数
  cluster_number
  for (cluster_num in cluster_number) {     ##对于cluster为字符的
    
    
    ego=enrichGO(gene=subset_data[subset_data$cluster==cluster_num,]$gene, #!!!!!!全都要改!!!!!!!!!!!!!!!!!!!泉沟要改
                 keyType = "ENTREZID", 
                 OrgDb = org.Mm.eg.db, 
                 ont = "ALL", readable = TRUE,
                 pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH")
    res = ego[ego$p.adjust<0.05,]
    print(dim(res))
    print(table(res$ONTOLOGY))
    
    if(nrow(res) != 0){#go,使用自定义的函数画图 ,注意命名时候把i的 文件属性名去掉
      clusterProfilerOut(res, paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_both_enrichGO"))
    }
    
    ego.KEGG = enrichKEGG(gene=subset_data[subset_data$cluster==cluster_num,]$gene, 
                          organism = "mmu",keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH", qvalueCutoff = 1)
    res = ego.KEGG[ego.KEGG$p.adjust<0.05,]
    print(dim(res)) 
    
    if(nrow(res) !=0){#kegg
      clusterProfilerOut(res,paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_both_enrichKEGG"))
    }
    
  }
  
} 


dev.off() 







path="D:/ARDS_scripts_1012/ARDS/Step2_harmony_f200_R3/ws/Fibroblast/0.5/sepsis_Fibroblast_r0.5.rds"
load(path)
table(subset_data$stim,subset_data$seurat_clusters)

DimPlot(subset_data,group.by = "stim")
DimPlot(subset_data,split.by = "stim",
        group.by = "seurat_clusters",
        label = TRUE)

猜你喜欢

转载自blog.csdn.net/qq_52813185/article/details/130830117