Multi-group data statistical analysis and drawing R language implementation

This is a piece of code I wrote while doing the statistics for my graduation project. It can quickly run the statistical tests on a large amount of grouped data and collect the results, which is very convenient.
The statistics use the base R stats package, and the ggplot2 package is used for plotting. Both are very common, and there are many tutorials online.

Input data

The example is a column of numbers randomly generated by excel
random number

# Read the copied column of numbers from the system clipboard into a data
# frame. With header = FALSE the first row is treated as data, and the
# column is referred to as y$V1 in the rest of the script.
# NOTE(review): the "clipboard" connection is platform-dependent; confirm it
# is available on your OS, or read the data from a file instead.
y <- read.table("clipboard", header = FALSE)

This method reads directly from the clipboard. Functions such as read.xlsx, read.table, read.csv and so on can be used instead to read data that has already been organized into a file.
Group information can also be read in.

Enter group information

# Group labels, one per observation.
# a1: every label is repeated 5 times in a row (15 labels, blocked by group).
a1 <- factor(rep(c('模型组', '低剂组', '高剂组'), each = 5))
# a2: the whole label vector is cycled 3 times (9 labels, interleaved).
# The argument is spelled out as `times`; the abbreviated `time` only worked
# through partial argument matching.
a2 <- factor(rep(c('空白组', '模型组', '治疗组'), times = 3))

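The difference between the `each` and `times` arguments of `rep()` is easiest to see by running both lines yourself: `each` repeats every element consecutively, whereas `times` repeats the whole vector.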

Calculate the mean and standard deviation for each group

# Per-group mean and standard deviation of the measurements in y$V1.
# The intermediate results get their own names so that the workspace does not
# end up with objects called `mean` and `sd` sitting in front of the functions
# of the same name.
group_mean <- tapply(y$V1, a1, mean)
group_sd <- tapply(y$V1, a1, sd)
# One row per group; the columns keep the names `mean` and `sd` because the
# plotting code refers to them.
data <- data.frame(mean = group_mean, sd = group_sd)
# Fix the display order of the groups (the row names are the group labels).
data$group <- factor(
  row.names(data),
  levels = c('模型组', '低剂组', '高剂组')
)

normality test

# Shapiro-Wilk normality test, run separately within each group.
# chisq.test() on a plain numeric vector treats the values as counts and
# tests them against equal probabilities, so it does not assess normality.
tapply(y$V1, a1, shapiro.test)

homogeneity of variance test

# Bartlett's test of homogeneity of variances across the groups in a1.
bartlett.test(x = y$V1, g = a1)

ANOVA

# One-way ANOVA of the measurements across the groups in a1.
result.aov <- aov(formula = y$V1 ~ a1)
summary(result.aov)
# Pairwise comparisons: t tests with Bonferroni-adjusted p-values.
pairwise.t.test(x = y$V1, g = a1, p.adjust.method = 'bonferroni')

Another Multiple Comparison Method

# Tukey's honest significant differences for the fitted ANOVA model;
# computed once, then shown as a table and as a confidence-interval plot.
tukey_result <- TukeyHSD(result.aov)
tukey_result
plot(tukey_result)

Non-parametric test

# Kruskal-Wallis rank sum test across the groups in a1.
kruskal.test(x = y$V1, g = a1)
# Pairwise comparisons: Wilcoxon rank sum tests with Bonferroni-adjusted
# p-values.
pairwise.wilcox.test(x = y$V1, g = a1, p.adjust.method = "bonferroni")

Use the sink() function to write the results to a file

# Divert printed output to calc.txt. The work is wrapped in tryCatch() so that
# `finally = sink()` restores console output even if one of the tests errors;
# otherwise the diversion would stay active for the rest of the session.
sink("calc.txt")
tryCatch(
  {
    print(data)
    # Normality within each group: Shapiro-Wilk test. chisq.test() on a
    # numeric vector is a goodness-of-fit test against equal probabilities,
    # not a normality test.
    # NOTE: a calc.txt written before this change contains chi-squared output
    # in this section instead.
    print(tapply(y$V1, a1, shapiro.test))
    print(bartlett.test(y$V1, a1))
    # If the groups are normally distributed and the variances are
    # homogeneous, use one-way ANOVA.
    result.aov <- aov(y$V1 ~ a1)
    print(summary(result.aov))
    print(pairwise.t.test(y$V1, a1, p.adjust.method = 'bonferroni'))
    # If the groups are not normally distributed, or are normal but the
    # variances are not homogeneous, use the non-parametric tests.
    print(kruskal.test(y$V1, a1))
    print(pairwise.wilcox.test(y$V1, a1, p.adjust.method = 'bonferroni'))
  },
  finally = sink()
)

output result

      mean       sd  group
低剂组 14.2 2.167948 低剂组
高剂组 25.0 1.581139 高剂组
模型组  7.2 3.420526 模型组
$低剂组

	Chi-squared test for given probabilities

data:  X[[i]]
X-squared = 1.3239, df = 4, p-value = 0.8573


$高剂组

	Chi-squared test for given probabilities

data:  X[[i]]
X-squared = 0.4, df = 4, p-value = 0.9825


$模型组

	Chi-squared test for given probabilities

data:  X[[i]]
X-squared = 6.5, df = 4, p-value = 0.1648



	Bartlett test of homogeneity of variances

data:  y$V1 and a1
Bartlett's K-squared = 2.1535, df = 2, p-value = 0.3407

            Df Sum Sq Mean Sq F value   Pr(>F)    
a1           2  804.1   402.1   63.82 4.03e-07 ***
Residuals   12   75.6     6.3                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

	Pairwise comparisons using t tests with pooled SD 

data:  y$V1 and a1

       低剂组  高剂组 
高剂组 5.7e-05 -      
模型组 0.0026  3.1e-07

P value adjustment method: bonferroni 
	Kruskal-Wallis rank sum test

data:  y$V1 and a1
Kruskal-Wallis chi-squared = 12.545, df = 2, p-value = 0.001888

	Pairwise comparisons using Wilcoxon rank sum test with continuity correction 

data:  y$V1 and a1 

       低剂组 高剂组
高剂组 0.036  -     
模型组 0.035  0.036 

P value adjustment method: bonferroni 

Draw a bar graph with error bars

library(ggplot2)
# Bar chart of the group means with an upper error bar of one standard
# deviation. Expects `data` to have the columns group, mean and sd.
p <- ggplot(data, aes(group, mean)) +
  geom_col(
    # One fill colour per bar, in the order of the group factor levels.
    fill = c('white', 'black', 'gray80'),
    col = 'black',
    width = 0.6,
    position = position_dodge(0.8),
    # No trailing comma after the last argument: an empty argument in `...`
    # is not accepted by every ggplot2 version.
    lwd = 1.5
  ) +
  theme_classic(
    base_size = 32,
    base_family = 'serif' # font family
  ) +
  # Start the y axis exactly at zero so the bars sit on the axis line.
  scale_y_continuous(
    expand = c(0, 0)
  ) +
  scale_x_discrete(
    expand = c(0.2, 0.2)
  ) +
  # Invisible layer that stretches the y range to 1.2 * (mean + sd), leaving
  # head-room above the tallest error bar.
  geom_blank(aes(y = (mean + sd) * 1.2)) +
  # Only the upper half of the error bar is drawn (from mean to mean + sd).
  geom_errorbar(
    aes(ymin = mean, ymax = mean + sd),
    width = 0.3, lwd = 1.5
  ) +
  xlab("这是x轴") +
  ylab("这是y轴")
p

insert image description here

save the picture

# Save the bar chart as a TIFF file in the working directory.
tiff('barplot.tif')
# print() is required: a bare `p` is only auto-printed at the interactive top
# level, so a sourced script or a function body would write an empty file.
print(p)
# Close the device so the file is flushed to disk.
dev.off()

Violin plot, add error bars, no legend

Violin plots and boxplots use all the data and need to build a table containing all the data.

# Long-format table with one row per observation, for the violin and box
# plots. The column names match those of the summary table (`mean`, `group`)
# so both tables can be used in the same plot without errors, and the group
# factor is given an explicit display order.
data2 <- data.frame(
  mean = y$V1,
  group = factor(a1, levels = c('模型组', '低剂组', '高剂组'))
)

# Violin plot of the raw observations (data2), overlaid with the group mean
# (diamond marker) and mean +/- sd error bars taken from the summary table
# (data). The legend is hidden.
p2 <- ggplot(
  data = data2,
  mapping = aes(x = group, y = mean, fill = group)
) +
  geom_violin(trim = FALSE, color = "black", lwd = 1.5) +
  # Invisible layer that extends the y range to 1.2 * (mean + sd).
  geom_blank(data = data, mapping = aes(y = (mean + sd) * 1.2)) +
  geom_point(
    data = data,
    mapping = aes(x = group, y = mean),
    pch = 18, size = 6
  ) +
  geom_errorbar(
    data = data,
    mapping = aes(ymin = mean - sd, ymax = mean + sd),
    width = 0.1, lwd = 1.5
  ) +
  labs(x = "这里是x轴", y = "这里是y轴") +
  theme_classic(base_size = 32, base_family = 'serif') +
  theme(legend.position = "none")
p2

insert image description here

Violin plot plus boxplot

# Violin plot of the raw observations with a narrow box plot drawn inside
# each violin. The legend is hidden.
p3 <- ggplot(
  data = data2,
  mapping = aes(x = group, y = mean, fill = group)
) +
  geom_violin(trim = FALSE, color = "black", lwd = 1.5) +
  geom_boxplot(width = 0.2, position = position_dodge(0.9), lwd = 1.5) +
  # Invisible layer that extends the y range to 1.2 * (mean + sd), using the
  # summary table.
  geom_blank(data = data, mapping = aes(y = (mean + sd) * 1.2)) +
  labs(x = "这里是x轴", y = "这里是y轴") +
  theme_classic(base_size = 32, base_family = 'serif') +
  theme(legend.position = "none")
p3

insert image description here

Guess you like

Origin blog.csdn.net/weixin_55842556/article/details/119849825