两个样本的均值比较(two samples average)

为了更全面地展示分析步骤,下面给出完整的R代码和具体的分析步骤(共五个例子)

要是有困惑的,可以把代码输入Chatgpt查看详细解析:【资源共享】分享3个免费ChatGPT国内AI软件,请及时收藏!-CSDN博客

#1, data0306-deer 鹿的前后腿长,采用合适的统计方法(说明选用依据),检验前后腿长有无差异?后腿是否比前腿长?


# Method choice: both samples are small (n1, n2 < 12), so the Wilcoxon
# signed-rank test is used.
library(haven) # provides read_sav() for SPSS .sav files
data1 <- read_sav(
  file = "D:\\Datum\\生物统计\\data\\data3/data0306 deer.sav"
)
data1

# A tibble: 10 × 3
    Deer Hindleg Foreleg
   <dbl>   <dbl>   <dbl>
 1     1     142     138
 2     2     140     136
 3     3     144     147
 4     4     144     139
 5     5     142     143
 6     6     146     141
 7     7     149     143
 8     8     150     145
 9     9     142     136
10    10     148     146

# Two-sided Wilcoxon signed-rank test on the paired hind/fore leg lengths
# (one row per deer). The paired differences contain ties, so an exact
# p-value cannot be computed; exact = FALSE requests the normal approximation
# explicitly. This is the same result R otherwise falls back to with a
# "cannot compute exact p-value with ties" warning.
wilcox.test(data1$Hindleg, data1$Foreleg, paired = TRUE, exact = FALSE)
## Output:
# Wilcoxon signed rank test with continuity correction
# data:  data1$Hindleg and data1$Foreleg
# V = 51, p-value = 0.01859
# alternative hypothesis: true location shift is not equal to 0
## Conclusion:
# p-value = 0.01859 < 0.05, so hind and fore leg lengths differ significantly.

# One-sided test: is the hind leg longer than the fore leg?
wilcox.test(
  data1$Hindleg, data1$Foreleg,
  paired = TRUE, exact = FALSE, alternative = "greater"
)
## Output: V = 51, p-value = 0.009297
## Conclusion: p < 0.05, the hind leg is longer than the fore leg.


#2, data0307 exercise 是成年人经过一段体育锻炼前后的体重,采用合适的统计方法(说明选用依据),检验锻炼前后体重有无差异?按照常理,经过体育锻炼后,体重会有所下降,采用合适的统计方法(说明选用依据),检验该数据是否支持这一说法?


# Method choice: both samples have n > 30, so a t test is used.
library(haven) # provides read_sav() for SPSS .sav files
data2 <- read_sav(
  file = "D:\\Datum\\生物统计\\data\\data3/data0307 exercise.sav"
)
data2

# A tibble: 33 × 3
      ID   Pre  Post
   <dbl> <dbl> <dbl>
 1     1   165   163
 2     2   180   179
 3     3   175   180
 4     4   160   161
 5     5   185   170
 6     6   177   170
 7     7   190   185
 8     8   200   192
 9     9   195   190
10    10   198   180
# ℹ 23 more rows
# ℹ Use `print(n = ...)` to see more rows


# Pre and Post are two weights recorded for the same subject (one row per ID),
# so the samples are paired, not independent. A paired t test is therefore
# used below; it works on the within-subject differences and needs no
# equal-variance assumption.

# F test comparing the two variances (kept for reference only).
var.test(data2$Pre, data2$Post)
## Output:
# data:  data2$Pre and data2$Post
# F = 2.0483, num df = 32, denom df = 32, p-value = 0.04643
# alternative hypothesis: true ratio of variances is not equal to 1
# 95 percent confidence interval:
#   1.011648 4.147360
# sample estimates:
#   ratio of variances
# 2.048333
# p-value = 0.04643 < 0.05: the variances differ, so a pooled-variance
# (var.equal = TRUE) two-sample t test would not be justified for these data.

# Two-sided paired t test: does body weight differ before vs. after exercise?
t.test(data2$Pre, data2$Post, paired = TRUE)
# NOTE(review): the figures recorded previously (t = 2.7245, df = 64,
# p-value = 0.008293) came from an unpaired pooled-variance test and do not
# apply to this call. Re-run and record the paired result (df = 32 for 33
# complete pairs) before stating the conclusion.
# Sample means from the earlier run: Pre = 186.3636, Post = 180.5152.

# One-sided paired t test: H1 is mean(Pre - Post) > 0, i.e. weight decreased.
t.test(data2$Pre, data2$Post, paired = TRUE, alternative = "greater")
# NOTE(review): the previous unpaired output (t = 2.7245, df = 64,
# p-value = 0.004146) does not apply to this call; re-run and record.
# Conclusion rule: if p < 0.05, the data support the claim that body weight
# decreases after exercise.


#3, data0308 fiber 是某灵长类物种采食和不采食植物的叶片纤维素干重比例,采用合适的统计方法(说明选用依据),检验食物和非食物的干重比例有无差异?采用合适的统计方法(说明选用依据),检验食物的干重比例是否小于非食物?


# Method choice: n1 = 35, n2 = 15, two independent groups; each group passes
# the normality check further down, so a t test is used.
library(haven) # provides read_sav() for SPSS .sav files
data3 <- read_sav(
  file = "D:\\Datum\\生物统计\\data\\data3/data0308 fiber.sav"
)
data3

# A tibble: 50 × 3
    Item Food        Fiber
   <dbl> <dbl+lbl>   <dbl>
 1     1 0 [Nonfood]  17.8
 2     2 0 [Nonfood]  16.8
 3     3 0 [Nonfood]  13.2
 4     4 1 [Food]     12.9
 5     5 0 [Nonfood]  18.6
 6     6 0 [Nonfood]  11.8
 7     7 0 [Nonfood]  17.1
 8     8 0 [Nonfood]  10.2
 9     9 0 [Nonfood]  22.3
10    10 0 [Nonfood]  12.2
# ℹ 40 more rows
# ℹ Use `print(n = ...)` to see more rows

# Leaves the species does not eat (Food == 0): fibre as a proportion of dry
# weight.
data30 <- data3[data3[["Food"]] == 0, ]
data30

# A tibble: 35 × 3
    Item Food        Fiber
   <dbl> <dbl+lbl>   <dbl>
 1     1 0 [Nonfood]  17.8
 2     2 0 [Nonfood]  16.8
 3     3 0 [Nonfood]  13.2
 4     5 0 [Nonfood]  18.6
 5     6 0 [Nonfood]  11.8
 6     7 0 [Nonfood]  17.1
 7     8 0 [Nonfood]  10.2
 8     9 0 [Nonfood]  22.3
 9    10 0 [Nonfood]  12.2
10    11 0 [Nonfood]  12.5
# ℹ 25 more rows
# ℹ Use `print(n = ...)` to see more rows

# Leaves the species eats (Food == 1): fibre as a proportion of dry weight.
data31 <- data3[data3[["Food"]] == 1, ]
data31

# A tibble: 15 × 3
    Item Food      Fiber
   <dbl> <dbl+lbl> <dbl>
 1     4 1 [Food]  12.9 
 2    12 1 [Food]  12.0 
 3    13 1 [Food]   5.05
 4    22 1 [Food]  10.6 
 5    25 1 [Food]  10   
 6    26 1 [Food]  17.3 
 7    27 1 [Food]  14.7 
 8    29 1 [Food]  11.3 
 9    35 1 [Food]   7.8 
10    36 1 [Food]  10.5 
11    37 1 [Food]  10.6 
12    39 1 [Food]   7.22
13    41 1 [Food]  15.9 
14    47 1 [Food]  19.0 
15    50 1 [Food]  13.6 

# Check whether each group is consistent with a normal distribution
# (Shapiro-Wilk).
shapiro.test(data30$Fiber)
## Output: W = 0.95411, p-value = 0.1515
## Conclusion: p-value = 0.1515 > 0.05, the nonfood fibre proportion is
## consistent with a normal distribution.
shapiro.test(data31$Fiber)
## Output: W = 0.98371, p-value = 0.9887
## Conclusion: p-value = 0.9887 > 0.05, the food fibre proportion is
## consistent with a normal distribution.

# Check homogeneity of variance between the two groups (F test).
var.test(data30$Fiber, data31$Fiber)
## Output: F = 1.922, num df = 34, denom df = 14, p-value = 0.1919
## Conclusion: p-value = 0.1919 > 0.05, the variances are homogeneous, so the
## pooled-variance t test is used.

# Two-sided test: do food and nonfood fibre proportions differ?
t.test(data30$Fiber, data31$Fiber, var.equal = TRUE)
## Output: t = 2.5601, df = 48, p-value = 0.01367
## Conclusion: p-value = 0.01367 < 0.05, the fibre proportions of food and
## nonfood leaves differ.

# One-sided test with food listed first: H1 is mean(food) < mean(nonfood).
t.test(data31$Fiber, data30$Fiber, var.equal = TRUE, alternative = "less")
## Output: t = -2.5601, df = 48, p-value = 0.006834
## (t is negative here because the groups are passed in the opposite order
## to the two-sided call above.)
## Conclusion: p-value = 0.006834 < 0.05, the fibre proportion of food leaves
## is significantly lower than that of nonfood leaves.

#4, data0309 protein 是该灵长类物种采食和不采食植物的叶片蛋白质干重比例,采用合适的统计方法(说明选用依据),检验食物和非食物的干重比例有无差异?采用合适的统计方法(说明选用依据),检验食物的干重比例是否大于非食物?


## Method choice: n1 = 35, n2 = 15, two independent groups; one group fails
## the normality check further down, so the Wilcoxon rank-sum test is used.

library(haven) # provides read_sav() for SPSS .sav files
data4 <- read_sav(
  file = "D:\\Datum\\生物统计\\data\\data3/data0309 protein.sav"
)
data4

# A tibble: 50 × 3
    Item Food        Protein
   <dbl> <dbl+lbl>     <dbl>
 1     1 0 [Nonfood]    15.7
 2     2 0 [Nonfood]    13.7
 3     3 0 [Nonfood]    18.8
 4     4 1 [Food]       25.6
 5     5 0 [Nonfood]    13.9
 6     6 0 [Nonfood]    16.2
 7     7 0 [Nonfood]    11.8
 8     8 0 [Nonfood]    16.1
 9     9 0 [Nonfood]    12.5
10    10 0 [Nonfood]    11.5
# ℹ 40 more rows
# ℹ Use `print(n = ...)` to see more rows

# Leaves the species does not eat (Food == 0): protein as a proportion of dry
# weight.
data40 <- data4[data4[["Food"]] == 0, ]
data40

# A tibble: 35 × 3
    Item Food        Protein
   <dbl> <dbl+lbl>     <dbl>
 1     1 0 [Nonfood]    15.7
 2     2 0 [Nonfood]    13.7
 3     3 0 [Nonfood]    18.8
 4     5 0 [Nonfood]    13.9
 5     6 0 [Nonfood]    16.2
 6     7 0 [Nonfood]    11.8
 7     8 0 [Nonfood]    16.1
 8     9 0 [Nonfood]    12.5
 9    10 0 [Nonfood]    11.5
10    11 0 [Nonfood]    11.8
# ℹ 25 more rows
# ℹ Use `print(n = ...)` to see more rows

# Leaves the species eats (Food == 1): protein as a proportion of dry weight.
data41 <- data4[data4[["Food"]] == 1, ]
data41

# A tibble: 15 × 3
    Item Food      Protein
   <dbl> <dbl+lbl>   <dbl>
 1     4 1 [Food]    25.6 
 2    12 1 [Food]    11.1 
 3    13 1 [Food]    10.3 
 4    22 1 [Food]    18.8 
 5    25 1 [Food]    12.8 
 6    26 1 [Food]    13.0 
 7    27 1 [Food]    15.0 
 8    29 1 [Food]    16.2 
 9    35 1 [Food]     6.45
10    36 1 [Food]    12.3 
11    37 1 [Food]    23.5 
12    39 1 [Food]    11.6 
13    41 1 [Food]    11.1 
14    47 1 [Food]    16.9 
15    50 1 [Food]    21 

# Check whether each group is consistent with a normal distribution
# (Shapiro-Wilk).
shapiro.test(data40$Protein)
## Output: W = 0.82641, p-value = 7.027e-05
## Conclusion: p-value = 7.027e-05 < 0.05, the nonfood protein proportion is
## NOT normally distributed.
shapiro.test(data41$Protein)
## Output: W = 0.94579, p-value = 0.4607
## Conclusion: p-value = 0.4607 > 0.05, the food protein proportion is
## consistent with a normal distribution.

# One group is non-normal, so the two independent groups are compared with
# the Wilcoxon rank-sum test. The half-integer W below shows the data contain
# ties, so an exact p-value is unavailable; exact = FALSE requests the normal
# approximation explicitly instead of relying on R's fallback with a warning.

# Two-sided test: do food and nonfood protein proportions differ?
wilcox.test(data40$Protein, data41$Protein, exact = FALSE)
## Output: W = 258.5, p-value = 0.9409
## Conclusion: p-value = 0.9409 > 0.05, no significant difference between the
## protein proportions of food and nonfood leaves.

# One-sided test with food listed first: H1 is food > nonfood.
wilcox.test(
  data41$Protein, data40$Protein,
  alternative = "greater", exact = FALSE
)
## Output: W = 266.5, p-value = 0.4705
## Conclusion: p-value = 0.4705 > 0.05, the protein proportion of food leaves
## is not significantly greater than that of nonfood leaves.

#5, 一般情况下,灵长类喜欢吃蛋白质/纤维素比例(ratio)更高的食物,基于data0308 fiber 和 data0309 protein,采用合适的统计方法(说明选用依据),检验该物种是否符合这种情况?(提示:需合并数据)


# Method choice: n1 = 35, n2 = 15, two independent groups; one group fails
# the normality check further down, so the Wilcoxon rank-sum test is used.

# Protein/fibre ratio for food items. The division is element-wise, so both
# subsets must list the same items in the same order; fail loudly otherwise
# rather than pairing protein and fibre values from different leaves.
stopifnot(
  nrow(data41) == nrow(data31),
  all(data41$Item == data31$Item)
)
data51 <- data41$Protein / data31$Fiber
data51

 [1] 1.9906832 0.9226933 2.0316832 1.7640977 1.2750000 0.7474048 1.0190606 1.4335106 0.8269231 1.1755725 2.2124060
[12] 1.6052632 0.7011349 0.8869611 1.5407190
attr(,"label")
[1] "percent dry matter"
attr(,"format.spss")
[1] "F8.2"

# Protein/fibre ratio for nonfood items. The division is element-wise, so
# both subsets must list the same items in the same order; fail loudly
# otherwise rather than pairing values from different leaves.
stopifnot(
  nrow(data40) == nrow(data30),
  all(data40$Item == data30$Item)
)
data50 <- data40$Protein / data30$Fiber
data50

 [1] 0.8830146 0.8159619 1.4248862 0.7467742 1.3703390 0.6883421 1.5743640 0.5596948 0.9378577 0.9440448 0.9754702
[12] 1.4577320 1.0391960 0.6523785 0.3996877 0.4640403 0.5574371 1.1700581 2.8302083 0.7869023 2.3851852 1.5460340
[23] 0.4154443 0.3163403 1.5139296 0.8476421 1.1578378 0.6464411 0.7935294 0.6789906 2.7776946 1.8446995 0.9605688
[34] 3.7619632 0.6978541
attr(,"label")
[1] "percent dry matter"
attr(,"format.spss")
[1] "F8.2"

# Check whether each group is consistent with a normal distribution
# (Shapiro-Wilk).
shapiro.test(data51)
## Output: W = 0.93386, p-value = 0.3113
## Conclusion: p-value = 0.3113 > 0.05, the protein/fibre ratio of food items
## is consistent with a normal distribution.
shapiro.test(data50)
## Output: W = 0.8164, p-value = 4.349e-05
## Conclusion: p-value = 4.349e-05 < 0.05, the protein/fibre ratio of nonfood
## items is NOT normally distributed.


# One-sided Wilcoxon rank-sum test with food listed first: H1 is that the
# food ratio is greater than the nonfood ratio.
wilcox.test(data51, data50, alternative = "greater")
## Output: W = 348, p-value = 0.03584
## (The value W = 177 recorded originally is the statistic for the reversed
## argument order; the two statistics sum to 15 * 35 = 525.)
## Conclusion: p-value = 0.03584 < 0.05, food items have a significantly
## higher protein/fibre ratio, consistent with the expectation that primates
## prefer food with a higher protein/fibre ratio.
 

猜你喜欢

转载自blog.csdn.net/m0_61164319/article/details/134368293