R语言基础Lecture4

通过镜像网站下载R语言的程序包

install.packages("包名称",repos="https://mirrors.tuna.tsinghua.edu.cn/CRAN/") ## 换成清华镜像

基础图形

条形图(Bar Plot)

需要导入vcd包

install.packages("vcd",repos="https://mirrors.tuna.tsinghua.edu.cn/CRAN/") 
> library(vcd) 
载入需要的程辑包:grid
> counts <- table(Arthritis$Improved)
> counts

  None   Some Marked 
    42     14     28 
> par(mfrow=c(2,2)) 
> barplot(counts, 
+         main="Simple Bar Plot",
+         xlab="Improvement", ylab="Frequency")
> barplot(counts, 
+         main="Horizontal Bar Plot",
+         xlab="Frequency", ylab="Improvement",
+         horiz=TRUE)
> counts <- table(Arthritis$Improved, Arthritis$Treatment) 
> counts
        
         Placebo Treated
  None        29      13
  Some         7       7
  Marked       7      21
> barplot(counts,
+         main="Stacked Bar Plot",
+         xlab="Treatment", ylab="Frequency",
+         col=c("red", "yellow","green"),
+         legend=rownames(counts)) 
> barplot(counts,
+         main="Grouped Bar Plot",
+         xlab="Treatment", ylab="Frequency",
+         col=c("red", "yellow", "green"),
+         legend=rownames(counts), beside=TRUE)

在这里插入图片描述

Pie Chart(饼图)

> library(plotrix)
> par(mfrow=c(2,2))
> slices <- c(10, 12,4, 16, 8)
> lbls <- c("US", "UK", "Australia", "Germany", "France")
> pie(slices, labels = lbls,main="Simple Pie Chart",edges=300,radius=1)
> pct <- round(slices/sum(slices)*100)
> lbls2 <- paste(lbls, " ", pct, "%", sep="")
> pie(slices, labels=lbls2, col=rainbow(length(lbls2)),
+     main="Pie Chart with Percentages",edges=300,radius=1)
> pie3D(slices, labels=lbls,explode=0.1,
+       main="3D Pie Chart ",edges=300,radius=1)
> mytable <- table(state.region)
> lbls3 <- paste(names(mytable), "\n", mytable, sep="")
> pie(mytable,labels=lbls3,
+     main="Pie Chart from a Table\n(with sample size)",
+     edges=300,radius=1)

在这里插入图片描述

Fan Plot(扇形图)

> slices <- c(10, 12,4, 16, 8)
> lbls <- c("US", "UK", "Australia", "Germany", "France") 
> fan.plot(slices, labels = lbls, main="Fan Plot")

在这里插入图片描述

Dot Plot(点图)

> dotchart(mtcars$mpg,
+          labels=row.names(mtcars),cex=0.7,
+          main="Gas Mileage for Car Models",
+          xlab="Miles Per Gallon")

在这里插入图片描述

基础统计

描述统计

head(x)查看数据集,展示x的前几行(默认前6行),可以通过head(x,n=10)来显示前10行

> head(mtcars)
                   mpg cyl disp  hp drat
Mazda RX4         21.0   6  160 110 3.90
Mazda RX4 Wag     21.0   6  160 110 3.90
Datsun 710        22.8   4  108  93 3.85
Hornet 4 Drive    21.4   6  258 110 3.08
Hornet Sportabout 18.7   8  360 175 3.15
Valiant           18.1   6  225 105 2.76
                     wt  qsec vs am gear
Mazda RX4         2.620 16.46  0  1    4
Mazda RX4 Wag     2.875 17.02  0  1    4
Datsun 710        2.320 18.61  1  1    4
Hornet 4 Drive    3.215 19.44  1  0    3
Hornet Sportabout 3.440 17.02  0  0    3
Valiant           3.460 20.22  1  0    3
                  carb
Mazda RX4            4
Mazda RX4 Wag        4
Datsun 710           1
Hornet 4 Drive       1
Hornet Sportabout    2
Valiant              1

summary()获取描述性统计量,可以提供最小值、最大值、四分位数和数值型变量的均值,以及因子向量和逻辑型向量的频数统计等

> summary(mtcars)
      mpg             cyl       
 Min.   :10.40   Min.   :4.000  
 1st Qu.:15.43   1st Qu.:4.000  
 Median :19.20   Median :6.000  
 Mean   :20.09   Mean   :6.188  
 3rd Qu.:22.80   3rd Qu.:8.000  
 Max.   :33.90   Max.   :8.000  
      disp             hp       
 Min.   : 71.1   Min.   : 52.0  
 1st Qu.:120.8   1st Qu.: 96.5  
 Median :196.3   Median :123.0  
 Mean   :230.7   Mean   :146.7  
 3rd Qu.:326.0   3rd Qu.:180.0  
 Max.   :472.0   Max.   :335.0  
      drat             wt       
 Min.   :2.760   Min.   :1.513  
 1st Qu.:3.080   1st Qu.:2.581  
 Median :3.695   Median :3.325  
 Mean   :3.597   Mean   :3.217  
 3rd Qu.:3.920   3rd Qu.:3.610  
 Max.   :4.930   Max.   :5.424  
      qsec             vs        
 Min.   :14.50   Min.   :0.0000  
 1st Qu.:16.89   1st Qu.:0.0000  
 Median :17.71   Median :0.0000  
 Mean   :17.85   Mean   :0.4375  
 3rd Qu.:18.90   3rd Qu.:1.0000  
 Max.   :22.90   Max.   :1.0000  
       am              gear      
 Min.   :0.0000   Min.   :3.000  
 1st Qu.:0.0000   1st Qu.:3.000  
 Median :0.0000   Median :4.000  
 Mean   :0.4062   Mean   :3.688  
 3rd Qu.:1.0000   3rd Qu.:4.000  
 Max.   :1.0000   Max.   :5.000  
      carb      
 Min.   :1.000  
 1st Qu.:2.000  
 Median :2.000  
 Mean   :2.812  
 3rd Qu.:4.000  
 Max.   :8.000

频率和列联表

table()可以展现变量的频率

> attach(mtcars)
> table(cyl)
cyl
 4  6  8 
11  7 14 
#cy1中4有11个,6有7个,8有14个
> summary(mpg)
   Min. 1st Qu.  Median    Mean 3rd Qu. 
  10.40   15.43   19.20   20.09   22.80 
   Max. 
  33.90 
#通过summary可以分析出mpg在[10,34]之间
#cut()可以讲区间划分
> table(cut(mpg,seq(10,34,by=2)))#10到34,2个一组

(10,12] (12,14] (14,16] (16,18] (18,20] 
      2       1       7       3       5 
(20,22] (22,24] (24,26] (26,28] (28,30] 
      5       2       2       1       0 
(30,32] (32,34] 
      2       2 

相关性

  • cov(x,y):x和y间的协方差;如果x,y为矩阵或数据框,返回x和y各列的协方差
  • var(x):向量x的样本方差;如果x是矩阵或数据框,协方差矩阵将被计算
  • cor(x):如果x是矩阵或数据框,相关系数矩阵将被计算
    求矩阵或数据框时cov=var
> states = state.x77[,1:6] 
> cov(states)
              Population      Income
Population 19931683.7588 571229.7796
Income       571229.7796 377573.3061
Illiteracy      292.8680   -163.7020
Life Exp       -407.8425    280.6632
Murder         5663.5237   -521.8943
HS Grad       -3551.5096   3076.7690
             Illiteracy     Life Exp
Population  292.8679592 -407.8424612
Income     -163.7020408  280.6631837
Illiteracy    0.3715306   -0.4815122
Life Exp     -0.4815122    1.8020204
Murder        1.5817755   -3.8694804
HS Grad      -3.2354694    6.3126849
                Murder      HS Grad
Population 5663.523714 -3551.509551
Income     -521.894286  3076.768980
Illiteracy    1.581776    -3.235469
Life Exp     -3.869480     6.312685
Murder       13.627465   -14.549616
HS Grad     -14.549616    65.237894
> var(states)
              Population      Income
Population 19931683.7588 571229.7796
Income       571229.7796 377573.3061
Illiteracy      292.8680   -163.7020
Life Exp       -407.8425    280.6632
Murder         5663.5237   -521.8943
HS Grad       -3551.5096   3076.7690
             Illiteracy     Life Exp
Population  292.8679592 -407.8424612
Income     -163.7020408  280.6631837
Illiteracy    0.3715306   -0.4815122
Life Exp     -0.4815122    1.8020204
Murder        1.5817755   -3.8694804
HS Grad      -3.2354694    6.3126849
                Murder      HS Grad
Population 5663.523714 -3551.509551
Income     -521.894286  3076.768980
Illiteracy    1.581776    -3.235469
Life Exp     -3.869480     6.312685
Murder       13.627465   -14.549616
HS Grad     -14.549616    65.237894
> cor(states)
            Population     Income
Population  1.00000000  0.2082276
Income      0.20822756  1.0000000
Illiteracy  0.10762237 -0.4370752
Life Exp   -0.06805195  0.3402553
Murder      0.34364275 -0.2300776
HS Grad    -0.09848975  0.6199323
           Illiteracy    Life Exp
Population  0.1076224 -0.06805195
Income     -0.4370752  0.34025534
Illiteracy  1.0000000 -0.58847793
Life Exp   -0.5884779  1.00000000
Murder      0.7029752 -0.78084575
HS Grad    -0.6571886  0.58221620
               Murder     HS Grad
Population  0.3436428 -0.09848975
Income     -0.2300776  0.61993232
Illiteracy  0.7029752 -0.65718861
Life Exp   -0.7808458  0.58221620
Murder      1.0000000 -0.48797102
HS Grad    -0.4879710  1.00000000

T检验

> x = rnorm(100, mean = 10, sd = 1)#平均数为10,标准差为1
> y = rnorm(100, mean = 30, sd = 10)#平均数为30,标准差为10
> t.test(x, y, alt = "two.sided",paired=TRUE)

	Paired t-test

data:  x and y
t = -19.353, df = 99, p-value <
2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -21.78096 -17.73004
sample estimates:
mean of the differences 
               -19.7555 

群体差异的非参数检验

> wilcox.test(x,y,alt="less")

	Wilcoxon rank sum test with
	continuity correction

data:  x and y
W = 294, p-value < 2.2e-16
alternative hypothesis: true location shift is less than 0
发布了28 篇原创文章 · 获赞 0 · 访问量 855

猜你喜欢

转载自blog.csdn.net/weixin_43866408/article/details/104756922