R语言(二)

图形初阶

创建和保存图形

# Scatter plot of mpg against wt from mtcars, overlaid with the fitted
# linear-regression line and a main title.
plot(x = mtcars$wt, y = mtcars$mpg)
abline(reg = lm(formula = mtcars$mpg ~ mtcars$wt))
title(main = "Regression of MPG on Weight")

保存为pdf

# Open a PDF device, draw the regression plot into it, then close the
# device so the file is written out.
pdf(file = "mygraph.pdf")
plot(x = mtcars$wt, y = mtcars$mpg)
abline(reg = lm(formula = mtcars$mpg ~ mtcars$wt))
title(main = "Regression of MPG on Weight")
dev.off()

除了pdf()外,还可以使用函数png(), jpeg(), bmp(), tiff()等将图形保存为其他格式

创建新图形窗口

# Open a new graphics device, which becomes the current one.
dev.new()

图形参数的修改

·一个例子

# Example data: five dose levels and the response recorded for two drugs.
dose  <- c(20, 30, 40, 45, 60)
drugA <- c(16, 20, 27, 40, 60)
drugB <- c(15, 18, 25, 31, 40)

# type = "b" draws both the points and the line segments joining them.
plot(x = dose, y = drugA, type = "b")

type="b" 表示同时绘制点和线(b 即 both)

修改图形参数的方法

     ①通过par()函数修改

     ②在绘图函数中直接设置

使用实心三角形和虚线绘图

# Snapshot the writable graphical parameters so they can be put back later.
opar <- par(no.readonly = TRUE)
# Line type 2 and plotting symbol 17 for every plot drawn from here on.
par(pch = 17, lty = 2)
plot(x = dose, y = drugA, type = "b")
# Restore the settings saved above.
par(opar)

# Same plot, with the line type and symbol passed to this call only.
plot(x = dose, y = drugA, type = "b", pch = 17, lty = 2)

符号和线条参数


 

颜色参数

# Build n rainbow colours and show them as equal pie slices, each slice
# labelled with its own colour code.
n <- 10
mycolors <- rainbow(n)
pie(
  x = rep(1, times = n),
  labels = mycolors,
  col = mycolors
)

指定文本属性参数

图形尺寸与边界尺寸参数

# Save the current graphical parameters, change the plot dimensions, line
# width and text sizes, draw the plot, then restore the saved parameters.
opar <- par(no.readonly = TRUE)
par(pin = c(2, 3))                   # plot region: width 2, height 3 (inches)
par(lwd = 2, cex = 1.5)              # line width and overall magnification
par(cex.axis = .75, font.axis = 3)   # axis annotation size and font
plot(dose, drugA, type = "b", pch = 19, lty = 2, col = "red")
# Restore the saved settings; without this the pin/lwd/cex changes would
# carry over to every later plot in the session.
par(opar)

设置坐标轴和文本标注

# A single plot() call that sets the axis annotation, main title, subtitle,
# axis labels and axis limits through inline graphical parameters.
plot(
  dose, drugA,
  type = "b", pch = 19, col = "blue",
  # axis annotation (tick labels)
  cex.axis = 1.5, col.axis = "darkgreen", font.axis = 2,
  # main title
  main = "这是主标题:plot初探", font.main = 2, cex.main = 2,
  col.main = "green",
  # subtitle
  sub = "这是副标题:图1", font.sub = 3, cex.sub = 1.5, col.sub = "red",
  # axis labels
  xlab = "dose", ylab = "drugA",
  cex.lab = 1.5, font.lab = 1, col.lab = "grey20",
  # axis ranges
  xlim = c(0, 60), ylim = c(0, 70)
)

添加次刻度线

添加参考线

# Draw the plot, then add one horizontal and one vertical dashed blue
# reference line, both at 40.
plot(x = dose, y = drugA, type = "b")
abline(h = 40, v = 40, lty = 2, col = "blue")

添加图例

# Compare the two drugs on one plot: drugA as the base plot, drugB added
# with lines(), plus a reference line, minor tick marks and a legend.
library(Hmisc)  # provides minor.tick()

opar <- par(no.readonly = TRUE)
par(lwd = 2, cex = 1.5, font.lab = 2)
plot(dose, drugA, type = "b", pch = 15, lty = 1, col = "red",
     ylim = c(0, 60), main = "DrugA vs. DrugB",
     xlab = "Drug dosage", ylab = "Drug response")
abline(h = c(30), lwd = 1.5, lty = 2, col = "gray")
# The argument is spelt tick.ratio; the earlier call with "tick.ration"
# failed with an unused-argument error and has been removed.
minor.tick(nx = 3, ny = 3, tick.ratio = 0.5)
lines(dose, drugB, type = "b", pch = 17, col = "blue", lty = 2)
legend("topleft", inset = .05, title = "Drug Type", c("A", "B"),
       lty = c(1, 2), pch = c(15, 17), col = c("red", "blue"))
par(opar)

文本标注

# Scatter plot of mileage against weight, then label each point with the
# corresponding row name of mtcars, placed to the right of the point.
plot(
  x = mtcars$wt, y = mtcars$mpg,
  main = "Mileage vs. Car Weight",
  xlab = "Weight", ylab = "Mileage",
  pch = 18, col = "blue"
)
text(
  x = mtcars$wt, y = mtcars$mpg,
  labels = row.names(mtcars),
  cex = 0.6, pos = 4, col = "red"
)

图形的组合

# Arrange four plots in a 2 x 2 grid filled row by row, then restore the
# graphical parameters that were in effect before.
opar <- par(no.readonly = TRUE)
par(mfrow = c(2, 2))

plot(x = mtcars$wt, y = mtcars$mpg, main = "Scatterplot of WT vs MPG")
plot(x = mtcars$wt, y = mtcars$disp, main = "Scatterplot of wt vs disp")
hist(x = mtcars$wt, main = "Histogram of wt")
boxplot(mtcars$wt, main = "Boxplot of wt")

par(opar)

# Layout matrix (filled by row): figure 1 spans the whole top row,
# figures 2 and 3 share the bottom row.
# TRUE is spelt out because T is an ordinary variable that can be reassigned.
layout(matrix(c(1, 1, 2, 3), 2, 2, byrow = TRUE))
hist(mtcars$wt)
hist(mtcars$mpg)
hist(mtcars$disp)

# Same arrangement, with relative column widths 3:1 and row heights 1:2.
# TRUE is spelt out because T is an ordinary variable that can be reassigned.
layout(matrix(c(1, 1, 2, 3), 2, 2, byrow = TRUE),
       widths = c(3, 1), heights = c(1, 2))
hist(mtcars$wt)
hist(mtcars$mpg)
hist(mtcars$disp)

数据管理

创建数据框示例

# Create the leadership data frame: one row per manager, with demographic
# columns and five questionnaire items q1..q5 (NA marks a missing answer).
# NOTE(review): the column named `data` holds date strings; it may have
# been meant to be `date` -- left unchanged here, confirm before renaming.
manager <- c(1, 2, 3, 4, 5)
data <- c("10/24/08", "10/28/08", "10/1/08", "10/12/08", "5/1/09")
country <- c("US", "US", "UK", "UK", "UK")
gender <- c("M", "F", "F", "M", "F")
age <- c(32, 45, 25, 39, 39)
q1 <- c(5, 3, 3, 3, 2)
q2 <- c(4, 5, 5, 3, 2)
q3 <- c(5, 2, 5, 4, 1)
q4 <- c(5, 5, 2, NA, 2)
q5 <- c(5, 5, 2, NA, 1)
# The option is spelt stringsAsFactors; the misspelt "stringAsFactor" was
# silently added as an eleventh column instead of controlling conversion.
leadership <- data.frame(manager, data, country, gender, age,
                         q1, q2, q3, q4, q5,
                         stringsAsFactors = FALSE)

变量

在数据框中创建新的变量

# Method 1: build the new columns by referring to the data frame explicitly.

mydata <- data.frame(
  x1 = c(2, 2, 6, 4),
  x2 = c(3, 4, 2, 8)
)
mydata[["sumx"]] <- mydata[["x1"]] + mydata[["x2"]]
mydata[["meanx"]] <- (mydata[["x1"]] + mydata[["x2"]]) / 2

# Method 2: attach the data frame so x1 and x2 can be used by bare name.

attach(mydata)
mydata$sumx <- x1 + x2
mydata$meanx <- (x1 + x2) / 2
# Detach again so the attached copy does not stay on the search path and
# mask or confuse later references to x1/x2.
detach(mydata)

# Method 3: transform() evaluates the expressions inside the data frame
# and returns it with the new columns.

mydata <- transform(
  mydata,
  sumx = x1 + x2,
  meanx = (x1 + x2) / 2
)

变量的重编码

# Treat the code 99 as a missing age, then bin age into three categories
# stored in the new column agecat.
leadership$age[leadership$age == 99] <- NA

leadership$agecat[leadership$age > 75] <- "Elder"
leadership$agecat[leadership$age <= 75 & leadership$age >= 55] <- "Middle Aged"
leadership$agecat[leadership$age < 55] <- "Young"

更为紧凑的代码形式

# Same recoding written with within(), so the columns of leadership can be
# referenced by bare name inside the braces.
leadership <- within(leadership, {
  agecat <- NA
  agecat[age > 75] <- "Elder"
  agecat[age <= 75 & age >= 55] <- "Middle Aged"
  agecat[age < 55] <- "Young"
})

变量重命名

缺失值

# Logical matrix flagging the missing entries in columns 6 to 10.
is.na(leadership[, 6:10])

日期值

数据排序

数据集的合并

# Two small data frames that share the key column x3, joined on that key.
# The non-key columns that clash (x1, x2) receive .x / .y suffixes.
data1 <- data.frame(
  x1 = c(1, 2, 3),
  x2 = c(3, 4, 5),
  x3 = c(5, 6, 7)
)
data2 <- data.frame(
  x1 = c(1, 2, 3),
  x2 = c(7, 8, 9),
  x3 = c(5, 6, 7)
)
total <- merge(x = data1, y = data2, by = "x3")

数据集取子集——剔除变量

# Logical mask that is TRUE for the columns q3 and q4; negating it keeps
# every other column.
myvars <- names(leadership) %in% c("q3", "q4")
newdata <- leadership[!myvars]

数据集取子集——选入观测

随机抽样

# Draw three distinct rows at random. seq_len() is used for the row indices
# because 1:nrow(x) yields c(1, 0) when the data frame has no rows.
newdata <- leadership[sample(seq_len(nrow(leadership)), 3, replace = FALSE), ]

使用SQL语句操作数据框

# Query the mtcars data frame with SQL through sqldf: rows with carb equal
# to 1, ordered by mpg.
library(sqldf)
newdf <- sqldf(
  "select * from mtcars where carb=1 order by mpg",
  row.names = TRUE
)

发布了329 篇原创文章 · 获赞 156 · 访问量 15万+

猜你喜欢

转载自blog.csdn.net/hxxjxw/article/details/104121304