R数据可视化-ggplot2包

ggplot2作图

qplot函数:
# Install ggplot2 only when it is missing, then attach it.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# `diamonds` is a data set bundled with ggplot2.
diamonds
# Inspect, then raise, the limit on how many entries print() will show.
getOption("max.print")
options(max.print = 100000)

# Scatterplots
qplot(carat, price, data = diamonds)
qplot(log(carat), log(price), data = diamonds)
# `xyz` is not a column of diamonds; the intended expression is the product
# of the x, y and z columns.
qplot(carat, x * y * z, data = diamonds)

# Work on a smaller data set: fix the random seed, then sample 100 rows into
# `dsmall`. colour = point colour, shape = point shape, alpha = transparency.
set.seed(1410)
dsmall <- diamonds[sample(nrow(diamonds), 100), ]
qplot(carat, price, data = dsmall, colour = color)
qplot(carat, price, data = dsmall, shape = cut)
qplot(carat, price, data = dsmall, alpha = I(1 / 10))
# Use `geom` to pick the kind of plot; several geoms can be combined.
qplot(carat, price, data = diamonds, geom = c("point", "smooth"))
qplot(carat, price, data = dsmall, geom = c("point", "smooth"))
# `span` controls how wiggly the smoothed curve is.
qplot(carat, price, data = dsmall, geom = c("point", "smooth"), span = 1)
# method = "loess" is the default fit for small n; here a GAM is requested
# explicitly instead.
qplot(carat, price, data = dsmall, geom = c("point", "smooth"),
      method = "gam", formula = y ~ s(x))
qplot(carat, price, data = dsmall, geom = c("point", "smooth"),
      method = "gam", formula = y ~ s(x, bs = "cs"))

# Boxplot of price per carat for each colour grade.
qplot(color, price / carat, data = diamonds, geom = "boxplot")
# The same values drawn as jittered points.
qplot(color, price / carat, data = diamonds, geom = "jitter")

# Histogram: `binwidth` is the bar width, `xlim` the x-axis range.
# Binning needs a continuous variable, so carat is used here (color is a
# discrete factor and cannot be binned with binwidth = 0.5 on [0, 3]).
qplot(carat, data = diamonds, geom = "histogram",
      binwidth = 0.5, xlim = c(0, 3))

# Density curves of carat, one curve per colour grade.
qplot(carat, data = diamonds, geom = "density", colour = color)

# Bar chart: tallies a categorical (discrete) variable.
qplot(color, data = diamonds, geom = "bar")
# Weight each row by carat so bar heights are total carat per colour grade.
qplot(color, data = diamonds, geom = "bar", weight = carat) +
  scale_y_continuous("carat")

# Line plot: unemployment rate over time.
economics
# The time column of `economics` is `date` (the original `data` was a typo).
qplot(date, unemploy / pop, data = economics, geom = "line")

# Path plot: points are connected in row order rather than in x order.
qplot(unemploy / pop, uempmed, data = economics, geom = c("point", "path"))

#彩色路径
# Extract the calendar year from a date or date-time value.
# The value is converted with as.POSIXlt(); its `year` component is then
# shifted by 1900 to give the four-digit year.
year <- function(x) {
  broken_down <- as.POSIXlt(x)
  broken_down$year + 1900
}
# 注:POSIXct 格式的时间以数值形式存储,表示从 1970-01-01 00:00:00 (UTC) 到该时间点经过的(有符号)秒数
# POSIXlt 格式的时间以列表形式存储,各分量分别为秒、分、时、日、月、年等;其中 year 分量是自 1900 年起的年数,所以上面的 year() 要加 1900
# 字符串 "05/27/84" 对应格式 "%m/%d/%y"
# 字符串 "May 27 1984" 对应格式 "%B %d %Y"
# Colour the path by calendar year so the direction of time is visible.
qplot(unemploy / pop, uempmed, data = economics, geom = "path",
      colour = year(date))
qplot(unemploy / pop, uempmed, data = economics, geom = "path",
      colour = year(date)) +
  scale_size_area()

# Faceting: one histogram panel per colour grade, stacked in rows.
qplot(carat, data = diamonds, facets = color ~ ., geom = "histogram",
      binwidth = 0.1, xlim = c(0, 3))

# Scatterplots with axis labels, a title and axis limits.
qplot(displ, hwy, data = mpg, colour = factor(cyl))
# plotmath's frac() takes numerator and denominator as two arguments.
qplot(carat, price / carat, data = dsmall,
      ylab = expression(frac(price, carat)),
      xlab = "Weight(carat)", main = "Small diamonds", xlim = c(.2, 1))

# Log-transform both the x and the y axis.
qplot(carat, price, data = dsmall, log = "xy")

ggplot函数:
ggplot函数调用基本形式:
ggplot(数据框形式的数据,aes())
##这一步只是创建了一个ggplot的对象,还未加入图层,尚且什么也看不见
layer(geom, geom_params, stat, stat_params, data, mapping,position)
layer(几何对象,几何变换参数,统计量,统计变换参数,数据,映射,位置调整)
因为每个几何对象(geom)都有默认的统计变换(stat),反之亦然,二者是相互对应的,故只需要添加 +geom_xxx() 或者 +stat_xxx() 图层即可
图层以
geom_XXX(data,mapping,…,stat,position)
stat_XXX(data,mapping,…,geom,position)的形式出现.
其中mapping指的是映射(图层属性aes),position是调整对象的重合方式。一幅图中可多图层叠加,熟悉映射概念及其属性,便能迅速画图达到良好效果。
mapping中的aes可调参数大致分成4类:

类别 参数
第一类(坐标轴) x,y
第二类(颜色) colour,fill,alpha
第三类(形状大小) linetype,shape,size
第四类(分组,排序) group,order

position包括dodge,fill,identity,jitter,stack五个位置

名称 描述
dodge 避免重叠,并排放置
fill 堆叠图像,将高度标准化为1
identity 不做任何变动
jitter 给点添加噪音,避免重叠
stack 将图像元素堆叠

# Create the plot object first, then add a histogram layer to it.
p <- ggplot(diamonds, aes(carat, colour = cut))
p <- p + geom_histogram(binwidth = 0.02, fill = "steelblue")
# Geoms and stats come in default pairs: geom_point() goes with
# stat_identity(), geom_histogram() with stat_bin(). geom_bar() was paired
# with stat_bin() in old ggplot2 releases and uses stat_count() since 2.0.0.

# A complete ggplot() call creates the plot object and then adds layers.
qplot(sleep_rem / sleep_total, awake, data = msleep,
      geom = c("point", "smooth"))
# ...is equivalent to:
ggplot(msleep, aes(sleep_rem / sleep_total, awake)) +
  geom_point() +
  geom_smooth()

# Build and draw a scatterplot from mtcars. This must come first: the plot
# passed to %+% below has to be one whose mappings exist in mtcars.
p <- ggplot(mtcars, aes(mpg, wt, colour = cyl)) + geom_point()
p
# transform() returns a modified copy of the data frame; here mpg is squared.
mtcars <- transform(mtcars, mpg = mpg^2)
# %+% replaces the data set stored in an existing plot object.
p %+% mtcars
# Adding `+ aes(...)` to a plot adds, changes or removes mappings;
# for example `+ aes(y = NULL)` removes the y mapping.
# Grouping. See ?geom_line for help on the available aesthetics.
# The Oxboys data set lives in the nlme package, which must be attached
# before the data can be referenced.
library(nlme)
p <- ggplot(Oxboys, aes(age, height, group = Subject, colour = Subject))
p + geom_line()
p + geom_smooth(aes(group = Subject), method = "lm", se = FALSE)
# `group` splits the data into groups; the layer then renders every group
# in the same way.

# Boxplot per measurement occasion.
boysbox <- ggplot(Oxboys, aes(Occasion, height)) + geom_boxplot()
# Overlay one line per boy.
boysbox + geom_line(aes(group = Subject), colour = "#3366FF")

# A layer can be stored in a variable and reused across plots.
# Here: a semi-transparent steel-blue regression line.
bestfit <- geom_smooth(method = "lm", se = FALSE,
                       colour = alpha("steelblue", 0.5), size = 2)
# Example of reusing the stored layer:
p <- ggplot(Oxboys, aes(age, height)) + bestfit
组合geoms和stats:
d <- ggplot(diamonds, aes(carat)) + xlim(0, 3)
# Variables computed by a stat are referenced as ..name.. (the original
# text had the dots mangled into ellipsis characters). ggplot2 >= 3.3.0
# also offers after_stat(name) for the same purpose.

# Area chart of bin counts.
d + stat_bin(aes(ymax = ..count..), binwidth = 0.1, geom = "area")
# Points whose size follows the computed bin density.
d + stat_bin(aes(size = ..density..), binwidth = 0.1,
             geom = "point", position = "identity")
# Tiles whose fill colour follows the bin count.
d + stat_bin(aes(y = 1, fill = ..count..), binwidth = 0.1,
             geom = "tile", position = "identity")
# Density histogram: the y axis is frequency divided by bin width.
ggplot(diamonds, aes(carat)) +
  geom_histogram(aes(y = ..density..), binwidth = 0.1)

位置调整

# Bar chart of clarity, with bars split by cut.
p <- ggplot(diamonds, aes(clarity, fill = cut))
p + geom_bar()
# "stack" is the default position for bars.
p + geom_bar(position = "stack")
# "fill" stacks as well, but rescales every bar to height 1.
p + geom_bar(position = "fill")
library(RColorBrewer)
p <- ggplot(diamonds, aes(clarity, fill = cut))
p + geom_bar()
# ColorBrewer palette chosen by name; "YlOrRd" is sequential palette no. 18,
# so palette = 18 selects the same colours.
p + geom_bar(position = "stack") + scale_fill_brewer(palette = "YlOrRd")

一个复杂例子:

# library() stops with an error if nlme is missing, whereas require() would
# only return FALSE and let the script fail later.
library(nlme, quietly = TRUE, warn.conflicts = FALSE)
# Linear mixed model: random intercept and random age slope per boy.
model <- lme(height ~ age, data = Oxboys, random = ~ 1 + age | Subject)
oplot <- ggplot(Oxboys, aes(age, height, group = Subject)) + geom_line()
oplot
age_grid <- seq(-1, 1, length.out = 10)
subjects <- unique(Oxboys$Subject)
# Every combination of grid age and subject.
preds <- expand.grid(age = age_grid, Subject = subjects)
# Model predictions for each combination.
preds$height <- predict(model, preds)
# Overlay the predicted growth curves on the observed ones.
oplot + geom_line(data = preds, colour = "#3366FF", size = 0.4)

# Basic plot types, all drawn from the same three-row data frame.

df <- data.frame(x = c(3, 1, 5), y = c(2, 4, 6), label = c("a", "b", "c"))
p <- ggplot(df, aes(x, y, label = label)) + xlab(NULL) + ylab(NULL)

# Scatterplot
p + geom_point() + ggtitle("geom_point")

# Bar chart; stat = "identity" uses the y values as bar heights.
# The title contains double quotes, so it is written with single quotes.
p + geom_bar(stat = "identity") + ggtitle('geom_bar(stat="identity")')

# Line plot (points connected in x order)
p + geom_line() + ggtitle("geom_line")

# Area plot
p + geom_area() + ggtitle("geom_area")

# Path plot (points connected in row order)
p + geom_path() + ggtitle("geom_path")

# Showing distributions: density histograms of depth (y = frequency divided
# by bin width), one panel per cut.
depth_dist <- ggplot(diamonds, aes(depth)) + xlim(58, 68)
depth_dist +
  geom_histogram(aes(y = ..density..), binwidth = 0.1) +
  facet_grid(cut ~ .)
mpg2 <- subset(mpg, cyl != 5 & drv %in% c("4", "f"))
# facet_grid() lays the panels out in a single row of columns ...
ggplot(mpg2, aes(cty)) + geom_histogram(binwidth = 2) + facet_grid(. ~ cyl)
# ... whereas facet_wrap() wraps them into two columns.
ggplot(mpg2, aes(cty)) + geom_histogram(binwidth = 2) +
  facet_wrap(~ cyl, ncol = 2)

# Conditional distribution: share of each cut within every depth bin.
depth_dist +
  geom_histogram(aes(fill = cut), binwidth = 0.1, position = "fill")

# Frequency polygons on the density scale, one line per cut.
depth_dist +
  geom_freqpoly(aes(y = ..density.., colour = cut), binwidth = 0.1)

# Dealing with overplotting via point shape and size.
df <- data.frame(x = rnorm(2000), y = rnorm(2000))
norm <- ggplot(df, aes(x, y))
norm + geom_point(shape = ".")

# Dealing with overplotting via transparency.
norm + geom_point(colour = "black", alpha = 1 / 5)

# Dealing with overplotting via jitter.
td <- ggplot(diamonds, aes(table, depth)) + xlim(50, 70) + ylim(50, 70)
td + geom_point()
jit <- position_jitter(width = 0.5)
td + geom_jitter(position = jit, colour = "black", alpha = 1 / 10)

# Exercise: draw the cities from the us.cities data set on a map of the USA,
# encoding population (e.g. as point diameter) and handling overlapping
# points so the picture stays readable.

# Install maps only when it is missing, then attach it.
if (!requireNamespace("maps", quietly = TRUE)) {
  install.packages("maps")
}
library(maps)
library(ggplot2)
data(us.cities)
big_cities <- subset(us.cities, pop > 50000)
# Without a point layer only the state borders would be drawn. Point size
# encodes population and transparency keeps overlapping cities visible.
ggplot(big_cities, aes(long, lat)) +
  borders("state", size = 0.5) +
  geom_point(aes(size = pop), alpha = 0.5)
tx_cities <- subset(us.cities, country.etc == "TX")
# Texas cities drawn over the county borders.
ggplot(tx_cities, aes(long, lat)) +
  borders("county", "texas", colour = "grey70") +
  geom_point(colour = "black", alpha = 0.5)

# Overriding scale defaults: here the legend title, breaks and labels.
p <- ggplot(msleep, aes(sleep_total, sleep_cycle, colour = vore))
p + geom_point()
p + geom_point() +
  scale_colour_hue("What does\nit eat?",
                   breaks = c("herbi", "carni", "omni", NA),
                   labels = c("plants", "meat", "both", "don't know"))

# `breaks` changes which tick values are shown on an axis or legend, while
# `limits` changes the range of data that is displayed.
p <- ggplot(mtcars, aes(cyl, wt, colour = cyl)) + geom_point()
p + scale_x_continuous(breaks = c(5.5, 6.5))
p + scale_x_continuous(limits = c(5.5, 6.5))
p + scale_colour_gradient(breaks = c(5.5, 6.5))
p + scale_colour_gradient(limits = c(5.5, 6.5))

# Log transformation: transforming the variables ...
qplot(log10(carat), log10(price), data = diamonds)
# ... draws the same points as transforming the scales (only the axis
# labelling differs).
ggplot(diamonds, aes(carat, price)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()

# Date axes. General form:
#   scale_x_date(name = "axis title",
#                breaks = date_breaks("1 month"),
#                labels = date_format("%Y-%m"),
#                limits = as.Date(c("2014-01-01", "2018-01-01")))
plot <- qplot(date, psavert, data = economics, geom = "line") +
  ylab("Personal savings rate") +
  geom_hline(yintercept = 0, colour = "grey50")
plot
library(scales)
# The series spans several decades, so one break per decade is used;
# a break every 10 days would produce an unreadable axis.
plot + scale_x_date(breaks = date_breaks("10 years"))
# Zoom in on one year and label the ticks as yy/mm/dd.
plot + scale_x_date(limits = as.Date(c("2004-01-01", "2005-01-01")),
                    date_labels = "%y/%m/%d")

# Title appearance: font size, colour, horizontal justification, bold face
# and rotation. The justification argument of element_text() is `hjust`;
# `just` is not an accepted argument.
theme(plot.title = element_text(size = 20, color = "red", hjust = 0,
                                face = "bold", angle = 180))

# Grid and axis lines: line type and width.
theme(panel.grid.major = element_line(linetype = "dotted"))
theme(axis.line = element_line(size = 0.5, linetype = "dashed"))

# Draw a function curve: the standard normal density.
# The two-row data frame only supplies the x range (-3 to 3) for the plot.
p <- ggplot(
  data = data.frame(x = c(-3, 3)),
  mapping = aes(x = x)
)
p + stat_function(fun = dnorm)

# User-defined function: a logistic curve centred at xvar = 10,
# i.e. 1 / (1 + exp(-(xvar - 10))). Works element-wise on vectors.
myfun <- function(xvar) {
  shifted <- xvar - 10
  1 / (1 + exp(-shifted))
}
# Draw myfun; the data frame only supplies the x range (0 to 20).
ggplot(data = data.frame(x = c(0, 20)), mapping = aes(x = x)) +
  stat_function(fun = myfun)
# Standard normal density restricted to the interval [0, 2]:
# returns dnorm(x) where 0 <= x <= 2 and NA wherever x < 0 or x > 2.
dnorm_limit <- function(x) {
  outside <- x < 0 | x > 2
  replace(dnorm(x), outside, NA)
}
# ggplot() with dummy data that only supplies the x range (-3 to 3).
p <- ggplot(data.frame(x = c(-3, 3)), aes(x = x))
# Shade the region where dnorm_limit() is not NA, then draw the full
# density curve on top.
p +
  stat_function(fun = dnorm_limit, geom = "area", fill = "blue", alpha = 0.2) +
  stat_function(fun = dnorm)

# Animate a plot: interactive 3D scatterplot that spins.
# Install rgl only when it is missing, then attach it.
if (!requireNamespace("rgl", quietly = TRUE)) {
  install.packages("rgl")
}
library(rgl)
# The three coordinates are the wt, disp and mpg columns of mtcars
# (the `$` column accessors were mangled in the original text).
plot3d(mtcars$wt, mtcars$disp, mtcars$mpg,
       type = "s", size = 0.75, lit = FALSE)
play3d(spin3d())

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

发布了30 篇原创文章 · 获赞 0 · 访问量 338

猜你喜欢

转载自blog.csdn.net/hua_chang/article/details/105035911