R: ggplot2 Data Visualization - Basics

1 Installation

# The easiest way to get ggplot2 is to install the whole tidyverse:
install.packages("tidyverse")

# Alternatively, install only ggplot2:
install.packages("ggplot2")

# Or install the development version from GitHub
# (the package name and the repository path are case-sensitive and must not
# contain spaces):
# install.packages("devtools")
devtools::install_github("tidyverse/ggplot2")

 2 Getting started

1 Basic Settings

library(ggplot2)

# Initialise a plot when only the dataset (here: diamonds) is known.
ggplot(diamonds)
# General form, where df must be a data frame:
# gg <- ggplot(df, aes(x = xcol, y = ycol))

# Only the X axis is fixed; the Y axis can be specified in the respective geoms.
ggplot(diamonds, aes(x = carat))
# Both the X and the Y axes are fixed for all layers.
ggplot(diamonds, aes(x = carat, y = price))
# Each category of the 'cut' variable gets its own colour once a geom is added.
ggplot(diamonds, aes(x = carat, color = cut))

# aes() stands for aesthetics: ggplot2 treats the X and Y axes as aesthetics,
# along with colour, size, shape and other formatting. To keep such a format
# fixed (i.e. not driven by a variable of the data frame), set it outside aes().
# NOTE(review): a constant passed to ggplot() itself is presumably not picked
# up by later layers; fixed aesthetics are normally given to the geom, e.g.
# geom_point(color = "steelblue") - confirm the intent of this example.
ggplot(diamonds, aes(x = carat), color = "steelblue")

2 layer

A ggplot2 layer is also called a 'geom'. Once the initial setup is done, different layers can be added on top of it, one after another (the ggplot2 documentation lists the available geoms). As soon as the layers supply all the required information, the graphic is displayed.

library(ggplot2)
gg <- ggplot(diamonds, aes(x = carat, y = price))
gg + geom_point()

# Static formatting: fixed size, shape and colour; 'stroke' controls the
# width of the point border.
gg + geom_point(size = 1, shape = 1, color = "steelblue", stroke = 2)

# Dynamic formatting: carat, cut and color are columns of the data, so the
# point format varies with the data.
gg + geom_point(aes(size = carat, shape = cut, color = color, stroke = carat))

# Add a scatterplot geom (layer 1) and a smoothing geom (layer 2).
ggplot(diamonds, aes(x = carat, y = price, color = cut)) +
  geom_point() +
  geom_smooth()
# Or specify the aesthetics inside each geom layer instead.
ggplot(diamonds) +
  geom_point(aes(x = carat, y = price, color = cut)) +
  geom_smooth(aes(x = carat, y = price, color = cut))

# Merge the separate smoothing curves into a single one by removing the
# colour aesthetic from geom_smooth().
ggplot(diamonds) +
  geom_point(aes(x = carat, y = price, color = cut)) +
  geom_smooth(aes(x = carat, y = price))
# Same plot, written more simply.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(aes(color = cut)) +
  geom_smooth()

# Additionally vary the point shape: colour follows 'cut', shape follows 'color'.
ggplot(diamonds, aes(x = carat, y = price, color = cut, shape = color)) +
  geom_point()

添加水平或者垂直线

# NOTE(review): gg3 is never defined in this document. The definition below is
# reconstructed from how gg3 is used (a geom_point scatterplot of carat vs.
# price whose colour legend has the levels D..J, i.e. the 'color' column) -
# confirm against the original tutorial.
gg3 <- ggplot(diamonds, aes(x = carat, y = price, color = color)) +
  geom_point()

# Horizontal line. Line types: solid, dashed, dotted, dotdash, longdash, twodash.
p1 <- gg3 + geom_hline(yintercept = 5000, size = 2, linetype = "dotted", color = "blue")
# Vertical line.
p2 <- gg3 + geom_vline(xintercept = 4, size = 2, color = "firebrick")
# Line segment from (4, 5000) to (4, 10000). size and lineend are fixed
# settings, not data mappings, so they belong outside aes().
p3 <- gg3 + geom_segment(aes(x = 4, y = 5000, xend = 4, yend = 10000), size = 2, lineend = "round")
# One segment per data row. x, y: start points. xend, yend: end points.
p4 <- gg3 +
  geom_segment(aes(x = carat, y = price, xend = carat, yend = price - 500, color = color), size = 2) +
  coord_cartesian(xlim = c(3, 5))
gridExtra::grid.arrange(p1, p2, p3, p4, ncol = 2)

3 标签

使用 labs 层来自定义标签

library(ggplot2)
# Scatterplot of price against carat, coloured by cut; labs() supplies the
# plot title and the axis titles.
gg <- ggplot(diamonds, aes(x = carat, y = price, color = cut)) +
  geom_point() +
  labs(title = "Scatterplot", x = "Carat", y = "Price")
print(gg)  # draw the plot

 4 主题和格式调整

使用 theme() 函数控制标签的尺寸、颜色等;在 element_text() 函数内自定义具体的格式;想要清除某个元素的格式,则将其设为 element_blank() 即可

# Enlarge the plot title, axis titles and axis tick text, then rename the
# colour legend.
gg1 <- gg +
  theme(
    plot.title = element_text(face = "bold", size = 30),
    axis.title.x = element_text(size = 25),
    axis.title.y = element_text(size = 25),
    axis.text.x = element_text(size = 15),  # X axis tick text
    axis.text.y = element_text(size = 15)
  ) +
  scale_color_discrete(name = "Cut of diamonds")  # changes the legend title
# scale_shape_discrete(name = "legend title"): legend title for a legend based
#   on a discrete (categorical) variable
# scale_shape_continuous(name = "legend title"): the same for a continuous
#   variable (shape / fill / color attributes)
print(gg1)

# Change the colour (etc.) of all text in the plot.
# NOTE(review): the original line used gg2, which is not defined anywhere in
# this document; gg1 is used here so the example runs - confirm.
gg1 + theme(text = element_text(color = "blue"))  # all text turns blue.
# Change the point colours: one colour per level, plus a legend title.
gg3 + scale_colour_manual(
  name = "Legend",
  values = c(
    "D" = "grey", "E" = "red", "F" = "blue", "G" = "yellow",
    "H" = "black", "I" = "green", "J" = "firebrick"
  )
)

颜色表:

调整x y轴范围

三种方法:

  1. Using coord_cartesian(xlim=c(x1,x2))
  2. Using xlim(c(x1,x2))
  3. Using scale_x_continuous(limits=c(x1,x2))  注意:第2、3种方法会删除数据框中不在范围之内的点的信息
# Adjust the visible X and Y ranges with coord_cartesian().
gg3 + geom_smooth() + coord_cartesian(xlim = c(0, 3), ylim = c(0, 5000))  # zoom in

# Setting scale limits instead deletes the points outside the range. Note that
# the smoothing line changes accordingly, which may mislead the analysis.
gg3 +
  geom_smooth() +
  scale_x_continuous(limits = c(0, 3)) +
  scale_y_continuous(limits = c(0, 5000))  # deletes the points outside limits
#> Warning message:
#> Removed 14714 rows containing missing values (geom_point). 

# Change the axis tick labels and tick spacing. Both scales used here are
# continuous. The six X labels are paired with six explicit breaks so that the
# number of labels always matches the number of breaks.
gg3 +
  scale_x_continuous(breaks = 0:5, labels = c("zero", "one", "two", "three", "four", "five")) +
  scale_y_continuous(breaks = seq(0, 20000, 4000))

# Rotate the tick text on both axes by 45 degrees.
gg3 + theme(
  axis.text.x = element_text(angle = 45),
  axis.text.y = element_text(angle = 45)
)
gg3 + coord_flip()  # swap the X and Y axes
# Style the background and grid lines inside the plotting panel.
gg3 + theme(
  panel.background = element_rect(fill = "springgreen"),
  panel.grid.major = element_line(colour = "firebrick", size = 3),
  panel.grid.minor = element_line(colour = "blue", size = 1)
)

图形背景与边距

# Set the background colour outside the panel and the plot margins.
gg3 + theme(
  plot.background = element_rect(fill = "yellowgreen"),
  plot.margin = unit(c(2, 4, 1, 3), "cm")  # top, right, bottom, left
)

 图例

# Remove the legend title by giving it an empty name.
gg3 + scale_color_discrete(name = "")
# Remove the legend title through the theme.
p1 <- gg3 + theme(legend.title = element_blank())
# Change the legend title.
p2 <- gg3 + scale_color_discrete(name = "Diamonds")
# Change the legend title and the point colours.
gg3 + scale_colour_manual(
  name = "Legend",
  values = c(
    "D" = "grey", "E" = "red", "F" = "blue", "G" = "yellow",
    "H" = "black", "I" = "green", "J" = "firebrick"
  )
)

# Hide the legend.
gg3 + theme(legend.position = "none")  # hides the legend
# Change the legend position. Outside the plot: top / bottom / left / right.
p1 <- gg3 + theme(legend.position = "top")
# Inside the plot. legend.justification is the anchor point of the legend,
# with the legend's bottom-left corner taken as (0, 0).
p2 <- gg3 + theme(legend.position = c(1, 0), legend.justification = c(1, 0))
gridExtra::grid.arrange(p1, p2, ncol = 2)
# Equivalent to:
# library(gridExtra)
# grid.arrange(p1, p2, ncol = 2)

# Change the order of the legend entries: create a new factor column whose
# levels are listed in the desired order.
# Template - df, legendcolumn and new_order_of_legend_items are placeholders
# that are not defined in this document, so the line is kept as a comment:
# df$newLegendColumn <- factor(df$legendcolumn, levels = c(new_order_of_legend_items), ordered = TRUE)
# Runnable illustration on the diamonds data: reverse the level order of 'color'.
diamonds_reordered <- diamonds
diamonds_reordered$color_reversed <- factor(
  diamonds_reordered$color,
  levels = rev(levels(diamonds_reordered$color)),
  ordered = TRUE
)

# Pieces used to style the legend:
#   legend.title - legend title
#   legend.text  - legend text
#   legend.key   - background box of the legend keys
#   guides       - legend symbols
gg3 +
  theme(
    legend.title = element_text(size = 20, color = "firebrick"),
    legend.text = element_text(size = 15),
    legend.key = element_rect(fill = "steelblue")
  ) +
  guides(colour = guide_legend(override.aes = list(size = 2, shape = 4, stroke = 2)))
# Sets the legend title colour and size, the legend text size, the key box
# colour, and the symbol size, shape and stroke.

5 多图绘制

# One panel per level of the categorical variable 'cut', laid out in 3 columns.
gg1 + facet_wrap(~ cut, ncol = 3)
# One panel per combination of 'color' and 'cut' (left of ~ : color, right: cut).
gg1 + facet_wrap(color ~ cut)

# Same panels, with the scale restrictions released (scales = "free").
gg1 + facet_wrap(color ~ cut, scales = "free")

# Grid layout for easier comparison - row: color, column: cut. The per-panel
# headers are dropped, leaving more space for the plots.
gg1 + facet_grid(color ~ cut)

 

 6 一些经常用到的特征

制作时间序列图形(使用ggfortify)

使用ggfortify包很容易直接用一个时间序列对象来画时间序列图形,而不用把数据类型转换为数据框,更多请见

# Install the ggfortify package from GitHub.
library("devtools")
install_github(repo = "sinhrks/ggfortify")

ggfortify 使得 ggplot2 知道怎么解译 ts 对象. 加载 ggfortify 包后, 你可以使用 ggplot2::autoplot 函数来操作 ts 对象

library(ggfortify)
# AirPassengers is a 'ts' object; autoplot() draws it directly.
autoplot(AirPassengers) +
  labs(title = "AirPassengers")

# Change the colour and the type of the line.
autoplot(AirPassengers, ts.colour = "red", ts.linetype = "dashed")
# Use help(autoplot.ts) (or help(autoplot.*) for any other objects) to look up
# the options that can be changed.

autoplot 也能处理其他时间序列类型. 支持的包有:

  • zoo::zooreg
  • xts::xts
  • timeSeries::timeSeries
  • tseries::irts
library(xts)
# Convert the series to an xts object and plot it with a green line.
autoplot(as.xts(AirPassengers), ts.colour = "green")

 也能通过命名改变{ggplot2} 几何图形类型. 支持线、条形、点图

# Draw the series as bars.
autoplot(AirPassengers, ts.geom = "bar", fill = "blue")
# Draw the series as points.
autoplot(AirPassengers, ts.geom = "point", shape = 3)

同一张图上画多个时间序列

要求数据是数据框类型,且一列必须为时间数据

(1)转换成数据框后,累加层

# Approach 1: one geom_line() per series.
data(economics, package = "ggplot2")  # load the dataset
economics <- data.frame(economics)    # convert to a plain data frame
# The constant strings given to 'col' inside aes() become the legend labels,
# so each label is spelled like the column it describes ("pce", not "pcs").
ggplot(economics) +
  geom_line(aes(x = date, y = pce, col = "pce")) +
  geom_line(aes(x = date, y = unemploy, col = "unemploy")) +
  scale_color_discrete(name = "Legend") +
  labs(title = "Economics")

(2)使用 reshape2::melt 设置 id 到日期格式来合并数据框. 然后增加一个 geom_line 把颜色格式设置为variable (此变量是在合并过程中被创建).

# Approach 2: melt the data frame to long format and draw a single geom_line().
library(reshape2)
df <- melt(economics[, c("date", "pce", "unemploy")], id.vars = "date")
# 'variable' is the column created by melt(); mapping it to colour gives one
# line per series.
ggplot(df) +
  geom_line(aes(x = date, y = value, color = variable)) +
  labs(title = "Economics")

 

条形图

ggplot 默认创建的是 ‘counts’ 型的条形图,即计算某一列变量中每种值出现的频数,这时候无需指定y轴的变量

但是呢,如果想具体指定y轴的值,这时候一定要在geom_bar内设置stat="identity"

# Absolute bar chart: specify both the X and the Y axis and set stat="identity".
# Mean of mpg for each value of 'cyl'; the two result columns are named in the
# same step.
df <- setNames(
  aggregate(mtcars$mpg, by = list(mtcars$cyl), FUN = mean),
  c("cyl", "mpg")
)
head(df)
#>   cyl    mpg
#> 1   4  26.66
#> 2   6  19.74
#> 3   8  15.10

# The Y axis is given explicitly, hence stat = "identity".
gg_bar <- ggplot(df, aes(x = cyl, y = mpg)) +
  geom_bar(stat = "identity")
print(gg_bar)

 

 改变条形图的颜色和宽度

# Treat cyl as a categorical variable.
df$cyl <- as.factor(df$cyl)
# Fill each bar by its cyl level and narrow the bars.
gg_bar <- ggplot(df, aes(x = cyl, y = mpg)) +
  geom_bar(aes(fill = cyl), stat = "identity", width = 0.25)
# Assign one fill colour per cyl level.
gg_bar + scale_fill_manual(
  values = c("4" = "steelblue", "6" = "firebrick", "8" = "darkgreen")
)

改变颜色

library(RColorBrewer)
display.brewer.all(n = 20, exact.n = FALSE)  # show all colour schemes
# "Reds" is the palette name.
ggplot(mtcars, aes(x = cyl, y = carb, fill = factor(cyl))) +
  geom_bar(stat = "identity") +
  scale_fill_brewer(palette = "Reds")

gg <- ggplot(mtcars, aes(x = cyl))
# Side-by-side bars.
p1 <- gg + geom_bar(aes(fill = factor(vs)), position = "dodge")
# Stacked bars.
p2 <- gg + geom_bar(aes(fill = factor(vs)))
gridExtra::grid.arrange(p1, p2, ncol = 2)

 折线图

# Method 1: one geom_line() per series with fixed colours - no legend.
gg <- ggplot(economics, aes(x = date))  # basic setup
gg +
  geom_line(aes(y = psavert), size = 2, color = "firebrick") +
  geom_line(aes(y = uempmed), size = 1, color = "steelblue", linetype = "twodash")
# Available line types: solid, dashed, dotted, dotdash, longdash and twodash

# Method 2: melt by date and map the series name to colour - gets a legend.
library(reshape2)
df_melt <- melt(economics[, c("date", "psavert", "uempmed")], id.vars = "date")
gg <- ggplot(df_melt, aes(x = date))  # setup
gg +
  geom_line(aes(y = value, color = variable), size = 1) +
  scale_color_discrete(name = "Legend")

 

丝带图 

使用 geom_ribbon()画填充时间序列图 需要 ymin and ymax 两个参量

# Prepare the data frame
st_year <- start(AirPassengers)[1]  # start year
st_month <- "01"
st_date <- as.Date(paste(st_year, st_month, "01", sep = "-"))  # start date
# One date per observation at monthly intervals; 'length.out' is spelled out
# in full instead of relying on partial argument matching.
dates <- seq.Date(st_date, length.out = length(AirPassengers), by = "month")
# Remember to build a data frame. The columns are named explicitly so the
# names do not depend on how the expressions happen to be deparsed.
df <- data.frame(
  dates = dates,
  AirPassengers = AirPassengers,
  AirPassengers.2 = AirPassengers / 2
)
head(df)
#>        dates AirPassengers AirPassengers.2
#> 1 1949-01-01           112            56.0
#> 2 1949-02-01           118            59.0
#> 3 1949-03-01           132            66.0
#> 4 1949-04-01           129            64.5
#> 5 1949-05-01           121            60.5
#> 6 1949-06-01           135            67.5
# Shared setup: dates on the X axis, a title, and enlarged title/axis text.
gg <- ggplot(df, aes(x = dates)) +
  labs(title = "AirPassengers") +
  theme(
    plot.title = element_text(size = 30),
    axis.title.x = element_text(size = 20),
    axis.text.x = element_text(size = 15)
  )
# Ribbons with ymin = 0.
gg +
  geom_ribbon(aes(ymin = 0, ymax = AirPassengers)) +
  geom_ribbon(aes(ymin = 0, ymax = AirPassengers.2), fill = "green")

# Ribbons spanning 20 below to 20 above each series.
gg +
  geom_ribbon(aes(ymin = AirPassengers - 20, ymax = AirPassengers + 20)) +
  geom_ribbon(aes(ymin = AirPassengers.2 - 20, ymax = AirPassengers.2 + 20), fill = "green")

 

 区域图

geom_area和 geom_ribbon类似,只是 ymin设置为 0,如果想画重叠的区域图,使用 alpha aesthetic 使得最外层为透明的

# Method 1: non-overlapping areas, from the melted (long) data frame.
df <- reshape2::melt(economics[, c("date", "psavert", "uempmed")], id.vars = "date")
head(df, 3)
#>         date variable value
#> 1 1967-07-01  psavert  12.5
#> 2 1967-08-01  psavert  12.5
#> 3 1967-09-01  psavert  11.7
p1 <- ggplot(df, aes(x = date)) +
  geom_area(aes(y = value, fill = variable)) +
  labs(title = "Non-Overlapping - psavert and uempmed")

# Method 2: overlapping areas. PS: the data is not reshaped into a long data
# frame here, so there is correspondingly no legend.
p2 <- ggplot(economics, aes(x = date)) +
  geom_area(aes(y = psavert), fill = "yellowgreen", color = "yellowgreen") +
  geom_area(aes(y = uempmed), fill = "dodgerblue", alpha = 0.7, linetype = "dotted") +
  labs(title = "Overlapping - psavert and uempmed")
gridExtra::grid.arrange(p1, p2, ncol = 2)

 

 箱形图和小提琴图

可以使用: * outlier.shape * outlier.stroke * outlier.size * outlier.colour 来控制异常点的形状 大小 边缘

如果 notch 被设为 TRUE,见下图

# Box plot of mpg per cylinder count, with notches and styled outlier points.
p1 <- ggplot(mtcars, aes(factor(cyl), mpg)) +
  geom_boxplot(
    aes(fill = factor(cyl)),
    width = 0.5,
    outlier.colour = "dodgerblue",
    outlier.size = 4,
    outlier.shape = 16,
    outlier.stroke = 2,
    notch = TRUE
  ) +
  labs(title = "Box plot")
# Violin plot of the same data, untrimmed. In the original these statements
# were swallowed by a trailing comment on the previous line and never ran.
p2 <- ggplot(mtcars, aes(factor(cyl), mpg)) +
  geom_violin(aes(fill = factor(cyl)), width = 0.5, trim = FALSE) +
  labs(title = "Violin plot (untrimmed)")
gridExtra::grid.arrange(p1, p2, ncol = 2)

 

 密度图

# Density plot of mpg, one filled curve per cylinder count.
ggplot(mtcars, aes(mpg)) +
  geom_density(aes(fill = factor(cyl)), size = 2) +
  labs(title = "Density plot")

 

 瓦片图(热力图)

# Build the (symmetric) correlation matrix, rounded to 2 decimals, and melt it
# to long format.
corr <- round(cor(mtcars), 2)
df <- reshape2::melt(corr)
# Tile plot with the correlation value printed on each tile.
gg <- ggplot(df, aes(x = Var1, y = Var2, fill = value, label = value)) +
  geom_tile() +
  theme_bw() +
  geom_text(aes(label = value, size = value), color = "white") +
  labs(title = "mtcars - Correlation plot") +
  theme(text = element_text(size = 20), legend.position = "none")

library(RColorBrewer)
# The same plot with two alternative fill scales.
p2 <- gg + scale_fill_distiller(palette = "Reds")
p3 <- gg + scale_fill_gradient2()
gridExtra::grid.arrange(gg, p2, p3, ncol = 3)

相同坐标轴范围

# Price against price plus uniform random noise, drawn with coord_equal().
ggplot(
  diamonds,
  aes(x = price, y = price + runif(nrow(diamonds), 100, 10000), color = cut)
) +
  geom_point() +
  geom_smooth() +
  coord_equal()

自定义布局

gridExtra包能在一个网格中安排放置多个图形

library(gridExtra)
# The original call used plot1 and plot2, which are not defined at this point
# in the document; the existing plot objects p1 and p2 are arranged side by
# side instead so that the example runs.
grid.arrange(p1, p2, ncol = 2)

 改变主题

切换不同的内置主题:

  1. theme_gray()
  2. theme_bw()
  3. theme_linedraw()
  4. theme_light()
  5. theme_minimal()
  6. theme_classic()
  7. theme_void()

 ggthemes 包提供了另外的主题,这些主题模仿了一些著名杂志或者软件的风格

# Install the stable version from CRAN
install.packages("ggthemes", dependencies = TRUE)
# or install the development versions. The ggplot2 repository path matches
# the one used in the installation section (tidyverse/ggplot2).
library("devtools")
install_github(c("tidyverse/ggplot2", "jrnold/ggthemes"))
# Scatterplot with a smoothing layer, drawn with the theme_bw() theme.
ggplot(diamonds, aes(x = carat, y = price, color = cut)) +
  geom_point() +
  geom_smooth() +
  theme_bw() +
  labs(title = "bw Theme")

 注记

library(grid)
# Text grob positioned at x = 0.1, y = 0.9, left-justified (hjust = 0), in
# bold firebrick 25-point type.
my_grob <- grobTree(
  textGrob(
    "This text is at x=0.1 and y=0.9, relative!\n Anchor point is at 0,0",
    x = 0.1,
    y = 0.9,
    hjust = 0,
    gp = gpar(col = "firebrick", fontsize = 25, fontface = "bold")
  )
)

# Bar chart of cyl with the text grob added as a custom annotation.
ggplot(mtcars, aes(x = cyl)) +
  geom_bar() +
  annotation_custom(my_grob) +
  labs(title = "Annotation Example")

保存图片

plot1 <- ggplot(mtcars, aes(x = cyl)) +
  geom_bar()
# Save the most recently created plot.
ggsave("myggplot.png")
# Save a specific plot object.
ggsave("myggplot.png", plot = plot1)

相关链接:

非常有用:https://ggplot2.tidyverse.org/reference/

Cheatsheets:http://www.rstudio.com/wp-content/uploads/2015/12/ggplot2-cheatsheet-2.0.pdf

教程:http://r-statistics.co/ggplot2-Tutorial-With-R.html

https://ggplot2.tidyverse.org/

时间序列画图包:http://rpubs.com/sinhrks/plot_ts

主题:https://github.com/jrnold/ggthemes

Guess you like

Origin www.cnblogs.com/icydengyw/p/11481439.html