Drawing Plots

Orders

  • Draw the Plot

geom_bar

# Bar chart: number of diamonds in each `cut` level.
ggplot(data = diamonds, mapping = aes(x = cut)) +
  geom_bar()
# Levels without any observations are omitted from the axis by default;
# add `scale_x_discrete(drop = FALSE)` to force them to be displayed.

# Show the proportion of every `cut` on the y-axis.
# `after_stat(prop)` refers to the `prop` variable computed by the stat; it
# replaces the `..prop..` notation, which is deprecated since ggplot2 3.4.0
# (`after_stat()` requires ggplot2 >= 3.3.0).
# `group = 1` puts all bars in one group so the proportions are relative to
# the whole data set.
ggplot(diamonds) +
  geom_bar(aes(cut, y = after_stat(prop), group = 1))

# Stack the bars by `clarity`: each bar is split into one colored segment
# per clarity level.
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar()
# Add `position = "fill"` (outside `aes()`) to stretch every bar to full height.
# Add `position = "dodge"` (outside `aes()`) to draw the clarity bars side by
# side within every `cut`.

 geom_line 

# geom_line() needs two positional variables (x and y).

# Typical workflow: tidy the data first, then plot it.
# `table2` is widened so that each value of `type` becomes its own column
# filled from `count`; `cases` is then drawn per country over the years.
table2 %>%
  pivot_wider(names_from = "type", values_from = "count") %>%
  ggplot(mapping = aes(x = year, y = cases)) +
  geom_line(mapping = aes(group = country)) +
  geom_point(mapping = aes(colour = country)) +
  scale_x_continuous(breaks = unique(table2$year))

# If there is no third (grouping) variable, set `group = 1` so that all
# observations are connected by a single line.
ggplot(data = dest_delay, mapping = aes(x = dest, y = avg_dep_delay, group = 1)) +
  geom_line()

 geom_histogram

# Histogram of `carat` with bins 0.2 wide.
ggplot(data = diamonds, mapping = aes(x = carat)) +
  geom_histogram(binwidth = 0.2)
# A histogram like this can be used to spot clusters of values.
# A categorical variable can be added as well, e.g. `fill = color`.
# Adding `y = ..density..` inside `aes()` puts the probability density on the
# y-axis; its value may be greater than 1.

# Keep only the diamonds lighter than 3 carats, then redraw the histogram
# on that subset.
smaller <- filter(diamonds, carat < 3)
ggplot(data = smaller) +
  geom_histogram(mapping = aes(x = carat), binwidth = 0.2)

# Number of observations in every 0.2-wide `carat` bin.
count(diamonds, cut_width(carat, 0.2))

geom_freqpoly

# geom_freqpoly() takes exactly one positional variable (x).

# Show how the counts of a continuous variable are distributed within each
# group, drawn as lines (one line per `cut`):
ggplot(data = diamonds, mapping = aes(x = price, color = cut)) +
  geom_freqpoly(binwidth = 500)

# Put density instead of count on the y-axis, because the number of
# observations may differ a lot between groups.
# `after_stat(density)` replaces the `..density..` notation, which is
# deprecated since ggplot2 3.4.0 (`after_stat()` requires ggplot2 >= 3.3.0).
ggplot(diamonds) +
  geom_freqpoly(
    aes(price, color = cut, y = after_stat(density)),
    binwidth = 500
  )

# Compare the distributions of two cases distinguished by whether a variable
# is NA (here: flights whose `dep_time` is missing are flagged as cancelled).
# `after_stat(density)` replaces the deprecated `..density..` notation
# (`after_stat()` requires ggplot2 >= 3.3.0).
nycflights13::flights %>%
  mutate(
    cancelled = is.na(dep_time),
    sched_hour = sched_dep_time %/% 100, # x %/% y: integer division, 5 %/% 2 is 2
    sched_min = sched_dep_time %% 100, # x %% y: modulus (x mod y), 5 %% 2 is 1
    sched_dep_time = sched_hour + sched_min / 60 # hours plus minutes as a fraction of an hour
  ) %>%
  ggplot() +
  geom_freqpoly(
    aes(sched_dep_time, color = cancelled, y = after_stat(density)),
    binwidth = 1 / 4
  )
# A logical expression such as `color = displ > 5` can also be used to split
# a variable into two categories.

geom_tile

# Number of observations for each combination of `color` and `cut`.
count(diamonds, color, cut)

# Tile plot: every `color`/`cut` combination is drawn as a square, and the
# fill color of the square shows the number of observations (`n`).
diamonds %>%
  count(color, cut) %>%
  ggplot(mapping = aes(x = color, y = cut, fill = n)) +
  geom_tile()

# Avoid holes in the tile plot: `complete()` adds the `class`/`drv`
# combinations that do not occur in the data and sets their count to 0.
mpg %>%
  count(class, drv) %>%
  complete(class, drv, fill = list(n = 0)) %>%
  ggplot(mapping = aes(x = class, y = drv, fill = n)) +
  geom_tile()

# Apply `geom_tile()` to a continuous variable by binning it first.
# The result of `cut_width()` has to be given a column name (here
# `carat_group`) so that it can be referenced inside `aes()`.
diamonds %>%
  count(carat_group = cut_width(carat, 1), cut) %>%
  ggplot(mapping = aes(x = carat_group, y = cut, fill = n)) +
  geom_tile()

geom_point and geom_smooth

# geom_smooth() understands the `group` aesthetic: map `group = drv` to draw
# one line per group while keeping the original color.

# Draw the smooth line for a specific subset only: the points use all of
# `mpg`, the smoother only the "subcompact" rows.
# `se = FALSE` (spelled out, because `F` is an ordinary variable that can be
# reassigned) hides the confidence band.
ggplot(mpg, aes(displ, hwy, color = class)) +
  geom_point() +
  geom_smooth(data = filter(mpg, class == "subcompact"), se = FALSE)

# Same plot, with smooth lines for two classes ("compact" and "subcompact").
# `se = FALSE` (not `F`, which can be reassigned) hides the confidence band.
ggplot(mpg, aes(displ, hwy, color = class)) +
  geom_point() +
  geom_smooth(
    data = filter(mpg, class %in% c("compact", "subcompact")),
    se = FALSE
  )

# Add `position = "jitter"` to `geom_point()` to shift the points slightly at
# random and avoid overlapping.

 

geom_boxplot

# Box plots of `price` for a continuous x: `carat` is cut into 0.1-wide bins
# and one box is drawn per bin.
ggplot(
  data = smaller,
  mapping = aes(x = carat, y = price, group = cut_width(carat, 0.1))
) +
  geom_boxplot()

# Add `varwidth = TRUE` so that the width of each box reflects the number of
# observations in its bin (`TRUE` is spelled out because `T` can be
# reassigned).
ggplot(smaller) +
  geom_boxplot(
    aes(carat, price, group = cut_width(carat, 0.1)),
    varwidth = TRUE
  )

# Box plots of `hwy` for every `class`.
# `reorder()` sorts the classes by their median `hwy`, from the smallest on
# the left to the largest on the right.
ggplot(
  data = mpg,
  mapping = aes(x = reorder(class, hwy, FUN = median), y = hwy)
) +
  geom_boxplot()

# The same `reorder()` idea applied to binned `carat`: the bins are stored in
# `temp` and ordered by their median `price`.
smaller %>%
  mutate(temp = cut_width(carat, 0.1)) %>%
  ggplot(
    mapping = aes(x = reorder(temp, price, FUN = median), y = price, group = temp)
  ) +
  geom_boxplot() +
  coord_flip() # flip the axes so that long bin labels stay readable
  •  Auxiliary

 Rename 

# Fragment to append to a plot: sets the x-axis title, the y-axis title, the
# title of the `fill` legend and the plot title.
+ labs(x = "Month", y = "Destination", fill = "Departure Delay",title = "The Platform")

Data Processing

filter(between(y, 3, 20))  # keep only the observations with y between 3 and 20; rows outside the range are dropped

mutate(y = ifelse(y < 3 | y > 20, NA, y))  # keep the rows but replace out-of-range values with NA; ifelse(test, yes, no) gives `yes` where `test` is TRUE and `no` elsewhere

facet_*

# Base scatter plot of `hwy` against `displ`, to which a facet can be added.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()
# Add `facet_wrap(~class, nrow = 2)` to split the plot into one panel per
# `class`, arranged in 2 rows.
# Add `facet_grid(drv ~ cyl)` to split the plot by every combination of `drv`
# and `cyl`; use `facet_grid(. ~ cyl)` to split by column only and
# `facet_grid(cyl ~ .)` to split by row only.

coord_*

coord_flip  # swap the x and y axes

coord_polar # draw the plot in polar coordinates, usually paired with geom_bar()

coord_cartesian # zoom in on a region of the plot, for example `coord_cartesian(ylim = c(0, 5))`

scale_x_continuous

 scale_x_continuous(breaks = unique(table2$year)) # put one x-axis break at each distinct `year` in `table2`, so the axis shows exactly those values

Summarise

Function:

geom_point() draws points

geom_smooth() draws smooth lines (`se = FALSE`: hide the confidence interval)

geom_line() draws lines connecting the observations (`group`: split the observations into groups and draw one line for each)

geom_freqpoly() draws polygonal lines to show the counts of one continuous variable (CO) (`binwidth`: the width of each bin)

geom_bar() draws bars to show the counts of one categorical variable (CA) (`fill`: the color that fills the bars, for example `fill = "blue"`)

geom_histogram() draws bars to show the counts of one continuous variable (CO) (`binwidth`: the width of each bin)

geom_boxplot() draws boxplots to show how one continuous variable (CO) varies with one categorical variable (CA) (`varwidth = TRUE` makes the box width reflect the number of observations in each group), or how two continuous variables relate when one is binned with `group = cut_width()`

geom_tile() draws a tile plot: one square per combination of two variables, with the fill color showing the count

Guess you like

Origin blog.csdn.net/weixin_51674826/article/details/116664966