library(readxl)
# Load the worksheet named "订单" from the workbook into `orders`.
orders <- read_excel("global-superstore.xlsx", sheet = "订单")
# Print a compact overview of the columns that were read.
str(orders)
library(tidyr)
# Split `orderid` on "-" into three new columns: center, year and product.
orders0 <- separate(
  orders,
  col = orderid,
  into = c("center", "year", "product"),
  sep = "-"
)
# Print a compact overview of the reshaped table.
str(orders0)
# Example 1: total sales for each market.
# dplyr is attached here because group_by() and summarise() are first used in
# this example; without it the script stops with "could not find function".
library(dplyr)
orders %>%
  group_by(market) %>%
  summarise(totalsales = sum(sales))
# Example 2: bar chart of total sales per market.
library(dplyr)
library(ggplot2)
# Aggregate first, then draw one bar per market whose height is the summed
# sales value.
orders %>%
  group_by(market) %>%
  summarise(totalsales = sum(sales)) %>%
  ggplot(aes(x = market, y = totalsales)) +
  geom_col() +
  labs(x = "市场", y = "总销售额")
# Total profit: bar chart of summed profit per market.
library(dplyr)
library(ggplot2)
# Same shape as the sales chart, with `profit` as the aggregated column.
orders %>%
  group_by(market) %>%
  summarise(totalprofit = sum(profit)) %>%
  ggplot(aes(x = market, y = totalprofit)) +
  geom_col() +
  labs(x = "市场", y = "总利润")
# Example 3: total sales per area, restricted to rows whose market is
# "拉丁美洲".
orders %>%
  filter(market == "拉丁美洲") %>%
  group_by(area) %>%
  summarise(totalsales = sum(sales))
# Example 4: the five customers with the highest total sales, listed from
# largest to smallest.
orders %>%
  group_by(custname) %>%
  summarise(totalsales = sum(sales)) %>%
  top_n(n = 5, wt = totalsales) %>%
  arrange(desc(totalsales))
# Example 5: for every customer, the two product types with the highest sales.
# summarise() peels off the innermost grouping variable (type), so the result
# is still grouped by custname and top_n() ranks within each customer.
orders %>%
  group_by(custname, type) %>%
  summarise(totalsales = sum(sales)) %>%
  top_n(n = 2, wt = totalsales)
# Example 6: quantity sold per segment, broken down by market and product type.

# Axis label helper: divides each value by 1000 and appends "K"
# (for example 2500 becomes "2.5K").
y_axis_formatter <- function(x) {
  paste0(x / 1000, "K")
}
# Stacked bars of order quantity per segment, one panel for each product type
# (rows) and market (columns). coord_flip() lays the bars out horizontally and
# the quantity axis is labelled through y_axis_formatter.
ggplot(orders, aes(x = segment, y = quantity)) +
  geom_col() +
  facet_grid(type ~ market) +
  scale_y_continuous(labels = y_axis_formatter) +
  labs(x = "细分市场", y = "数量") +
  coord_flip()
# Example 7: total sales per calendar year of the purchase date.
orders %>%
  mutate(year = lubridate::year(purchasedate)) %>%
  group_by(year) %>%
  summarise(totalsales = sum(sales))
# Example 8.1: sales per year and quarter, drawn as one panel per quarter with
# the year on the x axis.
orders %>%
  mutate(
    year = lubridate::year(purchasedate),
    quarter = lubridate::quarter(purchasedate)
  ) %>%
  group_by(year, quarter) %>%
  summarise(totalsales = sum(sales)) %>%
  ggplot(aes(x = year, y = totalsales)) +
  geom_line() +
  facet_grid(. ~ quarter)
# Example 8.2: the same year/quarter totals, drawn as one panel per year with
# the quarter on the x axis.
orders %>%
  mutate(
    year = lubridate::year(purchasedate),
    quarter = lubridate::quarter(purchasedate)
  ) %>%
  group_by(year, quarter) %>%
  summarise(totalsales = sum(sales)) %>%
  ggplot(aes(x = quarter, y = totalsales)) +
  geom_line() +
  facet_grid(. ~ year)
# Example 8.3: quarter on the x axis, one coloured line per year.
# factor(year) makes the colour scale discrete, which yields a separate line
# for each year.
orders %>%
  mutate(
    year = lubridate::year(purchasedate),
    quarter = lubridate::quarter(purchasedate)
  ) %>%
  group_by(year, quarter) %>%
  summarise(totalsales = sum(sales)) %>%
  ggplot(aes(x = quarter, y = totalsales, colour = factor(year))) +
  geom_line()
# Example 9: monthly sales totals, one coloured line per year.
orders %>%
  mutate(
    year = lubridate::year(purchasedate),
    month = lubridate::month(purchasedate)
  ) %>%
  group_by(year, month) %>%
  summarise(totalsales = sum(sales)) %>%
  ggplot(aes(x = month, y = totalsales, colour = factor(year))) +
  geom_line()
# Example 10: year-over-year growth rate of monthly sales.
# Grouping by month first matters: summarise() drops only the last grouping
# variable (year), so the totals stay grouped by month and lag() picks the
# preceding year's value for the same month. The earliest year has no
# predecessor, so its ratio is NA.
orders %>%
  mutate(
    year = lubridate::year(purchasedate),
    month = lubridate::month(purchasedate)
  ) %>%
  group_by(month, year) %>%
  summarise(totalsales = sum(sales)) %>%
  mutate(ratio = (totalsales - lag(totalsales)) / lag(totalsales)) %>%
  ggplot(aes(x = month, y = ratio, colour = factor(year))) +
  geom_line()
# Example 11: monthly sales trend per product type, one coloured line per year
# and one panel row per type. The x axis is given a tick for every month 1-12.
orders %>%
  mutate(
    year = lubridate::year(purchasedate),
    month = lubridate::month(purchasedate)
  ) %>%
  group_by(type, year, month) %>%
  summarise(totalsales = sum(sales)) %>%
  ggplot(aes(x = month, y = totalsales, colour = factor(year))) +
  geom_line() +
  scale_x_continuous(breaks = 1:12) +
  facet_grid(type ~ .)
# Example 12: wide table of total sales with one row per product type and year
# and one column per month.
# `label = TRUE` makes lubridate return month names instead of numbers; it is
# spelled out because `T` is an ordinary variable that can be reassigned.
orders %>%
  mutate(
    year = lubridate::year(purchasedate),
    month = lubridate::month(purchasedate, label = TRUE)
  ) %>%
  group_by(type, year, month) %>%
  summarise(totalsales = sum(sales)) %>%
  # Spread the month values into columns, filling each with the matching total.
  tidyr::spread(key = month, value = totalsales)