五. 经销商销量分析

SQL代码

二级网点经销商销量数据(每年):
-- Second-level outlets: sales volume per dealer for one calendar year, plus
-- each dealer's share (divqty) of that year's total volume.
-- To analyse another year, change the period_wid bounds in BOTH subqueries.
-- The text this was recovered from summed dealers over 2016 but the total over
-- 2017, so the share mixed two different years; both now use 2016.
-- NOTE(review): 2016 was chosen because the per-dealer subquery used it; confirm.
SELECT
    A.*,
    B.*,
    A.qty / NULLIF(B.totalqty, 0) AS divqty  -- NULL instead of a division by zero
FROM (
    -- Volume per dealer; negative-quantity rows are excluded row by row.
    SELECT
        dealer_code,
        SUM(qty) AS qty
    FROM dmk.dmk_sal_actual_sales_dtl
    WHERE bd_name = 'WZ01'
      AND period_wid >= 20160101
      AND period_wid <= 20161231
      AND region_level3_name NOT IN ('其它', '退供应商零部件', '委外材料', '废料')
      AND dealer_code IS NOT NULL
      AND qty >= 0
    GROUP BY dealer_code
) A
CROSS JOIN (
    -- Single-row total over exactly the same filter as subquery A.
    SELECT
        SUM(qty) AS totalqty
    FROM dmk.dmk_sal_actual_sales_dtl
    WHERE bd_name = 'WZ01'
      AND period_wid >= 20160101
      AND period_wid <= 20161231
      AND region_level3_name NOT IN ('其它', '退供应商零部件', '委外材料', '废料')
      AND dealer_code IS NOT NULL
      AND qty >= 0
) B
一级网点筛选区域的经销商销量数据:
-- First-level outlets (filtered regions): 2017 sales volume per first-level
-- dealer code (yjdbm), plus each dealer's share (divqty) of the 2017 total.
-- Dealers whose summed volume is negative are dropped AFTER aggregation, and
-- the whole result is empty if the grand total is negative or NULL.
SELECT
    A.*,
    B.*,
    A.qty / NULLIF(B.totalqty, 0) AS divqty  -- NULL instead of a division by zero
FROM (
    SELECT per_dealer.*
    FROM (
        SELECT
            yjdbm,
            SUM(qty) AS qty
        FROM dmk.dmk_sal_actual_sales_rep
        WHERE bd_code = 'WZ01'
          AND period_wid >= 20170101
          AND period_wid <= 20171231
          AND region_level3_name NOT IN ('其它', '退供应商零部件', '委外材料', '废料')
          AND region_level3_name IS NOT NULL
          AND yjdbm IS NOT NULL
        GROUP BY yjdbm
    ) per_dealer
    WHERE per_dealer.qty >= 0
) A
CROSS JOIN (
    SELECT grand_total.*
    FROM (
        SELECT
            SUM(qty) AS totalqty
        FROM dmk.dmk_sal_actual_sales_rep
        WHERE bd_code = 'WZ01'
          AND period_wid >= 20170101
          AND period_wid <= 20171231
          AND region_level3_name NOT IN ('其它', '退供应商零部件', '委外材料', '废料')
          AND region_level3_name IS NOT NULL
          AND yjdbm IS NOT NULL
    ) grand_total
    WHERE grand_total.totalqty >= 0
) B

1、导入数据

library(xlsx)
# Load ONE of the workbooks below into x. Every read.xlsx call overwrites x,
# so run only the line for the data set being analysed.
# 2016 and 2017 combined dealer sales volume data
x=read.xlsx("dealer_qty201617.xlsx",sheetIndex=1,encoding = "UTF-8")
# 2016 and 2017 dealer sales volume data after region filtering
x=read.xlsx("yjdbm_qty_201617.xlsx",sheetIndex=1,encoding = "UTF-8")
# 2009-2018 dealer sales volume data after region filtering
x=read.xlsx("sales_qty.xlsx",sheetIndex=1,encoding = "UTF-8")
# 2009-2018 dealer sales volume data after region filtering (data for the further analysis of classes E/F)
x=read.xlsx("sales_qty6.xlsx",sheetIndex=1,encoding = "UTF-8")
# Standardise the data: append the scaled qty column to x
X=cbind(x,scale(x$qty))
# NOTE: this writes to the same file name as the first workbook read above.
write.xlsx(X,"dealer_qty201617.xlsx")

2、观测数据分布

# Box plot of the sales volume
boxplot(x$qty)
# Values that boxplot.stats reports as outliers
out <- boxplot.stats(x$qty)$out
# Row positions in x whose qty is one of those outlier values
a = which(x$qty %in% out)

3、进行聚类分析

# Hierarchical clustering
d<-dist(x$qty)  # build the distance structure from the input data
# NOTE(review): the source text is truncated here (web-page formula rendering
# error). The lost span starts with a comment beginning "生成…" and ends with an
# expression of the form `...x$qty)`; it presumably also defined `mydata`,
# which is used below. Restore it from the original article.
wss <- (nrow(mydata)-1)*sum(apply(mydata,2,var))
for (i in 2:15)
  wss[i] <- sum(kmeans(mydata,centers=i)$withinss)
# NOTE(review): a second span is truncated here. It starts with a comment
# beginning "这里的wss(within…" and ends with a call whose last arguments are
# `...x$qty, col = km$cluster)`; `km` must have been created in the lost part.
X=cbind(x,km$cluster)  # append the cluster assignment to the data
write.xlsx(X,"dealer_qty201617.xlsx")

4、总销量和区间销量的相关性检验

# Dynamic (k-means) clustering; tabulate the clustering result by class
# (interval sales volume and dealer-count data)
x=read.xlsx("cal_qty.xlsx",sheetIndex=1,encoding = "UTF-8")
x=read.xlsx("cal_qty6.xlsx",sheetIndex=1,encoding = "UTF-8") # (data for the further analysis of classes E/F)
km=kmeans(x$qty,5)
X=cbind(table(x$yjdbm,km$cluster),x$qty) # cross-tabulated clustering result
write.xlsx(X,"yjdbm_qty_2017_动态5.xlsx")
# Correlation analysis on columns 2 to 7 of x
data=x[,2:7]
# Correlation coefficients
cor(data,method = "pearson")
cor(data,method = "spearman")
# Correlation coefficients together with significance levels
# Install only when missing, so re-running the script does not reinstall.
if (!requireNamespace("Hmisc", quietly = TRUE)) install.packages("Hmisc")
if (!requireNamespace("backports", quietly = TRUE)) install.packages("backports")
library(backports)
library(Hmisc)
# NOTE: this variable is named `cor`, like the base function called above.
cor=rcorr(as.matrix(data))
COR=cbind(cor$r,cor$P) # coefficient matrix next to the P-value matrix
write.xlsx(COR,"相关分析2.xlsx")
# Visualise the correlation analysis
if (!requireNamespace("PerformanceAnalytics", quietly = TRUE)) install.packages("PerformanceAnalytics")
library(xts)
library(zoo)
library(PerformanceAnalytics)
chart.Correlation(data,histogram = TRUE,pch=19)

5、总销量和区间销量的回归分析

# Regression of total sales volume (qty) on year and the class columns A to F
a=lm(x$qty~x$year+x$A+x$B+x$C+x$D+x$E+x$F)
summary(a)
# Multicollinearity check
if (!requireNamespace("car", quietly = TRUE)) install.packages("car")
library(car)
vif(a)
# Heteroscedasticity check
if (!requireNamespace("lmtest", quietly = TRUE)) install.packages("lmtest")
library(lmtest)
bptest(a)
s1=step(a,direction="backward") # backward stepwise selection
summary(s1)

6、EF销量占比与年销量回归分析

# Dealer sales volume data
X=read.xlsx("cal_qty.xlsx",sheetIndex=8,encoding = "UTF-8")
# Regression of the E+F sales share on the log of qty_10000
x=log(X$qty_10000)
y=X$EF_divqty
a=lm(y~x)
summary(a)
plot(x,y,xlim=c(0.5,2.5),ylim=c(0.2,0.7),xlab="LN(年销量/10000)",ylab="EF销量占比")
abline(a)
# Regression predictions
pred0=data.frame(x=log(6:9))
# NOTE(review): the source text is truncated here (web-page formula rendering
# error). Recoverable fragments, in order:
#   pred1=data.frame(x=log(X$...          (rest of the statement lost)
#   ....frame(a.pred)                     (tail of a statement that builds a.pred)
#   a comment about adding the predicted values and the prediction range
#   ...x,a.pred$lwr,pch=16,col="DeepPink") (tail of a call, presumably points())
# `pred` and `a.pred`, used below, were defined in the lost part. Restore it
# from the original article before running.
points(pred$x,a.pred$upr,pch=17,col="DeepPink")

7、经销商家数与区间销量的回归分析

# Class E interval data
x=read.xlsx("cal_qty.xlsx",sheetIndex=8,encoding = "UTF-8")
# Class F interval data
# NOTE(review): this is the same file and sheetIndex as the class E read
# above; confirm which sheet holds the class F data.
x=read.xlsx("cal_qty.xlsx",sheetIndex=8,encoding = "UTF-8")
# Regression analysis
# NOTE(review): as recovered from the source, this call has no response
# variable and no `~`, so it is not a valid formula. The left-hand side was
# probably lost in extraction; add it (e.g. `x$<response> ~ ...`) before running.
a=lm(x$year+x$count+x$divcount+x$divqty)
a=step(a,direction="backward")
summary(a)
# Multicollinearity check
if (!requireNamespace("car", quietly = TRUE)) install.packages("car")
library(car)
vif(a)
# Heteroscedasticity check
if (!requireNamespace("lmtest", quietly = TRUE)) install.packages("lmtest")
library(lmtest)
bptest(a)

1、聚类结果数据集:
-- Cluster result data set: every row of dim.123 plus a class letter.
-- Clusters (簇1) are numbered by their minimum div_qty, highest first, using
-- a running user variable. Numbers 1..5 map to 'A'..'E'; every other row,
-- including rows with no matching cluster, gets 'F'.
SELECT
    a.*,
    CASE
        WHEN b.Nom = 1 THEN 'A'
        WHEN b.Nom = 2 THEN 'B'
        WHEN b.Nom = 3 THEN 'C'
        WHEN b.Nom = 4 THEN 'D'
        WHEN b.Nom = 5 THEN 'E'
        ELSE 'F'
    END AS class
FROM dim.123 a
LEFT JOIN (
    -- One row per cluster with its running number Nom.
    SELECT
        @Nom := @Nom + 1 AS Nom,
        T.簇1
    FROM (
        SELECT 簇1, MIN(div_qty)
        FROM dim.123
        GROUP BY 簇1
        ORDER BY MIN(div_qty) DESC
    ) T
    CROSS JOIN (SELECT @Nom := 0) T3  -- resets the counter for this statement
) b
    ON a.簇1 = b.簇1
2、区间销量分析数据集:
-- Interval sales analysis data set: one row per year (2009 excluded) with,
-- for each class A..F, the summed qty and the number of non-NULL yjdbm rows,
-- the year's total qty, and the combined E+F share of that total.
--
-- Classes are the cluster numbers 1..6 (A..F), obtained by numbering clusters
-- (簇1) by their minimum div_qty, highest first.
--
-- Differences from the earlier six-subquery version:
--   * the cluster numbering is computed once instead of six times;
--   * rows are driven by year, so a year without class-B rows is kept;
--   * a class with no rows in a year yields 0 for every class (the earlier
--     version did this for class A only), so EF_divqty is no longer NULL
--     when E or F is missing.
SELECT
    y.year,
    COALESCE(y.A_qty, 0) AS A_qty,
    COALESCE(y.B_qty, 0) AS B_qty,
    COALESCE(y.C_qty, 0) AS C_qty,
    COALESCE(y.D_qty, 0) AS D_qty,
    COALESCE(y.E_qty, 0) AS E_qty,
    COALESCE(y.F_qty, 0) AS F_qty,
    y.qty,
    y.A_count,
    y.B_count,
    y.C_count,
    y.D_count,
    y.E_count,
    y.F_count,
    ROUND(
        (COALESCE(y.E_qty, 0) + COALESCE(y.F_qty, 0)) / NULLIF(y.qty, 0),
        6
    ) AS EF_divqty
FROM (
    SELECT
        a.year,
        SUM(CASE WHEN b.Nom = 1 THEN a.qty END) AS A_qty,
        SUM(CASE WHEN b.Nom = 2 THEN a.qty END) AS B_qty,
        SUM(CASE WHEN b.Nom = 3 THEN a.qty END) AS C_qty,
        SUM(CASE WHEN b.Nom = 4 THEN a.qty END) AS D_qty,
        SUM(CASE WHEN b.Nom = 5 THEN a.qty END) AS E_qty,
        SUM(CASE WHEN b.Nom = 6 THEN a.qty END) AS F_qty,
        SUM(a.qty) AS qty,  -- total over all rows of the year, classified or not
        -- COUNT skips NULLs, so each expression counts non-NULL yjdbm of one class.
        COUNT(CASE WHEN b.Nom = 1 THEN a.yjdbm END) AS A_count,
        COUNT(CASE WHEN b.Nom = 2 THEN a.yjdbm END) AS B_count,
        COUNT(CASE WHEN b.Nom = 3 THEN a.yjdbm END) AS C_count,
        COUNT(CASE WHEN b.Nom = 4 THEN a.yjdbm END) AS D_count,
        COUNT(CASE WHEN b.Nom = 5 THEN a.yjdbm END) AS E_count,
        COUNT(CASE WHEN b.Nom = 6 THEN a.yjdbm END) AS F_count
    FROM dim.123 a
    LEFT JOIN (
        -- One row per cluster with its running number Nom; 簇1 is unique here,
        -- so the join never multiplies rows of a.
        SELECT
            @Nom := @Nom + 1 AS Nom,
            T.簇1
        FROM (
            SELECT 簇1, MIN(div_qty)
            FROM dim.123
            GROUP BY 簇1
            ORDER BY MIN(div_qty) DESC
        ) T
        CROSS JOIN (SELECT @Nom := 0) T3  -- resets the counter for this statement
    ) b
        ON a.簇1 = b.簇1
    WHERE a.year <> 2009
    GROUP BY a.year
) y
ORDER BY y.year

3、相关和回归的分析算法语句

(1)相关分析
data=cbind(year,A,B,C,D,E,F,qty)
P_value=data.frame(cor(data,method = "pearson"))
# NOTE(review): the source text is truncated here (web-page formula rendering
# error). Recoverable fragments, in order:
#   P_cov=P_value$<column lost>   (read the correlation coefficients)
#   list(P_c...                   (rest of the statement lost)
#   ...$residuals                 (tail of a statement: read the regression residuals;
#                                  presumably assigned to `resid`, used below)
# The regression fit `a` and the prediction object `pred` were defined in the
# lost part. Restore it from the original article before running.
pred=data.frame(pred)
# NOTE(review): the accessor after `pred$` was lost on the next line; `fit` is
# inferred from the list() call at the end of this block.
fit=pred$fit # fitted values of the regression
lwr=pred$lwr # lower bound of the fitted values
upr=pred$upr # upper bound of the fitted values
# NOTE(review): the statement assigning `coe` is truncated; only its tail
# `...$coefficients)` survives.
# NOTE(review): the column names after `coe$` on the next two lines are
# partly lost; `Estimate` and `Pr...t..` are inferred, confirm them.
b=coe$Estimate # estimated regression coefficients
P=coe$Pr...t.. # P-values of the regression coefficients
R2=a$r.squared # goodness of fit of the regression
list(fit=fit,lwr=lwr,upr=upr,resid=resid,b=b,P=P,R2=R2)

发布了30 篇原创文章 · 获赞 0 · 访问量 348

猜你喜欢

转载自blog.csdn.net/hua_chang/article/details/105034173