1 初始准备

清理环境做准备

 clean up
rm(list=ls())

# load package arules to find rules, and arulesViz to plot them
library(arules)
library(arulesViz)

2 读取文件

读取文件并查看其内容，可以看到需求提供的 3 张表分别为：交易信息表、商品列表、商品订单表。
按照 PA 字段将交易信息表和商品订单表连接到一起。

# read purchase data on items level

# Load the three input tables; text columns stay as character vectors.
trans <- read.csv("purchase.csv", header = TRUE, stringsAsFactors = FALSE)
RA_desc <- read.csv("RA_desc.csv", header = TRUE, stringsAsFactors = FALSE)
PA_desc <- read.csv("PA_desc.csv", header = TRUE, stringsAsFactors = FALSE)

# Quick look at the size and the first rows of the raw data.
dim(trans)
head(trans)
head(PA_desc)

# Replace the raw column headers with descriptive names.
# The assignment supplies exactly 12 names, one per expected column.
names(trans) <- c(
  "card", "transaction_place", "transaction_time", "pos_id",
  "transaction_number", "invo_num", "amt_aft", "amt_bft",
  "quantity", "PA", "RA", "product_code"
)

# merge with descriptions: left join on PA, so every purchase row is kept
# even when PA_desc has no matching entry
trans2 <- merge(trans, PA_desc, by = "PA", all.x = TRUE)

在这里插入图片描述

# write trans_id and items

# Build a transaction id by joining the fields that identify one purchase.
trans2$trans_id <- paste(
  trans2$card, trans2$transaction_place, trans2$transaction_time,
  trans2$pos_id, trans2$transaction_number,
  sep = "_"
)

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(head(trans2))
}

# Export the (trans_id, PA_desc) columns without row names.
# NOTE(review): assumes the merged table has a column named PA_desc --
# confirm against the header of PA_desc.csv.
write.csv(
  subset(trans2, select = c("trans_id", "PA_desc")),
  file = "purchase2.csv",
  row.names = FALSE
)

# Read the export back as a transactions object. In "single" format each
# line is one (transaction id, item) pair; duplicated items are removed.
trans3 <- read.transactions(
  "purchase2.csv",
  format = "single", sep = ",",
  cols = c("trans_id", "PA_desc"),
  rm.duplicates = TRUE, header = TRUE
)

最后将信息整理为订单编号和购物商品明细

在这里插入图片描述
# Read the exported pairs as a transactions object; string arguments use
# plain ASCII quotes so that the call parses.
trans3 <- read.transactions("purchase2.csv", format = "single", sep = ",",
                            cols = c("trans_id", "PA_desc"),
                            rm.duplicates = TRUE, header = TRUE)
这步是将数据转化为交易订单格式，方便算法处理

3 Apriori算法生成规则

# find rules with at least two items, support >= 0.008 and confidence >= 0.5
rules.all <- apriori(
  trans3,
  parameter = list(
    minlen = 2,
    support = 0.008,
    confidence = 0.5,
    target = "rules"
  )
)

# Print the mined rules.
inspect(rules.all)



# Mine a second rule set with looser thresholds
# (support >= 0.005, confidence >= 0.05, at least two items per rule).
rules <- apriori(
  trans3,
  parameter = list(
    target = "rules",
    minlen = 2,
    support = 0.005,
    confidence = 0.05
  )
)
inspect(rules)

# Export the rules to a pipe-separated file without row names.
write(rules, file = "rules.csv", sep = "|", row.names = FALSE)

前面一次关联规则算法生成的规则中可能存在冗余规则，于是对规则进行剪枝。

# adjust the format of results

# Round the quality measures to three decimals, then rank the rules by lift.
quality(rules.all) <- round(quality(rules.all), digits = 3)
rules.sorted <- sort(rules.all, by = "lift")
inspect(rules.sorted)

# Prune redundant rules. With its defaults, is.redundant() flags a rule when
# a more general rule (same consequent, subset of the antecedent) has the
# same or higher confidence; only the unflagged rules are kept.
reules.pruned <- rules.sorted[!is.redundant(rules.sorted)]

# Show the top-ranked rule that survived pruning.
inspect(reules.pruned[1])

target:
a character string indicating the type of association mined. One of
“frequent itemsets”
“maximally frequent itemsets”
“closed frequent itemsets”
“rules” (only available for Apriori; use ruleInduction for eclat.)
“hyper edge sets” (only available for Apriori; see references for the definition of association hyperedgesets)

4 关联规则绘图

# scatter plots

# plot rules.all
# plot() for rules objects is provided by arulesViz, which must be attached.
# The figure is written to a 1200 x 900 PNG with a white background.
png(file = "rules_all.png", bg = "white", width = 1200, height = 900)
plot(rules.all)
dev.off()

在这里插入图片描述
多种规则画图

# Default plot of the rules held in reules.pruned.
plot(reules.pruned)

# Same rules using the support and lift measures, shaded by confidence.
plot(
  reules.pruned,
  measure = c("support", "lift"),
  shading = "confidence"
)

# Shade by rule order instead, under the title "Two-key plot".
plot(
  reules.pruned,
  shading = "order",
  control = list(main = "Two-key plot")
)

# Print the rules that were just plotted.
inspect(reules.pruned)

其它绘图

# Interactive variant, kept disabled:
# sel <- plot(rules, measure=c("support", "lift"), shading="confidence", interactive=TRUE)

### matrix plots
# Lift only: first as-is, then with reorder = TRUE passed through control.
plot(reules.pruned, method = "matrix", measure = "lift")
plot(
  reules.pruned,
  method = "matrix", measure = "lift",
  control = list(reorder = TRUE)
)

# Lift and confidence together, again without and with reorder = TRUE.
plot(reules.pruned, method = "matrix", measure = c("lift", "confidence"))
plot(
  reules.pruned,
  method = "matrix", measure = c("lift", "confidence"),
  control = list(reorder = TRUE)
)


### grouped matrix plots
# Interactive variant, kept disabled:
# sel <- plot(reules.pruned, method="white", interactive=TRUE)

# Write the grouped plot to a 1200 x 900 PNG with a transparent background.
png(file = "grouped_matrix.png", bg = "transparent", width = 1200, height = 900)
plot(reules.pruned, method = "grouped")
dev.off()

# Draw the grouped plot again on the current device with k = 25
# (presumably the number of groups -- confirm in the arulesViz documentation).
plot(reules.pruned, method = "grouped", control = list(k = 25))

### graph plots
# Keep the 25 rules with the highest lift.
subrules2 <- head(sort(reules.pruned, by = "lift"), n = 25)
inspect(subrules2)

# Graph plot of that subset: default layout first, then with the
# type = "items" and type = "itemsets" control options.
plot(subrules2, method = "graph")
plot(subrules2, method = "graph", control = list(type = "items"))
plot(subrules2, method = "graph", control = list(type = "itemsets"))

在这里插入图片描述

# parallel coordinates

# Parallel-coordinates plot of the rules in subrules2, with reordering on.
plot(subrules2, method = "paracoord", control = list(reorder = TRUE))

# Repeat for the three rules with the highest lift, then print them.
subrules3 <- head(sort(reules.pruned, by = "lift"), 3)
plot(subrules3, method = "paracoord", control = list(reorder = TRUE))
inspect(subrules3)

93 R 推荐算法——关联规则

1 初始准备

2 读取文件

3 Apriori算法生成规则

4 关联规则绘图

parallel coordinates

猜你喜欢