1 Initial preparation
Prepare the environment
clean up
# Remove every object from the global environment so the script starts clean.
# NOTE(review): this also deletes anything the caller already had in their
# workspace; running the script in a fresh R session avoids the need for it.
rm(list=ls())
# load package arules to find rules
library(arules)
2 Read the file
Read the files and check their contents. Three tables are provided: the transaction information table, the commodity list, and the commodity order table.
The transaction information table and the commodity order table are joined on the PA field.
Read the purchase data at the item level.
# Load the three input tables as data frames, keeping strings as character.
trans <- read.csv(
  file = "purchase.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
RA_desc <- read.csv(
  file = "RA_desc.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
PA_desc <- read.csv(
  file = "PA_desc.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Print the dimensions and first rows of the purchase data, then the first
# rows of the PA description table.
dim(trans)
head(trans)
head(PA_desc)

# Replace the column headers of the purchase data with these twelve names.
names(trans) <- c(
  "card",
  "transaction_place",
  "transaction_time",
  "pos_id",
  "transaction_number",
  "invo_num",
  "amt_aft",
  "amt_bft",
  "quantity",
  "PA",
  "RA",
  "product_code"
)
# Merge the purchase rows with the PA descriptions. all.x = TRUE keeps every
# purchase row even when its PA has no matching description.
# NOTE(review): such unmatched rows carry NA in PA_desc and are exported as-is;
# confirm whether they should be dropped before mining.
trans2 <- merge(trans, PA_desc, by = "PA", all.x = TRUE)

# Write trans_id and items.
# The transaction id joins card, place, time, POS id and transaction number
# with "_".
trans2$trans_id <- paste(
  trans2$card,
  trans2$transaction_place,
  trans2$transaction_time,
  trans2$pos_id,
  trans2$transaction_number,
  sep = "_"
)

# View() opens a data viewer window, so only call it when a user is present;
# this keeps non-interactive runs (e.g. Rscript) from stopping here.
if (interactive()) {
  View(head(trans2))
}

# Export one (trans_id, PA_desc) pair per row, without row names.
write.csv(
  subset(trans2, select = c("trans_id", "PA_desc")),
  file = "purchase2.csv",
  row.names = FALSE
)

# Read the exported pairs back as an arules transactions object
# ("single" format: one item per line, identified by the two named columns).
trans3 <- read.transactions(
  "purchase2.csv",
  format = "single",
  sep = ",",
  cols = c("trans_id", "PA_desc"),
  rm.duplicates = TRUE,
  header = TRUE
)
Finally, organize the information into order numbers and the items purchased in each order.
# Load the exported (trans_id, PA_desc) pairs from purchase2.csv as an arules
# transactions object. This repeats the read.transactions() call made earlier
# in the script with the same arguments.
trans3 <- read.transactions(
  file = "purchase2.csv",
  format = "single",
  sep = ",",
  cols = c("trans_id", "PA_desc"),
  rm.duplicates = TRUE,
  header = TRUE
)
This step converts the data into transaction format so that the algorithm can process it easily.
3 Apriori algorithm generation rules
Find rules using a minimum support of 0.008 and a minimum confidence of 0.5.
# Mine association rules from trans3 with a minimum length of 2, minimum
# support of 0.008 and minimum confidence of 0.5, then print them.
rules.all <- apriori(
  trans3,
  parameter = list(
    minlen = 2,
    supp = 0.008,
    conf = 0.5,
    target = "rules"
  )
)
inspect(rules.all)
# Mine a second rule set from trans3 with lower thresholds than rules.all
# (minimum support 0.005, minimum confidence 0.05), print it, and export it
# to rules.csv using "|" as the field separator and no row names.
rules <- apriori(
  trans3,
  parameter = list(
    minlen = 2,
    supp = 0.005,
    conf = 0.05,
    target = "rules"
  )
)
inspect(rules)
write(rules, file = "rules.csv", sep = "|", row.names = FALSE)
The rule-generation step above has already dealt with possibly redundant rules, so no further pruning is applied: the sorted rules are used directly as the pruned rule set.
Adjust the format of the results.
# Round the quality measures stored on rules.all to three decimal places.
quality(rules.all) <- round(quality(rules.all), digits = 3)

# Order the rules by lift and print them.
rules.sorted <- sort(rules.all, by = "lift")
inspect(rules.sorted)

# No extra pruning happens here: the sorted rules are reused unchanged.
# NOTE(review): "reules.pruned" looks like a typo of "rules.pruned"; the name
# is kept because the plotting code further down refers to it.
reules.pruned <- rules.sorted
inspect(reules.pruned[1])
target (an element of the `parameter` list passed to apriori):
A character string indicating the type of association mined. One of:
“frequent itemsets”
“maximally frequent itemsets”
“closed frequent itemsets”
“rules” (only available for Apriori; use ruleInduction for eclat.)
“hyper edge sets” (only available for Apriori; see references for the definition of association hyperedgesets)
4 Association rule drawing
scatter plots
# Draw the default plot of rules.all into rules_all.png (1200 x 900 pixels,
# white background) and close the device afterwards.
# NOTE(review): plot() methods for rules objects are presumably supplied by
# arulesViz, which the visible code never loads - confirm it is attached.
png(filename = "rules_all.png", width = 1200, height = 900, bg = "white")
plot(rules.all)
dev.off()
Plot the rules in several different ways.
# Default plot of the pruned rules.
plot(reules.pruned)

# Same rules with support and lift as the measures, shaded by confidence.
plot(
  reules.pruned,
  measure = c("support", "lift"),
  shading = "confidence"
)

# Shade by rule order and title the figure "Two-key plot".
plot(
  reules.pruned,
  shading = "order",
  control = list(main = "Two-key plot")
)

# Print the rules that were just plotted.
inspect(reules.pruned)
other drawings
# sel <- plot(rules, measure=c("support", "lift"), shading="confidence", interactive=TRUE)
### matrix plots
# Lift only, first as-is and then with reorder = TRUE.
plot(
  reules.pruned,
  method = "matrix",
  measure = "lift"
)
plot(
  reules.pruned,
  method = "matrix",
  measure = "lift",
  control = list(reorder = TRUE)
)

# Lift together with confidence, again without and with reorder = TRUE.
plot(
  reules.pruned,
  method = "matrix",
  measure = c("lift", "confidence")
)
plot(
  reules.pruned,
  method = "matrix",
  measure = c("lift", "confidence"),
  control = list(reorder = TRUE)
)
### grouped matrix plots
# sel <- plot(reules.pruned, method="white", interactive=TRUE)
# Save the grouped plot to grouped_matrix.png (1200 x 900 pixels, transparent
# background), then close the device.
png(
  filename = "grouped_matrix.png",
  width = 1200,
  height = 900,
  bg = "transparent"
)
plot(reules.pruned, method = "grouped")
dev.off()

# Draw the grouped plot again on the current device, passing k = 25 through
# the control list.
plot(
  reules.pruned,
  method = "grouped",
  control = list(k = 25)
)
### graph plots
# Take the first 25 rules after sorting by lift and print them.
subrules2 <- head(sort(reules.pruned, by = "lift"), n = 25)
inspect(subrules2)

# Graph plot with default settings, then with type "items" and "itemsets".
plot(subrules2, method = "graph")
plot(
  subrules2,
  method = "graph",
  control = list(type = "items")
)
plot(
  subrules2,
  method = "graph",
  control = list(type = "itemsets")
)
parallel coordinates
# Parallel-coordinates plot of subrules2 with reorder = TRUE.
# The string arguments use plain ASCII double quotes: typographic quotes
# (‘ ’ “ ”) are not string delimiters in R and stop the script from parsing.
plot(subrules2, method = "paracoord", control = list(reorder = TRUE))

# Repeat for the first 3 rules after sorting by lift, then print those rules.
subrules3 <- head(sort(reules.pruned, by = "lift"), 3)
plot(subrules3, method = "paracoord", control = list(reorder = TRUE))
inspect(subrules3)