版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/lys1695227550/article/details/84332703
想学点推荐的东西,学了点PersonRank算法,网上资料挺多,做了些实践后写了下面的一些代码,只作为以后复习的资料:
不得不提到自己看过的一个博客,挺好,至少我看懂了在干嘛。
https://www.cnblogs.com/zhangchaoyang/articles/5470763.html
import numpy as np
import pymysql
class LoadDB():
    """Build the user-goods adjacency matrix from the database and export it to CSV."""

    def __init__(self):
        pass

    def readFromDB(self):
        """
        Read user-item "like" relations from the database, build the symmetric
        0/1 adjacency matrix of the bipartite user/goods graph, and export it
        (with a header row naming each vertex) to a local CSV file.
        :return: None
        """
        db = pymysql.connect("localhost", "root", "admin")
        cursor = db.cursor()
        cursor.execute("use cherishmall;")
        try:
            # All distinct users.
            cursor.execute("select user_id_lys from rate_lys group by user_id_lys")
            useridTmpTuple = cursor.fetchall()
            useridList = ['user_' + str(row[0]) for row in useridTmpTuple]
            # All distinct goods.
            cursor.execute("select goods_id_lys from rate_lys group by goods_id_lys")
            goodsidTmpTuple = cursor.fetchall()
            goodsidList = ['goods_' + str(row[0]) for row in goodsidTmpTuple]
            # Vertices of the bipartite graph: users first, then goods.
            PointList = useridList + goodsidList
            # 0/1 adjacency: M[user][goods] = 1 when the user likes the item.
            M = np.zeros([len(PointList), len(PointList)])
            for i, userid in enumerate(useridTmpTuple):
                # Parameterized query instead of %-interpolation: avoids
                # SQL injection and quoting errors.
                cursor.execute(
                    "select goods_id_lys from rate_lys where user_id_lys=%s",
                    (userid[0],))
                likeGoodsList = cursor.fetchall()
                for likeGood in likeGoodsList:
                    findIndex = PointList.index('goods_' + str(likeGood[0]))
                    M[i, findIndex] = 1
            # Symmetrize: edges of an undirected bipartite graph.
            modelM = M + M.T
            # Model finished -> export to CSV.
            self.write2CSV(modelM, PointList)
        except Exception as e:
            print("exception", e)
        finally:
            # Always release the connection, even when an error occurred.
            db.close()

    def write2CSV(self, modelM, PointList):
        """
        Export the matrix model to a CSV file ('model.csv').
        :param modelM: adjacency matrix (2-D numpy array)
        :param PointList: vertex names giving the meaning of each row/column
        :return: None
        """
        header = np.array(PointList)
        writeModel = np.vstack((header, modelM))
        try:
            np.savetxt('model.csv', writeModel, fmt='%s', delimiter=',')
        except Exception as e:
            print(e)
if __name__ == "__main__":
    # Build the user-item matrix from the DB and export it to model.csv.
    LoadDB().readFromDB()
import numpy as np
import json
import datetime
from numpy.linalg import solve
import pymysql
import pickle
import time
import csv
from scipy.sparse.linalg import gmres, lgmres
from scipy.sparse import csr_matrix
from functools import cmp_to_key
class LoadModel():
    """
    Load the PersonalRank model matrix from CSV, compute recommendations for
    every user by one matrix inversion, and persist/upload the results.
    """

    def __init__(self, csvFilePath, alpha):
        """
        Initialize the model.
        :param csvFilePath: path of the CSV model exported by LoadDB
        :param alpha: probability of walking on to a neighbour vertex
                      (1 - alpha is the probability of staying/restarting)
        """
        self.init(csvFilePath, alpha)

    def init(self, csvFilePath, alpha):
        """
        Read the model file and set up the parameters needed for recommendation.
        :return: None
        """
        vertex, probM = self.readFromCSV(csvFilePath)
        # Meaning of each matrix row/column (vertex names).
        self.vertex = vertex
        # Row-normalized transition-probability matrix.
        self.M = probM
        # Probability of leaving the current vertex on each step of the walk.
        self.alpha = alpha

    def readFromCSV(self, csvFilePath):
        """
        Read the model data from a CSV file.
        :param csvFilePath: CSV file whose first row is the vertex names and
                            whose remaining rows are the 0/1 adjacency matrix
        :return: (vertex name list, row-normalized probability matrix)
        """
        with open(csvFilePath, 'r', newline='') as f:
            # Only the header row is needed here; the numeric body is loaded
            # separately by numpy below (skiprows=1 skips this header).
            vertex = next(csv.reader(f), [])
        modelM = np.loadtxt(open(csvFilePath, 'rb'), dtype='float',
                            delimiter=",", skiprows=1)
        return (vertex, self.doChangeM(modelM))

    def doChangeM(self, M):
        """
        Normalize the 0/1 adjacency matrix row-wise into a probability matrix.
        :param M: 0/1 adjacency matrix
        :return: transition-probability matrix (each non-empty row sums to 1)
        """
        M = np.asarray(M, dtype=float)
        for index, rowSum in enumerate(M.sum(axis=1)):
            # Guard against isolated vertices (all-zero rows); dividing by a
            # zero row sum would fill the row with NaNs.
            if rowSum > 0:
                M[index] = M[index] / rowSum
        return M

    def predict(self, vertex, M, alpha):
        """
        Compute PersonalRank scores for walks started from *every* vertex at
        once: column j of inv(I - alpha*M^T) holds the stationary scores of a
        walk started at vertex j. Overall this single inversion is the
        fastest way to score all start vertices.
        :param vertex: names of the matrix rows/columns
        :param M: transition-probability matrix
        :param alpha: probability of continuing the walk at each step
        :return: (dict mapping start vertex -> {vertex: normalized score},
                  inversion time in seconds)
        """
        n = M.shape[0]
        A = np.eye(n) - alpha * np.asarray(M).T
        begin = time.time()
        D = np.linalg.inv(A)
        end = time.time()
        # BUG FIX: original read `useTime=end=begin`, an assignment chain
        # that clobbered `end` and returned the start timestamp instead of
        # the elapsed time.
        useTime = end - begin
        allDict = {}
        for j in range(n):
            score = {vertex[i]: D[i, j] for i in range(n)}
            total = sum(score.values())  # used for normalization
            # Descending by relevance score.
            li = sorted(score.items(), key=lambda kv: kv[1], reverse=True)
            allDict[vertex[j]] = {
                name: float('%.3f' % (value / total)) for name, value in li
            }
        return (allDict, useTime)

    def analysis(self, vertex, allDict, M, N):
        """
        Post-process the raw scores into the final recommendations: keep only
        walks started from user vertices, drop user vertices from each
        result, and take the top-N items per user.
        :param vertex: meaning of matrix rows/columns (kept for interface
                       compatibility; self.vertex carries the same data)
        :param M: probability matrix (currently unused)
        :param allDict: per-start-vertex relevance dicts from predict()
        :param N: number of recommendations per user
        :return: {user_key: [(goods_key, score), ...]} sorted by score desc
        """
        resultDict = {}
        for pointKey, tmpdict in allDict.items():
            # Only walks started from a user vertex produce recommendations.
            if 'user' in pointKey:
                recomDict = {name: value for name, value in tmpdict.items()
                             if 'user' not in name}
                if recomDict:
                    resultDict[pointKey] = recomDict
        # Items the user already likes are kept on purpose: a high relevance
        # does not imply the user has already bought the item.
        result = {}
        for userkey, tmpdict in resultDict.items():
            sortList = sorted(tmpdict.items(), key=lambda x: x[1], reverse=True)
            # Fewer than N candidates -> take them all.
            result[userkey] = sortList[:N]
        return result

    def writeRecomResult(self, recomResult, recommendedResultOBJPath):
        """
        Persist the recommendation result with pickle, and also dump a
        human-readable copy to <path>.txt for reference.
        :param recomResult: recommendation result object
        :param recommendedResultOBJPath: path of the pickle file
        :return: None
        """
        with open(recommendedResultOBJPath, 'wb') as p_file:
            pickle.dump(recomResult, p_file)
        with open(recommendedResultOBJPath + '.txt', 'w') as f:
            for userKey, recomList in recomResult.items():
                f.write(userKey + "::" + str(recomList) + "\n")
        # Round-trip read-back (kept from the original flow).
        self.readRecomResult(recommendedResultOBJPath)

    def readRecomResult(self, recommendedResultOBJPath):
        """
        Load the recommendation result back from the pickle file.
        NOTE(security): pickle is only safe on trusted, locally produced files.
        :param recommendedResultOBJPath: path of the pickle file
        :return: the recommendation result object
        """
        with open(recommendedResultOBJPath, 'rb') as p_file:
            recomResult = pickle.load(p_file)
        return recomResult

    def upload2DB(self, recomResult):
        """
        Upload the final recommendations into the recommendation table.
        :param recomResult: {user_key: [(goods_key, score), ...]}
        :return: None
        """
        db = pymysql.connect("localhost", "root", "admin")
        cursor = db.cursor()
        cursor.execute("use cherishmall;")
        try:
            for userKey, recomList in recomResult.items():
                tmpList = [{"goodId": goodsId, "num": num}
                           for goodsId, num in recomList]
                jsonStr = json.dumps(tmpList)
                nowTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                userid = int(userKey.split("_")[1])
                # Parameterized query instead of %-interpolation: avoids SQL
                # injection and the original's quoting of an int as '%d'.
                cursor.execute(
                    "insert into recommended_lys(user_id_lys,goods_id_lys,add_time_lys)"
                    " values(%s,%s,%s)",
                    (userid, jsonStr, nowTime))
                db.commit()
        except Exception as e:
            print("Exception", e)
            db.rollback()
        finally:
            # Always release the connection, even when an error occurred.
            db.close()
if __name__ == "__main__":
    loadModel = LoadModel('model.csv', 0.8)
    # Recompute-and-persist pipeline (currently disabled):
    # allDict = loadModel.predict(loadModel.vertex, loadModel.M, loadModel.alpha)[0]
    # recomResult = loadModel.analysis(loadModel.vertex, allDict, loadModel.M, 5)
    # loadModel.writeRecomResult(recomResult, 'recommendedResultOBJ')
    recomResult = loadModel.readRecomResult("recommendedResultOBJ")
    print(recomResult)
    loadModel.upload2DB(recomResult)
数据库
对应的二部图:
注:A、B、C、D分别为user_1、user_2、user_3、user_4 ; a、b、c、d分别为good_1、good_2、good_3、good_4