某购物网站推荐系统实现 | 数据库、推荐算法

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_40006058/article/details/83278865

MySQL数据库配置文件config.py分析

# MySQL connection settings (host, account, password, schema name).
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment before deploying anywhere shared.
HOST = 'localhost'
USER = 'root'
PASSWORD = '123456'
DATABASE = 'llllll'

数据库操作文件db.py分析

import pymysql                      # 数据库用的MySQL
from config import *

# Open a database connection
def get_db():
    """Open and return a new PyMySQL connection built from the config constants.

    The arguments are passed by keyword: PyMySQL 1.0 made the parameters of
    ``connect`` keyword-only, so the positional form raises ``TypeError``
    there. The keyword names used here are also accepted by older releases.

    Returns:
        A new connection object; the caller is responsible for closing it.
    """
    return pymysql.connect(host=HOST, user=USER, password=PASSWORD, database=DATABASE)

# Run a query and fetch every row
def select(sql, args=None):
    """Execute ``sql`` on a fresh connection and return all result rows.

    Args:
        sql: The statement to execute. When ``args`` is given, ``sql`` should
            use ``%s`` placeholders so the driver does the quoting.
        args: Optional parameters forwarded to ``cursor.execute``. With the
            default ``None`` the statement is sent exactly as written.

    Returns:
        Whatever ``cursor.fetchall()`` returns, or ``None`` when the query
        fails (the failure is logged and the transaction rolled back).
    """
    import logging

    db = get_db()
    try:
        with db.cursor() as cursor:                  # cursor is closed on exit
            cursor.execute(sql, args)
            return cursor.fetchall()
    except Exception:
        # Keep the historical "None on failure" contract, but leave a trace
        # and no longer swallow KeyboardInterrupt / SystemExit.
        logging.getLogger(__name__).exception('select failed: %s', sql)
        db.rollback()
        return None
    finally:
        db.close()                                   # always release the connection

# Run a query and fetch a single row
def select_one(sql, args=None):
    """Execute ``sql`` on a fresh connection and return the first result row.

    Args:
        sql: The statement to execute. When ``args`` is given, ``sql`` should
            use ``%s`` placeholders so the driver does the quoting.
        args: Optional parameters forwarded to ``cursor.execute``. With the
            default ``None`` the statement is sent exactly as written.

    Returns:
        Whatever ``cursor.fetchone()`` returns (``None`` when there is no
        row), or ``None`` when the query fails (the failure is logged and the
        transaction rolled back).
    """
    import logging

    db = get_db()
    try:
        with db.cursor() as cursor:                  # cursor is closed on exit
            cursor.execute(sql, args)
            return cursor.fetchone()
    except Exception:
        # Keep the historical "None on failure" contract, but leave a trace
        # and no longer swallow KeyboardInterrupt / SystemExit.
        logging.getLogger(__name__).exception('select_one failed: %s', sql)
        db.rollback()
        return None
    finally:
        db.close()                                   # always release the connection

推荐算法1(购买过此物品的用户还购买过)recommend1.py分析

| 属性名称 | 类型 | 描述 |
| --- | --- | --- |
| good_id | int | 商品编号 |
| good_img | string | 商品图片 |
| id | int | 商品编号 |
| img | string | 商品图片 |
| title | string | 商品名称 |
| eva_num | int | 商品评价人数 |

# itemgetter 用于获取对象的哪些位置的数据,参数即为代表位置的序号值
from operator import itemgetter
from db import select

# Select the users who bought good_id, together with everything they bought
def get_data(good_id):
    """Group the purchases of every buyer of ``good_id`` by user.

    Args:
        good_id: Integer id of the good; it is interpolated with ``%d``.

    Returns:
        A dict mapping each user id to a list of dicts with the keys
        ``id``, ``img``, ``title`` and ``eva_num``, one per purchased good.
    """
    sql = 'SELECT u_g.userId, u_g.goodsId, g.img, g.title, g.eva_num FROM user_goods u_g, goods g WHERE userId in (SELECT userId FROM user_goods WHERE goodsId = %d) AND u_g.goodsId = g.id' %good_id
    purchases_by_user = {}
    for user_id, goods_id, img, title, eva_num in select(sql):
        # The first purchase of a user creates that user's list.
        purchases_by_user.setdefault(user_id, []).append(
            {'id': goods_id, 'img': img, 'title': title, 'eva_num': eva_num})
    return purchases_by_user

"""
>>> import numpy as np
>>> data = {}
>>> result = np.array([[1,11,111],[2,22,222],[3,33,333],[1,44,444]])
>>> for row in result:
...     if row[0] not in data:
...         data[row[0]] = []
...     data[row[0]].append({'id':row[1],'title':row[2]})
...
>>> data
{1: [{'id': 11, 'title': 111}, {'id': 44, 'title': 444}], 2: [{'id': 22, 'title': 222}], 3: [{'id': 33, 'title': 333}]}
"""

# "Users who bought this item also bought" recommendation
def recommend1(good_id, count):
    """Recommend the goods most often bought together with ``good_id``.

    Args:
        good_id: Id of the good being viewed; it is excluded from the result.
        count: Maximum number of goods to return.

    Returns:
        A list of good-info dicts (as produced by ``get_data``), ordered by
        how many of the fetched purchase records contain the good, most
        frequent first. Fewer than ``count`` items are returned when fewer
        candidates exist.
    """
    data = get_data(good_id)
    goods_count = {}                                # good id -> number of co-purchases
    goods_info = {}                                 # good id -> first info dict seen for it
    for purchases in data.values():
        for good in purchases:
            if good['id'] == good_id:               # skip the good we are recommending for
                continue
            goods_count[good['id']] = goods_count.get(good['id'], 0) + 1
            goods_info.setdefault(good['id'], good)
    # Always sort and slice: a slice longer than the list simply returns all
    # of it. The previous special case for "count >= number of candidates"
    # handed back the unsorted dict and then failed indexing its int keys.
    ranked = sorted(goods_count.items(), key=itemgetter(1), reverse=True)[:count]
    count_sort = [candidate_id for candidate_id, _ in ranked]
    print('所要推荐的商品的序号', count_sort)
    return [goods_info[candidate_id] for candidate_id in count_sort]

if __name__ == '__main__':
    # get_data(1)
    for good in recommend1(1, 6):                   # demo: ask for 6 recommendations for good 1
        print(good)

推荐算法2(根据用户的相似度推荐商品) recommend2.py分析

import math
from operator import itemgetter
from db import select

# Load the purchase rows the algorithm works on
def get_data():
    """Return every purchase row joined with the details of the good.

    Each row is ``(userId, goodsId, img, title, eva_num)`` as selected by
    the query below.
    """
    return select('SELECT u_g.userId, u_g.goodsId, g.img, g.title, g.eva_num FROM user_goods u_g, goods g WHERE u_g.goodsId = g.id')

# Recommendation based on the cosine similarity of users' purchases
def recommend2(user_id, user_num, recommend_num):
    """Recommend goods bought by the users most similar to ``user_id``.

    Similarity between two users is the number of goods both bought divided
    by ``sqrt(len(goods of A) * len(goods of B))``. Each candidate good is
    scored by the summed similarity of the neighbours who bought it.

    Args:
        user_id: The user to recommend for.
        user_num: How many of the most similar users to consult.
        recommend_num: Maximum number of goods to return.

    Returns:
        A list of good-info dicts (keys ``id``, ``img``, ``title``,
        ``eva_num``), highest score first. The list is empty when the user
        has no purchases or shares no good with any other user; previously
        both cases raised ``KeyError``.
    """
    result = get_data()
    goods_info = {}                                         # good id -> info dict
    user_goods_matrix = {}                                  # user -> [good ids]
    for row in result:
        user_goods_matrix.setdefault(row[0], []).append(row[1])
        goods_info.setdefault(row[1], {'id': row[1], 'img': row[2], 'title': row[3], 'eva_num': row[4]})

    if user_id not in user_goods_matrix:                    # unknown user: nothing to base a recommendation on
        return []
    user_goods = set(user_goods_matrix[user_id])            # goods the target user already owns

    good_user_matrix = {}                                   # inverted index: good -> {users}
    for user, goods in user_goods_matrix.items():
        for good in goods:
            good_user_matrix.setdefault(good, set()).add(user)

    # Count goods shared with the target user only; similarities between
    # other pairs of users are never read, so they are not computed.
    shared_counts = {}                                      # other user -> number of shared goods
    for users in good_user_matrix.values():
        if user_id not in users:
            continue
        for other in users:
            if other != user_id:
                shared_counts[other] = shared_counts.get(other, 0) + 1

    own_size = len(user_goods_matrix[user_id])
    similarities = {
        other: num / math.sqrt(own_size * len(user_goods_matrix[other]))
        for other, num in shared_counts.items()
    }
    # key=itemgetter(1) sorts by similarity; reverse=True puts the best first.
    neighbours = sorted(similarities.items(), key=itemgetter(1), reverse=True)[:user_num]

    scores = {}                                             # candidate good -> accumulated similarity
    for other, similar in neighbours:
        for good in user_goods_matrix[other]:
            if good in user_goods:                          # already bought by the target user
                continue
            scores[good] = scores.get(good, 0) + similar
    ranked = sorted(scores.items(), key=itemgetter(1), reverse=True)[:recommend_num]

    return [goods_info[good] for good, _ in ranked]

if __name__ == '__main__':
    # demo: user 2, consult the 2 most similar users, return up to 4 goods
    print(recommend2(2, 2, 4))

推荐算法3(根据商品的相似度推荐)recommend3.py分析

import json
from operator import itemgetter
from db import select,select_one
from jieba import posseg, analyse                                          # 分词、关键词提取

# Load the candidate goods for the given page number
def get_data(current_page):
    """Return ``(id, img, title, eva_num)`` rows for goods outside one id window.

    The excluded window covers 24 consecutive ids starting at
    ``(current_page - 1) * 24 + 1``.
    """
    window_start = (current_page - 1) * 24 + 1
    window_end = window_start + 24                  # exclusive upper bound
    sql = 'SELECT id, img, title, eva_num FROM goods WHERE id NOT IN ' \
          '(SELECT id FROM goods WHERE id >= %d AND id < %d)' %(window_start, window_end)
    return select(sql)

# Recommendation based on the similarity of goods (title keywords)
def recommend3(current_page, goods_num):
    """Recommend goods whose title keywords overlap the stored reference keywords.

    The reference keywords are read from the ``word`` table row whose id is
    ``current_page``. Every good returned by ``get_data(current_page)`` is
    scored by the share of reference keywords found among its own title
    keywords.

    Args:
        current_page: Id used to look up the reference keywords and to select
            the candidate goods.
        goods_num: Maximum number of goods to return.

    Returns:
        A list of good-info dicts (keys ``id``, ``img``, ``title``,
        ``eva_num``), highest score first. The list is empty when no
        reference keywords are stored for ``current_page``.
    """
    sql = 'SELECT words FROM word WHERE id = %d' %current_page
    row = select_one(sql)
    if not row or not row[0]:                                              # no keyword row: nothing to compare against
        return []
    # The stored value is parsed as JSON after swapping single quotes for
    # double quotes.
    result_words = json.loads(row[0].replace('\'', '\"'))
    if not result_words:                                                   # avoid dividing by zero below
        return []
    wanted = set(result_words)

    other_goods_info = {}                                                  # good id -> info dict
    for row in get_data(current_page):
        other_goods_info[row[0]] = {'id': row[0], 'img': row[1], 'title': row[2], 'eva_num': row[3]}

    with open('stop_words.txt', 'r', encoding='utf-8') as obj:
        # Strip the line endings: with readlines() every entry kept its
        # trailing newline, so no segmented word could ever match.
        stop_words = {line.strip() for line in obj}

    recommend_sort = {}                                                    # good id -> overlap ratio
    for good_key, info in other_goods_info.items():
        # Keep the noun-like tokens of the title that are not stop words.
        nouns = [
            token.word
            for token in posseg.cut(info['title'])
            if token.flag.startswith('n') and token.word not in stop_words
        ]
        # NOTE(review): str(nouns) feeds the list's repr (brackets and quotes
        # included) to the extractor; kept as-is because the stored reference
        # keywords may have been produced the same way - confirm.
        tags = analyse.extract_tags(str(nouns), topK=4)
        recommend_sort[good_key] = len(set(tags) & wanted) / len(result_words)

    ranked = sorted(recommend_sort.items(), key=itemgetter(1), reverse=True)[:goods_num]
    count_sort = [good_key for good_key, _ in ranked]                      # ids to recommend, best first
    print('推荐序号', count_sort)
    return [other_goods_info[good_key] for good_key in count_sort]

if __name__ == '__main__':
    # demo: page/keyword id 2, at most 5 goods (the return value is discarded)
    recommend3(2,5)

响应状态码文件status_code.py 分析

from flask import jsonify

# Predefined status payloads: each pairs a numeric code with a message
STATUS_CODE_200 = {'code': 200, 'message': 'OK all right.'}
STATUS_CODE_201 = {'code': 201, 'message': 'All created.'}
STATUS_CODE_204 = {'code': 204, 'message': 'All deleted.'}
STATUS_CODE_400 = {'code': 400, 'message': 'Bad request.'}
STATUS_CODE_403 = {'code': 403, 'message': 'You can not do this.'}
STATUS_CODE_404 = {'code': 404, 'message': 'No result matched.'}

# Response that carries data
def data_response(status_code, data):
    """Return a JSON response with the keys ``status`` and ``data``."""
    payload = {'status': status_code, 'data': data}
    return jsonify(payload)

# Response that carries paged data
def data_with_page_response(status_code, data, current_page, total_pages, total_items):
    """Return a JSON response with ``status``, ``data`` and the paging fields."""
    payload = {
        'status': status_code,
        'data': data,
        'current_page': current_page,
        'total_pages': total_pages,
        'total_items': total_items,
    }
    return jsonify(payload)

# Response that carries only a status
def status_response(status_code):
    """Return a JSON response whose only key is ``status``."""
    payload = {'status': status_code}
    return jsonify(payload)

Flask框架主程序app.py分析

import os
from math import ceil
from flask import Flask, send_file as _send_file
from db import select, select_one
from status_code import *
from recommend1 import recommend1
from recommend2 import recommend2
from recommend3 import recommend3
app = Flask(__name__)  # application object that the @app.route handlers register on

# beken code
def get_folder(folder):
    """Join ``folder`` onto the directory that contains this source file."""
    this_file = os.path.abspath(__file__)
    base_dir = os.path.dirname(this_file)
    return os.path.join(base_dir, folder)

def send_file(filename):
    """Send ``filename`` from the ``webroot`` directory via Flask's ``send_file``."""
    relative_path = os.path.join('webroot', filename)
    return _send_file(relative_path)

# Page routes: return the HTML pages
@app.route('/detail/<int:id>')
def detail(id):
    """Return ``detail.html``; the ``id`` path value is not used by this handler."""
    return send_file('detail.html')

@app.route('/')
def index():
    """Return ``index.html`` for the site root."""
    return send_file('index.html')

# Paged goods list API: /api/goods/<int:page>/<int:offset>
# The client sends a GET request with the page to show (`page`, starting at 1)
# and the number of items per page (`offset`).
# Responds with data, total_pages, current_page and total_items.
@app.route('/api/goods/<int:page>/<int:offset>')
def goods_list_page(page, offset):
    """Return one page of goods together with the paging totals.

    Args:
        page: 1-based page number.
        offset: Number of items per page.

    Returns:
        A paged JSON response. ``data`` is empty when ``page`` lies outside
        ``1..total_pages``. A page size below 1 yields a 400 status payload
        instead of a division by zero.
    """
    total_items = select_one("SELECT COUNT(id) FROM goods")[0]     # total number of goods
    if offset < 1:
        return data_with_page_response(STATUS_CODE_400, [], current_page=page, total_pages=0, total_items=total_items)
    total_page = int(ceil(total_items / offset))                   # total number of pages

    columns = ('id', 'img', 'origin_country', 'brand', 'tag', 'title',
               'cell_price', 'pefer_price', 'activity', 'explains', 'eva_num')
    data = []
    # Both bounds must hold; the former `or` was always true, so page 0
    # produced a negative LIMIT start.
    if 0 < page <= total_page:
        start = (page - 1) * offset
        sql = "SELECT %s FROM goods LIMIT %d, %d" % (', '.join(columns), start, offset)
        data = [dict(zip(columns, row)) for row in select(sql)]
    return data_with_page_response(STATUS_CODE_200, data, current_page=page, total_pages=total_page, total_items=total_items)

# Single good API: /api/goods/<int:id>
@app.route('/api/goods/<int:id>')
def get_good_by_id(id):
    """Return the details of one good as a one-element list.

    Args:
        id: Id of the good (the name is fixed by the route pattern).

    Returns:
        A JSON response whose ``data`` holds one dict for the good, or a 404
        status payload with empty ``data`` when no row is returned
        (previously this raised ``TypeError``).
    """
    columns = ('id', 'img', 'origin_country', 'brand', 'tag', 'title',
               'cell_price', 'pefer_price', 'activity', 'service',
               'explains', 'eva_score', 'eva_num', 'sun_num')
    sql = "SELECT %s FROM goods WHERE id = %d" % (', '.join(columns), id)
    row = select_one(sql)
    if row is None:                                 # unknown id, or the query failed
        return data_response(STATUS_CODE_404, [])
    return data_response(STATUS_CODE_200, [dict(zip(columns, row))])

# Keyword search API: /api/search/<string:keyword>/<int:goods_num>
@app.route('/api/search/<string:keyword>/<int:goods_num>')
def find_goods_by_name(keyword, goods_num):
    """Return up to ``goods_num`` goods whose title contains ``keyword``.

    Args:
        keyword: Search text taken from the URL (untrusted input).
        goods_num: Maximum number of rows to return.

    Returns:
        A JSON response whose ``data`` is a list of good dicts.
    """
    # The keyword comes straight from the URL and is embedded in a quoted SQL
    # literal, so escape it first; unescaped, a quote in the keyword could
    # terminate the literal and inject SQL.
    # NOTE(review): escape_string assumes the server's default backslash
    # escaping; passing the value as a bound query parameter would be more
    # robust once the query helper supports it.
    from pymysql.converters import escape_string

    print('keyword', keyword, 'good_num', goods_num)
    columns = ('id', 'img', 'origin_country', 'brand', 'tag', 'title',
               'cell_price', 'pefer_price', 'activity', 'explains', 'eva_num')
    sql = "SELECT %s FROM goods WHERE title LIKE '%%%s%%' LIMIT %d" % (
        ', '.join(columns), escape_string(keyword), goods_num)
    print(sql)
    data = [dict(zip(columns, row)) for row in select(sql)]
    return data_response(STATUS_CODE_200, data)

# "Users who bought this also bought" API: /api/goods/recommend1/<int:good_id>/<int:count>
@app.route('/api/goods/recommend1/<int:good_id>/<int:count>')
def get_recommend1_goods(good_id, count):
    """Return the result of ``recommend1(good_id, count)`` as a JSON data response."""
    return data_response(STATUS_CODE_200, recommend1(good_id, count))

# User-similarity recommendation API: /api/goods/recommend2/<int:user_id>/<int:user_num>/<int:recommend_num>
@app.route('/api/goods/recommend2/<int:user_id>/<int:user_num>/<int:recommend_num>')
def get_recommend2_goods(user_id, user_num, recommend_num):
    """Return the result of ``recommend2`` for the given user as a JSON data response."""
    return data_response(STATUS_CODE_200, recommend2(user_id, user_num, recommend_num))

# Title-keyword recommendation API: /api/goods/recommend3/<int:current_page>/<int:recommend_num>
@app.route('/api/goods/recommend3/<int:current_page>/<int:recommend_num>')
def get_recommend3_goods(current_page, recommend_num):
    """Return the result of ``recommend3(current_page, recommend_num)`` as a JSON data response."""
    return data_response(STATUS_CODE_200, recommend3(current_page, recommend_num))

if __name__ == '__main__':
    # Start the application with Flask's default run settings
    app.run()

本文根据github整理

猜你喜欢

转载自blog.csdn.net/qq_40006058/article/details/83278865
今日推荐