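# Code pasted in first; notes to be completed later.
# pip install surprise  (published on PyPI as scikit-surprise)
# A C/C++ build environment may be needed to compile the package.
# The instructor said there are few resources online, so rely on the official documentation.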
# --- Part 1: cross-validate a basic k-NN model on MovieLens-100k ---------
from surprise import KNNBasic, SVD
from surprise import Dataset
from surprise.model_selection import cross_validate

# Built-in movie ratings dataset: user id, movie id, rating and one more
# field per record.  Surprise may prompt to download it on first use.
data = Dataset.load_builtin("ml-100k")

algo = KNNBasic()

# Evaluation: 3-fold cross-validation reporting RMSE and MAE.
# `cross_validate` replaces the removed `data.split()` / `evaluate()` /
# `print_perf()` trio; `verbose=True` prints the per-fold table that
# `print_perf` used to print.
perf = cross_validate(algo, data, measures=["RMSE", "MAE"], cv=3, verbose=True)
# --- Part 2: hyper-parameter search for SVD --------------------------------
import pandas as pd
from surprise import Dataset, SVD
from surprise.model_selection import GridSearchCV

# Candidate values for each hyper-parameter: 2 * 2 * 2 = 8 combinations.
# The instructor's advice: prefer the defaults unless tuning is really
# needed, because an exhaustive search is expensive.
param_grid = {
    "n_epochs": [5, 10],
    "lr_all": [0.002, 0.005],
    "reg_all": [0.4, 0.6],
}

# Surprise ships roughly ten prediction algorithms; SVD is tuned here.
# `GridSearchCV` replaces the removed `GridSearch`; it stores its results
# under lower-case measure names, so lower-case keys are used throughout.
grid_search = GridSearchCV(SVD, param_grid, measures=["rmse", "fcp"], cv=3)

data = Dataset.load_builtin("ml-100k")
grid_search.fit(data)

# After fitting, the best score and the parameters that produced it are
# kept per measure.
print(grid_search.best_score["rmse"])
print(grid_search.best_params["rmse"])
print(grid_search.best_score["fcp"])
print(grid_search.best_params["fcp"])

# Load the full results into a DataFrame to inspect every combination.
results_df = pd.DataFrame.from_dict(grid_search.cv_results)
print(results_df)
# --- Part 3: use a fitted model to make recommendations --------------------
# NOTE: the original line here imported absolute_import, division,
# print_function and unicode_literals from a non-existent `__feature__`
# module.  The intended `from __future__ import ...` is only legal as the
# first statement of a file, and all four features are already the default
# on Python 3, so the import is dropped rather than corrected in place.
import os
import io
from surprise import KNNBaseline
from surprise import Dataset
def read_item_names(file_name="./ml-100k/.u.item"):
    """Build lookup tables between raw item ids and movie titles.

    Every line of the file is split on ``"|"``; the first field is used as
    the raw item id and the second field as the movie title.

    Args:
        file_name: Path of the pipe-delimited item file.  Defaults to the
            path that was hard-coded in the original script.

    Returns:
        A ``(rid_to_name, name_to_rid)`` tuple of dicts mapping raw id to
        title and title to raw id.  Ids are kept as strings.
    """
    # NOTE(review): the MovieLens-100k item file is normally named
    # "u.item" -- confirm the leading dot in the default path is intended.
    rid_to_name = {}
    name_to_rid = {}
    # The file uses a special encoding, so it is opened explicitly as
    # ISO-8859-1; `with` guarantees the handle is closed.
    with io.open(file_name, "r", encoding="ISO-8859-1") as f:
        for line in f:
            fields = line.rstrip("\n").split("|")
            if len(fields) < 2:
                # Blank or malformed line: no id/title pair to record.
                continue
            raw_id, title = fields[0], fields[1]
            rid_to_name[raw_id] = title
            name_to_rid[title] = raw_id
    return rid_to_name, name_to_rid
# --- Part 3 (continued): the 10 movies most similar to Toy Story -----------
from surprise import Dataset, KNNBaseline

# Train on the whole dataset (no held-out fold) so every item gets an
# inner id in the similarity matrix.
data = Dataset.load_builtin("ml-100k")
trainset = data.build_full_trainset()

# Item-item similarities ("user_based": False) using the baseline-corrected
# Pearson measure, which Surprise names "pearson_baseline".
sim_options = {"name": "pearson_baseline", "user_based": False}
algo = KNNBaseline(sim_options=sim_options)
algo.fit(trainset)  # `fit` replaces the removed `train` method

rid_to_name, name_to_rid = read_item_names()

# Movie title -> raw id as it appears in the data file.
# The original looked up "NOW AND THEN 1995", which contradicts the Toy
# Story naming and message used everywhere else in this section.
# NOTE(review): confirm this title matches an entry in the item file.
toy_story_raw_id = name_to_rid["Toy Story (1995)"]

# Raw id -> inner id, i.e. the index used inside the trainset.
toy_story_inner_id = algo.trainset.to_inner_iid(toy_story_raw_id)

# Inner ids of the 10 nearest neighbours.
toy_story_neighbors = algo.get_neighbors(toy_story_inner_id, k=10)

# Translate back: inner id -> raw id -> movie title.
toy_story_neighbors = (
    algo.trainset.to_raw_iid(inner_id) for inner_id in toy_story_neighbors
)
toy_story_neighbors = (rid_to_name[rid] for rid in toy_story_neighbors)

print()
print("The 10 nearest neighbors of Toy Story are : ")
for movie in toy_story_neighbors:
    print(movie)