python_surprise_KNNWithMeans_GridSearchCV

# Tune the parameters of the KNNWithMeans algorithm with GridSearchCV
import os

from surprise import KNNWithMeans
from surprise import Dataset
from surprise.model_selection import GridSearchCV

# Use the built-in MovieLens 100k dataset
data = Dataset.load_builtin('ml-100k')
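# Optional sketch (not in the original post): a quick baseline run with default
# KNNWithMeans parameters before any tuning, using Surprise's cross_validate.
from surprise.model_selection import cross_validate

cross_validate(KNNWithMeans(), data, measures=['rmse', 'mae'], cv=3, verbose=True)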

# Parameter grid for KNNWithMeans: number of neighbors k, minimum number of
# neighbors, and the similarity options. user_based is set to False so the
# similarities (and get_neighbors below) are computed over movies, not users.
param_grid = {
    'k': [20, 40],
    'min_k': [1, 5],
    'sim_options': {
        'name': ['msd', 'pearson'],
        'user_based': [False],
    },
}
gs = GridSearchCV(KNNWithMeans, param_grid, measures=['rmse', 'mae'], cv=3)

gs.fit(data)

# Best RMSE and MAE scores over the grid
print(gs.best_score['rmse'])
print(gs.best_score['mae'])

# Parameter combinations that gave the best RMSE and MAE scores
print(gs.best_params['rmse'])
print(gs.best_params['mae'])
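# Optional sketch (assumes pandas is installed): gs.cv_results holds the
# per-combination metrics and can be inspected as a table.
import pandas as pd

results_df = pd.DataFrame.from_dict(gs.cv_results)
print(results_df)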
# Build the final model with the parameters that gave the best RMSE
algo = gs.best_estimator['rmse']
# Train that model on the full trainset
algo.fit(data.build_full_trainset())
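# Optional sketch: a single prediction with the tuned model. Raw user ID '196'
# and raw item ID '302' are illustrative IDs from ml-100k; r_ui is the known
# rating, passed only so it appears in the printed output.
pred = algo.predict('196', '302', r_ui=4, verbose=True)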
# Get the inner ID of the movie whose raw ID is '192' from the trainset
movie_inner_id = algo.trainset.to_inner_iid('192')
# Item-based collaborative filtering: inner IDs of the 10 most similar movies
movie_neighbors = algo.get_neighbors(movie_inner_id, k=10)
print(movie_neighbors)
# Build dictionaries mapping raw movie IDs to movie names and back
def read_item_names():
    """
    Return two dicts read from the ml-100k u.item file:
    raw movie ID -> movie name, and movie name -> raw movie ID.
    """

    file_name = (os.path.expanduser('~') + '/.surprise_data/ml-100k/ml-100k/u.item')
    rid_to_name = {}
    name_to_rid = {}
    with open(file_name, 'r', encoding='ISO-8859-1') as f:
        for line in f:
            line = line.split('|')
            rid_to_name[line[0]] = line[1]
            name_to_rid[line[1]] = line[0]

    return rid_to_name, name_to_rid

# Get the two mappings between movie names and raw movie IDs
rid_to_name, name_to_rid = read_item_names()
# Generator expression: convert the neighbors' inner IDs back to raw movie IDs
movie_neighbors = (algo.trainset.to_raw_iid(inner_id) for inner_id in movie_neighbors)
# Generator expression: convert the raw IDs to movie names
movie_neighbors = (rid_to_name[rid] for rid in movie_neighbors)
for movie in movie_neighbors:
    print(movie)
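# Optional sketch: instead of hard-coding the raw ID '192', look a movie up by
# its exact u.item title via name_to_rid (the title string below is an
# assumption and must match u.item exactly).
toy_story_inner_id = algo.trainset.to_inner_iid(name_to_rid['Toy Story (1995)'])
toy_story_neighbors = algo.get_neighbors(toy_story_inner_id, k=10)
print([rid_to_name[algo.trainset.to_raw_iid(inner_id)] for inner_id in toy_story_neighbors])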
