Recommender System
Note: If you find this blog useful, don't forget to like and bookmark it. I update content related to artificial intelligence and big data every week. Most of it is original: Python, Java, Scala and SQL code; CV, NLP and recommender systems; Spark, Flink, Kafka, HBase, Hive, Flume and more. It is all practical, hands-on material, along with walkthroughs of papers from top conferences, so that we can make progress together.
Today I will continue the series by sharing an SVD matrix factorization code example.
#博学谷IT Learning Technical Support
Article directory
foreword
Today I will share a code example of SVD matrix factorization as used in recommender systems.
1. Read the movie dataset
import pandas as pd
import numpy as np
# Column names for the three header-less ml-1m tables.
users_Name = ['user_id', 'gender', 'age', 'work', 'zip']
ratings_Name = ['user_id', 'movies_id', 'ratings', 'timeStamp']
# NOTE(review): 'calss' looks like a typo (probably "class"/genres); kept as-is
# because other code may already reference this column name.
movie_Name = ['movie_id', 'title', 'calss']


def _read_dat(path, names):
    """Load one header-less, '::'-delimited .dat file into a DataFrame.

    The separator is longer than one character, which the default C parser
    does not support; requesting the python engine explicitly avoids the
    ParserWarning pandas emits when it has to fall back on its own.
    """
    return pd.read_table(path, encoding='latin-1', sep='::', header=None,
                         names=names, engine='python')


users = _read_dat('./dataset/ml-1m/users.dat', users_Name)
ratings = _read_dat('./dataset/ml-1m/ratings.dat', ratings_Name)
movies = _read_dat('./dataset/ml-1m/movies.dat', movie_Name)

# Quick sanity check: record counts and the first five rows of each table.
print('用户表记录数:', len(users), '评分表记录数:', len(ratings), '电影表记录数:', len(movies))
print('**********用户表前五条记录**********')
print(users.head(5))
print('**********评分表前五条记录**********')
print(ratings.head(5))
print('********电影表前五条记录************')
print(movies.head(5))
2. SVD
class SVD:
    """Biased matrix-factorization rating predictor trained with SGD.

    A rating is predicted as ``mean + bu[u] + bi[i] + dot(pu[u], qi[i])``.
    User and item ids are used directly as row indices into the parameter
    arrays, so they must lie in ``[0, n_users)`` and ``[0, n_items)``.
    """

    def __init__(self, learning_rate, regularized_rate, max_step, n_users, n_items, n_factors,
                 random_state=None):
        """Allocate the model parameters.

        Args:
            learning_rate: SGD step size.
            regularized_rate: L2 regularization coefficient.
            max_step: maximum number of SGD updates (rows) performed by ``fit``.
            n_users: number of rows in the user parameter arrays.
            n_items: number of rows in the item parameter arrays.
            n_factors: dimensionality of the latent factor vectors.
            random_state: optional seed for the factor initialization, for
                reproducible runs.
        """
        self.learning_rate = learning_rate
        self.regularized_rate = regularized_rate
        self.max_step = max_step
        self.bu = np.zeros(n_users, np.double)
        self.bi = np.zeros(n_items, np.double)
        # The factors must start non-zero: with all-zero pu and qi both factor
        # gradients (err * qi, err * pu) are zero and the factors never move.
        rng = np.random.default_rng(random_state)
        self.pu = rng.normal(0.0, 0.1, (n_users, n_factors))
        self.qi = rng.normal(0.0, 0.1, (n_items, n_factors))
        self.mean = 0

    def get_pred_value(self, u, i):
        """Return the predicted rating of user index ``u`` for item index ``i``."""
        return self.mean + self.bu[u] + self.bi[i] + np.dot(self.pu[u], self.qi[i])

    def fit(self, X):
        """Run one SGD pass over ``X`` (at most ``max_step`` rows).

        Args:
            X: DataFrame with 'user_id', 'movies_id' and 'ratings' columns.

        Returns:
            self, so calls can be chained.
        """
        if len(X) == 0:
            return self

        # Global average rating; the biases and factors model deviations from it.
        self.mean = float(X['ratings'].mean())
        lr, reg = self.learning_rate, self.regularized_rate

        # Count steps positionally rather than by index label, so the limit
        # also holds for frames whose index is not 0..n-1.
        samples = zip(X['user_id'], X['movies_id'], X['ratings'])
        for step, (u, i, r) in enumerate(samples):
            if step >= self.max_step:
                break
            u, i = int(u), int(i)  # ids may arrive as floats; indices must be ints
            err = r - self.get_pred_value(u, i)
            self.bu[u] += lr * (err - reg * self.bu[u])
            self.bi[i] += lr * (err - reg * self.bi[i])
            # Copy: self.pu[u] is a view, and the qi update below must use the
            # user factors from *before* this step's pu update.
            pu_old = self.pu[u].copy()
            self.pu[u] += lr * (err * self.qi[i] - reg * pu_old)
            self.qi[i] += lr * (err * pu_old - reg * self.qi[i])
        return self

    def transform(self, X):
        """Predict a rating for every row of ``X``.

        Args:
            X: DataFrame with 'user_id' and 'movies_id' columns.

        Returns:
            List of predictions in row order, independent of ``X``'s index labels.
        """
        return [self.get_pred_value(int(u), int(i))
                for u, i in zip(X['user_id'], X['movies_id'])]
if __name__ == '__main__':
    # Hyper-parameters. The original script used these names without defining
    # them; the values below are illustrative defaults -- tune as needed.
    learning_rate = 0.005
    regularized_rate = 0.02
    n_factors = 20
    max_step = len(ratings)  # one full pass over the rating table

    # Ids are used directly as array indices, so the arrays must be large
    # enough to hold the largest id (hence max + 1 rather than a distinct count).
    n_users = int(ratings['user_id'].max()) + 1
    n_items = int(ratings['movies_id'].max()) + 1

    algo = SVD(learning_rate=learning_rate, regularized_rate=regularized_rate, max_step=max_step,
               n_users=n_users, n_items=n_items, n_factors=n_factors)
    model = algo.fit(ratings)
    result = model.transform(ratings)
    print(result[:10])
Get the output result:
[0.712061395401814, 0.19680425997661766, 0.2770262567858081, 0.4959207041970778, 0.6168076353251164, 0.9046821307908399, 0.29919052853100275, 0.554204908108176, 0.30406646989523367, 0.6900106995374652]
Summarize
This article walked through a code example of a custom SVD matrix factorization implementation for a recommender system.