Traditional Recommendation Methods: FM (Factorization Machines)

Copyright notice: this post is the blogger's own compilation of study notes. Reposting is welcome; please credit the source: https://blog.csdn.net/qq_39388410/article/details/84958683

When doing matrix factorization (MF) with FunkSVD, solving for the P and Q matrices in the end still came down to the usual routine: gradient descent on the gap between the predicted y and the actual y. So, starting from that idea, can we turn this directly into a regression model over multiple features?

y=w_{0}+\sum\limits_{i=1}^{n}w_{i}x_{i}, where n is the number of features.

A plain linear model, however, ignores any interaction between features, so we add a pairwise term:

y=w_{0}+\sum\limits_{i=1}^{n}w_{i}x_{i}+\sum\limits_{i=1}^{n-1}\sum\limits_{j=i+1}^{n}w_{ij}x_{i}x_{j}

But when the data matrix is very sparse, samples where x_{i} and x_{j} are both non-zero are extremely rare, so w_{ij} effectively cannot be learned from training alone: a pair of features that never co-occurs in any sample receives no gradient signal at all. We therefore introduce an auxiliary vector V_{i}=(v_{i1},v_{i2},\cdots,v_{ik})^{T} for each feature, where k is a hyperparameter, and rewrite y as:

y=w_{0}+\sum\limits_{i=1}^{n}w_{i}x_{i}+\sum\limits_{i=1}^{n-1}\sum\limits_{j=i+1}^{n}(v_{i}^{T}v_{j})x_{i}x_{j}
That is, we introduce the matrix V whose i-th row is V_{i}^{T}:

V=(V_{1},V_{2},\cdots,V_{n})^{T}\in\mathbb{R}^{n\times k}

so that the interaction weight matrix is approximated as

W\approx VV^{T},\quad w_{ij}=v_{i}^{T}v_{j}
In other words, we have applied a kind of matrix factorization to W itself, and under high sparsity it is comparatively easy to estimate V. Next we need derivatives, so first simplify the pairwise term:

\sum\limits_{i=1}^{n-1}\sum\limits_{j=i+1}^{n}(v_{i}^{T}v_{j})x_{i}x_{j}=\frac{1}{2}\sum\limits_{f=1}^{k}\left[\left(\sum\limits_{i=1}^{n}v_{if}x_{i}\right)^{2}-\sum\limits_{i=1}^{n}v_{if}^{2}x_{i}^{2}\right]

which lowers the cost of evaluating the pairwise term from O(kn^{2}) to O(kn).
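With this form, the partial derivatives used by SGD follow directly (these are the standard FM gradients from Rendle's original paper, written out here for completeness):

\frac{\partial\hat{y}}{\partial\theta}=\begin{cases}1, & \theta=w_{0}\\ x_{i}, & \theta=w_{i}\\ x_{i}\sum\limits_{j=1}^{n}v_{jf}x_{j}-v_{if}x_{i}^{2}, & \theta=v_{if}\end{cases}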
Then stochastic gradient descent (SGD) with these gradients is all that remains. Below we use the classic MovieLens 100k dataset, collected by the GroupLens research group at the University of Minnesota: 100,000 ratings from 943 users on 1,682 movies. The data has four columns: user id, movie id, rating, and timestamp. Each (user, item) pair will be one-hot encoded into a single sparse feature row, which is exactly what vectorize_dic does in the code below.

            user  item  rating  timestamp
   0         1     1       5    874965758
   1         1     2       3    876893171
   2         1     3       4    878542960
   3         1     4       3    876893119
   4         1     5       3    889751712
   5         1     6       5    887431973
   6         1     7       4    875071561
   7         1     8       1    875072484
   8         1     9       5    878543541
   9         1    10       3    875693118

The FM code:

from itertools import count
from collections import defaultdict
from scipy.sparse import csr_matrix
import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm
#from tqdm import tqdm_notebook as tqdm

###### Data processing
# Turn the raw input into the sparse one-hot design matrix we need (CSR format)
def vectorize_dic(dic, ix=None, p=None, n=0, g=0):
    # dic: {field name: list of raw feature values}, one entry per sample
    # n: number of samples, g: number of fields, p: number of columns (fixed at test time)
    if ix is None:
        d = count(0)
        ix = defaultdict(lambda: next(d))  # assign a fresh column index to every unseen feature

    nz = n * g  # total non-zero entries: one per field per sample

    col_ix = np.empty(nz, dtype=int)  # column index of each non-zero entry

    i = 0
    for k, lis in dic.items():  # iterate over fields
        # suffix the value with the field name so the same raw value
        # in different fields maps to different columns
        col_ix[i::g] = [ix[str(el) + str(k)] for el in lis]
        i += 1

    row_ix = np.repeat(np.arange(0, n), g)  # row index of each non-zero entry
    data = np.ones(nz)
    if p is None:
        p = len(ix)

    ixx = np.where(col_ix < p)  # drop features unseen at training time (index >= p)
    return csr_matrix((data[ixx], (row_ix[ixx], col_ix[ixx])), shape=(n, p)), ix
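# A quick sanity check on a hypothetical toy input, to show the encoding:
#   X, ix = vectorize_dic({'users': [1, 2], 'items': [10, 10]}, n=2, g=2)
#   X.todense() -> [[1. 0. 1.],
#                   [0. 1. 1.]]   # columns: '1users', '2users', '10items'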

# Mini-batch generator
def batcher(X_, y_=None, batch_size=-1):
    n_samples = X_.shape[0]

    if batch_size == -1:
        batch_size = n_samples
    if batch_size < 1:
        raise ValueError('Parameter batch_size={} is unsupported'.format(batch_size))

    for i in range(0, n_samples, batch_size):
        upper_bound = min(i + batch_size, n_samples)
        ret_x = X_[i:upper_bound]
        ret_y = None
        if y_ is not None:
            ret_y = y_[i:upper_bound]
        yield (ret_x, ret_y)  # yield even when y_ is None, otherwise the generator is empty
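# Illustration (hypothetical): 5 samples with batch_size=2 yield batches of 2, 2 and 1:
#   for bX, bY in batcher(np.arange(10).reshape(5, 2), np.arange(5), batch_size=2):
#       print(bX.shape, bY)   # (2, 2) [0 1] / (2, 2) [2 3] / (1, 2) [4]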

# Load the data
cols = ['user','item','rating','timestamp']
train = pd.read_csv('data/ua.base',delimiter='\t',names = cols)
test = pd.read_csv('data/ua.test',delimiter='\t',names = cols)

print(train,test)
x_train,ix = vectorize_dic({'users':train['user'].values,
                            'items':train['item'].values},n=len(train.index),g=2)


x_test,ix = vectorize_dic({'users':test['user'].values,
                           'items':test['item'].values},ix,x_train.shape[1],n=len(test.index),g=2)

# The encoded sparse form
print(x_train)
y_train = train['rating'].values
y_test = test['rating'].values

# Dense matrix form of the encoded data
x_train = x_train.todense()
x_test = x_test.todense()
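# NOTE: todense() materializes the full one-hot matrix in memory. That is fine
# for MovieLens 100k, but for larger data you would keep the CSR matrix and
# feed sparse tensors instead.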

print(x_train)
print(x_train.shape)
print (x_test.shape)

####### Building the model in TensorFlow (1.x graph API)
# Model definition
n,p = x_train.shape

k = 10  # hyperparameter k: number of latent factors

x = tf.placeholder('float',[None,p])
y = tf.placeholder('float',[None,1])

w0 = tf.Variable(tf.zeros([1]))
w = tf.Variable(tf.zeros([p]))

v = tf.Variable(tf.random_normal([k,p],mean=0,stddev=0.01))
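# v must start at small random values: if v were initialized to zeros, the gradient
# of the pairwise term (x_i * sum_j v_jf x_j - v_if * x_i^2) would be identically
# zero and the factors could never leave zero.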

#y_hat = tf.Variable(tf.zeros([n,1]))

linear_terms = tf.add(w0, tf.reduce_sum(tf.multiply(w, x), 1, keep_dims=True))  # w0 + sum_i w_i x_i (row-wise sum)
# Pairwise interaction term, in the simplified O(kn) form derived above
pair_interactions = 0.5 * tf.reduce_sum(
    tf.subtract(
        tf.pow(tf.matmul(x, tf.transpose(v)), 2),
        tf.matmul(tf.pow(x, 2), tf.transpose(tf.pow(v, 2)))),
    axis=1, keep_dims=True)
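# Mapping to the formula: tf.matmul(x, v^T) computes S_f = sum_i v_if x_i for every
# factor f, and tf.matmul(x^2, (v^2)^T) computes sum_i v_if^2 x_i^2, so the line
# above is 0.5 * sum_f (S_f^2 - sum_i v_if^2 x_i^2).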

# The full prediction y_hat
y_hat = tf.add(linear_terms,pair_interactions)

# L2 regularization terms
lambda_w = tf.constant(0.001,name='lambda_w')
lambda_v = tf.constant(0.001,name='lambda_v')

l2_norm = tf.add(
    tf.reduce_sum(tf.multiply(lambda_w, tf.pow(w, 2))),
    tf.reduce_sum(tf.multiply(lambda_v, tf.pow(v, 2))))  # sum each penalty separately, so broadcasting does not count the w term k times

# Squared error (MSE) and regularized loss
error = tf.reduce_mean(tf.square(y-y_hat))
loss = tf.add(error,l2_norm)

train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)  # plain gradient descent
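# Design note: plain SGD mirrors the derivation above; Rendle's FM can also be
# trained with ALS or MCMC, and tf.train.AdamOptimizer is a common drop-in here.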

# Training
epochs = 1
batch_size = 5000

# Launch the graph
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    for epoch in tqdm(range(epochs), unit='epoch'):  # progress bar over epochs
        perm = np.random.permutation(x_train.shape[0])  # shuffle the training samples
        # iterate over mini-batches
        for bX, bY in batcher(x_train[perm], y_train[perm], batch_size):
            _, t = sess.run([train_op, loss], feed_dict={x: bX.reshape(-1, p), y: bY.reshape(-1, 1)})
            print(t)


    # Evaluate on the test set (one full batch, since batch_size defaults to -1)
    errors = []
    for bX, bY in batcher(x_test, y_test):
        errors.append(sess.run(error, feed_dict={x: bX.reshape(-1, p), y: bY.reshape(-1, 1)}))
        print(errors)
    RMSE = np.sqrt(np.array(errors).mean())
    print(RMSE)
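
As a quick, self-contained sanity check that the simplified pairwise term really matches the naive double loop, here is a small NumPy sketch with hypothetical toy values (independent of the script above):

import numpy as np

rng = np.random.RandomState(0)
x_ = rng.rand(7)       # one sample with 7 features
v_ = rng.rand(7, 3)    # 7 features, k = 3 latent factors (transposed vs. the tf variable v)

# naive O(k n^2) double loop over feature pairs
naive = sum((v_[i] @ v_[j]) * x_[i] * x_[j]
            for i in range(7) for j in range(i + 1, 7))

# simplified O(k n) form, as used in the TensorFlow graph
fast = 0.5 * np.sum((x_ @ v_) ** 2 - (x_ ** 2) @ (v_ ** 2))

assert np.allclose(naive, fast)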

