Implementing the paper SoRec: Social Recommendation Using Probabilistic Matrix Factorization

GitHub link
Paper link

Dataset: Epinions dataset
Data URL: http://www.datatang.com/datares/go.aspx?dataid=602020
The dataset contains 49,290 users who rated a total of 139,738 different items at least once,
writing 664,824 reviews, and who issued 487,181 trust statements.
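
The loaders in run.py below expect each line of ratings_data.txt to be a whitespace-separated triple "user item rating", and each line of trust_data.txt a triple "truster trustee trust_value". A minimal, optional sanity check (the file paths are the defaults assumed in run.py; adjust them to your local layout):

# Peek at the first few lines of each file to confirm the whitespace-separated
# triple format assumed by the loaders in run.py (paths are assumptions).
for path in ("epinions_dataset/ratings_data.txt", "epinions_dataset/trust_data.txt"):
    with open(path) as f:
        for _ in range(3):
            print(path, "->", f.readline().strip())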

Sorec.py

import math
import numpy as np
from numpy.random import RandomState
from scipy.sparse import csr_matrix
from scipy.sparse import coo_matrix
from matplotlib import pyplot as plt
import time
import copy


# Map a rating in [1, 5] to [0, 1]
def fx(x):
    return (x - 1) / 4

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# The derivative of sigmoid
def sigmoid_(x):
    val = sigmoid(x)
    return val*(1-val)

class MF:
    def __init__(self, R, Validate, C, lr=0.01, momentum=0.8,
                 lambda_c=10, lambda_u=0.001, lambda_v=0.001, lambda_z=0.001, latent_size=10, iters=1000, seed=None):
        """
        :param R: training set of the ratings data (sparse user-item matrix)
        :param Validate: validation set of the ratings data
        :param C: trust matrix of users (sparse user-user matrix)
        :param lr: learning rate
        :param momentum: momentum coefficient for the gradient updates
        :param lambda_c: weight of the social (trust) term in the objective
        :param lambda_u: regularization weight for U
        :param lambda_v: regularization weight for V
        :param lambda_z: regularization weight for Z
        :param latent_size: dimension of the latent factors
        :param iters: number of training iterations
        :param seed: random seed for initialization
        """
        self.R = R
        self.Validate= Validate
        self.C = C
        self.lambda_c = lambda_c
        self.lambda_u = lambda_u
        self.lambda_z = lambda_z
        self.lambda_v = lambda_v
        self.latent_size = latent_size
        self.random_state = RandomState(seed)
        self.iters = iters
        self.lr = lr
        # Latent factors: U for users, V for items, Z for the social (trust) factors
        self.U = np.mat(self.random_state.rand(latent_size, np.size(R, 0)))  # latent_size x n_users
        self.V = np.mat(self.random_state.rand(latent_size, np.size(R, 1)))  # latent_size x n_items
        self.Z = np.mat(self.random_state.rand(latent_size, np.size(C, 1)))  # latent_size x n_users
        self.momentum = momentum


    # MAE on the training set
    def train_loss(self, UVdata):
        loss = (np.fabs(4 * sigmoid(UVdata) + 1 - self.R.data)).sum()
        loss /= self.R.data.shape[0]  # average over the observed ratings
        return loss

    # MAE on the validation set
    def vali_loss(self):
        dif = 0.0
        index = self.Validate.nonzero()
        data = self.Validate.data
        tot = data.shape[0]
        for k in range(tot):
            predict = 4*sigmoid((self.U[:,index[0][k]].T*self.V[:,index[1][k]])[0,0])+1
            dif += math.fabs(data[k] - predict)
        return dif / tot


    def train(self):
        Rindex = self.R.nonzero()
        Cindex = self.C.nonzero()
        Rdata = fx(self.R.data)
        Cdata = self.C.data
        Rnum = Rdata.shape[0]
        Cnum = Cdata.shape[0]
        UVdata = copy.deepcopy(Rdata)
        UZdata = copy.deepcopy(Cdata)
        momentum_u = np.mat(np.zeros(self.U.shape))
        momentum_v = np.mat(np.zeros(self.V.shape))
        momentum_z = np.mat(np.zeros(self.Z.shape))
        train_loss_list = []
        validate_loss_list = []
        begin = time.time()
        minloss = 5.0
        for it in range(self.iters):
            start = time.time()
            # Current inner products U_i^T V_j and U_i^T Z_k for the observed entries
            for k in range(Rnum):
                UVdata[k] = (self.U[:, Rindex[0][k]].T * self.V[:, Rindex[1][k]])[0, 0]
            for k in range(Cnum):
                UZdata[k] = (self.U[:, Cindex[0][k]].T * self.Z[:, Cindex[1][k]])[0, 0]

            # Error terms (prediction - target) * sigmoid'(inner product), stored as sparse matrices
            UV = csr_matrix(((sigmoid(UVdata) - Rdata) * sigmoid_(UVdata), Rindex), shape=self.R.shape)
            UZ = csr_matrix(((sigmoid(UZdata) - Cdata) * sigmoid_(UZdata), Cindex), shape=self.C.shape)

            U = csr_matrix(self.U)
            V = csr_matrix(self.V)
            Z = csr_matrix(self.Z)

            # Gradients of the SoRec objective with respect to U, V and Z
            grads_u = self.lambda_u*U + UV.dot(V.T).T + self.lambda_c * UZ.dot(Z.T).T
            grads_v = UV.T.dot(U.T).T + self.lambda_v*V
            grads_z = self.lambda_c * UZ.T.dot(U.T).T + self.lambda_z*Z
            momentum_u = self.momentum * momentum_u + grads_u
            momentum_v = self.momentum * momentum_v + grads_v
            momentum_z = self.momentum * momentum_z + grads_z

            self.U = self.U - self.lr*momentum_u
            self.V = self.V - self.lr*momentum_v
            self.Z = self.Z - self.lr*momentum_z
            trloss = self.train_loss(UVdata)
            valiloss = self.vali_loss()
            train_loss_list.append(trloss)
            validate_loss_list.append(valiloss)
            # Append the losses to disk (context managers ensure the file handles are closed)
            with open("train_loss_list___.txt", 'a+') as traintxt:
                traintxt.write(str(trloss) + "\n")
            with open("validate_loss_list___.txt", 'a+') as valitxt:
                valitxt.write(str(valiloss) + "\n")
            #np.savetxt("train_loss_list.txt", train_loss_list)
            #np.savetxt("validate_loss_list.txt", validate_loss_list)
            end = time.time()
            print("iter:{}, last_train_loss:{}, validate_loss:{}, timecost:{}, have run:{}".format(it + 1, trloss,
                                                                                                   valiloss,
                                                                                                   end - start,
                                                                                                   end - begin))
            np.savetxt("U_last.txt", self.U)
            np.savetxt("V_last.txt", self.V)
            np.savetxt("Z_last.txt", self.Z)
            if trloss<minloss:
                minloss = trloss
                np.savetxt("U_min" + ".txt", self.U)
                np.savetxt("V_min" + ".txt", self.V)
                np.savetxt("Z_min" + ".txt", self.Z)

        x = np.linspace(1, self.iters, self.iters)
        plt.plot(x, train_loss_list, 'b', label='train_loss')
        plt.plot(x, validate_loss_list, 'r', label='validate_loss')
        plt.legend()
        plt.title("iteration-loss")
        plt.xlabel('iterations')
        plt.ylabel('loss')
        plt.savefig('iteration-loss.png')
        plt.show()
        return self.U, self.V, self.Z, train_loss_list, validate_loss_list
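
For reference, the gradient steps in train() correspond, as far as I can tell, to minimizing the SoRec objective below, where r*_ij = (r_ij - 1)/4 are the mapped ratings, c*_ik are the degree-weighted trust values built in run.py, and sigma is the logistic function (a sketch in LaTeX; the notation is mine, not from the original post):

% Objective implied by the gradient code above (notation assumed)
\mathcal{L}(U,V,Z)
  = \tfrac{1}{2}\sum_{(i,j)\in\Omega_R}\bigl(\sigma(U_i^{\top}V_j) - r^{*}_{ij}\bigr)^2
  + \tfrac{\lambda_C}{2}\sum_{(i,k)\in\Omega_C}\bigl(\sigma(U_i^{\top}Z_k) - c^{*}_{ik}\bigr)^2
  + \tfrac{\lambda_U}{2}\lVert U\rVert_F^2
  + \tfrac{\lambda_V}{2}\lVert V\rVert_F^2
  + \tfrac{\lambda_Z}{2}\lVert Z\rVert_F^2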

run.py

import numpy as np
from numpy import random
import Sorec
from scipy.sparse import coo_matrix
from scipy.sparse import csr_matrix
import copy
import math
from tqdm import tqdm
def sigmoid(x):
    return 1.0/(1.0+np.exp(-x))

def get_trust_data(filename="epinions_dataset/trust_data.txt",theshape=(49290,49290)):
    with open(filename) as f:
        lines = f.readlines()
    row = []
    col = []
    data = []
    for line in tqdm(lines):
        alist = line.strip('\n').split()
        row.append(int(alist[0])-1)
        col.append(int(alist[1])-1)
        data.append(float(alist[2]))
    mtx = coo_matrix((data, (row, col)), shape=theshape)
    indeg = mtx.sum(axis=0)   # in-degree of each trustee (how many users trust them)
    outdeg = mtx.sum(axis=1)  # out-degree of each truster (how many users they trust)
    factor = copy.deepcopy(mtx)
    # Replace each trust entry with the degree-based weight sqrt(indeg_j / (indeg_j + outdeg_i)),
    # i.e. the c*_ik re-weighting used in the SoRec paper
    for k in range(factor.data.shape[0]):
        i = factor.row[k]
        j = factor.col[k]
        factor.data[k] = math.sqrt(indeg[0, j] / (indeg[0, j] + outdeg[i, 0]))
    return csr_matrix(factor)

def get_ratings_data(filename="epinions_dataset/ratings_data.txt",theshape=(49290,139739)):
    with open(filename) as f:
        lines = f.readlines()
    train_data = []
    train_row = []
    train_col = []
    vali_data = []
    vali_row = []
    vali_col = []

    # Shuffle and split the ratings into 99% training / 1% validation
    random.shuffle(lines)
    ind = -1
    pos = int(len(lines)*0.99)
    for line in tqdm(lines):
        ind += 1
        alist = line.strip('\n').split()
        if ind>=pos:
            vali_row.append(int(alist[0]))
            vali_col.append(int(alist[1]))
            vali_data.append(int(alist[2]))
            continue
        train_row.append(int(alist[0]))
        train_col.append(int(alist[1]))
        train_data.append(int(alist[2]))

    train_mtx = csr_matrix((train_data, (train_row,train_col)), shape=theshape, dtype='float64')
    vali_mtx = csr_matrix((vali_data, (vali_row, vali_col)), shape=theshape, dtype='float64')
    return train_mtx, vali_mtx

if __name__ == '__main__':
    print("loading data...")
    trust_data = get_trust_data()
    # train set and validate set
    ratings_data_train, ratings_data_validate = get_ratings_data()
    print("loading done...")
    print("begin to train...")
    socmodel = Sorec.MF(ratings_data_train, ratings_data_validate, trust_data, lr=0.01, momentum=0.5, latent_size=10,iters=300)
    U,V,Z,train_loss_list, validate_loss_list = socmodel.train()
    print("train done...")

Source: blog.csdn.net/q1072118803/article/details/104665790