[Code Review] (2008, KDD) Factorization meets the neighborhood: a multifaceted collaborative filtering model

tobigs-recsys·2021년 4월 22일
4

Recommender_System

목록 보기
2/22
post-thumbnail

작성자 : 이혜린

Factorization Meets the Neighborhood : a Multifaceted Collaborative Filtering Model


코드 출처

Github link

실행 방법

  1. 데이터셋 다운 : Netflix Datasets
  2. 경로지정 : ./Datasets/파일명
    utils.py 에서 경로를 ./Datasets/로 수정하기
  3. 데이터 정제 : rating_compiler.py 실행
  4. 모델 파일 실행
    • baseline_estimator.py
    • correlation_based_neighbourhood_model.py
    • correlation_based_implicit_neighbourhood_model.py
    • svd_more_more.py
    • integrated_model.py

Baseline_estimator.py

  • user bias와 item bias 계산
    $b_{ui} = \mu + b_{u} + b_{i}$
    $b_{u}$ : user bias (같은 item, 다른 user일 때 생기는 bias)
    $b_{i}$ : item bias (다른 item, 같은 user일 때 생기는 bias)
  • Loss function

    $\min\limits_{b_{*}} \sum_{(u,i)\in K}(r_{ui} - \mu - b_{u} - b_{i})^2 + \lambda_{1}\big(\sum_{u}b_{u}^2 + \sum_{i}b_{i}^2\big)$

def compute_loss(mat, mu, bu, bi, l_reg=0.02):
    """Return the baseline model's squared error, without and with L2 penalty.

    The residual ``r_ui - mu - b_u - b_i`` is evaluated only on the stored
    entries of ``mat``.

    Args:
        mat: Sparse rating matrix; entries different from 0 are counted as
            observed ratings.
        mu: Global offset subtracted from every stored rating.
        bu: User biases, flattened in row order of ``mat``.
        bi: Item biases, flattened in column order of ``mat``.
        l_reg: Weight of the L2 penalty on ``bu`` and ``bi``.

    Returns:
        Tuple ``(squared_error, squared_error + penalty)``.
    """
    # A CSC matrix stores its values column by column, so repeating each item
    # bias by the number of entries in its column lines it up with ``.data``.
    entries_per_item = np.array((mat != 0).sum(0)).ravel()
    item_bias_per_entry = np.repeat(bi.ravel(), entries_per_item)

    # CSR stores values row by row, so the same trick works for user biases.
    entries_per_user = np.array((mat != 0).sum(1)).T.ravel()
    user_bias_per_entry = np.repeat(bu.ravel(), entries_per_user)

    residual = sparse.csc_matrix(mat).copy()
    residual.data[:] -= item_bias_per_entry
    residual.data[:] -= mu

    # Switch to row-major storage before removing the user biases.
    residual = sparse.csr_matrix(sparse.coo_matrix(residual))
    residual.data[:] -= user_bias_per_entry

    squared_error = (residual.data[:] ** 2).sum()
    penalty = l_reg * ((bu ** 2).sum() + (bi ** 2).sum())

    return squared_error, squared_error + penalty
  • Parameter $b_{u}, b_{i}$ : Gradient Descent Update
    Loss function을 $b_{u}, b_{i}$에 대해 편미분한 값을 gradient로 사용
def baseline_estimator(mat, mat_file, l_reg=0.02, learning_rate=0.0000025):
    """Fit the user and item biases of the baseline predictor mu + b_u + b_i.

    Runs 200 full-batch gradient-descent steps on the regularised squared
    error and prints the loss on every 10th step.

    Args:
        mat: Sparse rating matrix (users x movies); entries different from 0
            are counted as observed ratings.
        mat_file: Forwarded unchanged to ``pre_processing``.
        l_reg: L2 regularisation weight.
        learning_rate: Gradient-descent step size.

    Returns:
        Tuple ``(bu, bi)`` with shapes ``(no_users, 1)`` and
        ``(1, no_movies)``, sized for the subsampled matrix.
    """
    # Keep only the top-left 1/128 x 1/128 corner so the run stays fast, then
    # drop the users and movies that are left without a single rating.
    mat = mat[0:mat.shape[0] // 128, 0:mat.shape[1] // 128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]

    print(mat.shape)
    no_users, no_movies = mat.shape

    # Presumably bi_index[u] holds the movie indices rated by user u and
    # bu_index[i] the user indices that rated movie i - verify against
    # pre_processing.
    bu_index, bi_index = pre_processing(mat, mat_file)

    # Biases start uniformly at random in [-1, 1).
    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1

    mu = mat.data[:].mean()
    rating_sum_per_user = mat.sum(1)
    rating_sum_per_movie = mat.sum(0)

    ratings_per_user = np.array((mat != 0).sum(1))
    ratings_per_movie = np.array((mat != 0).sum(0))

    print("Train...")
    n_iter = 200
    for it in range(n_iter):
        # Both bias sums are taken before either bias vector is updated.
        bi_flat = bi.ravel()
        bu_flat = bu.ravel()
        bi_sum = np.array([bi_flat[movies].sum() for movies in bi_index]).reshape((no_users, 1))
        bu_sum = np.array([bu_flat[users].sum() for users in bu_index]).reshape((1, no_movies))

        user_residual = rating_sum_per_user - ratings_per_user * mu - ratings_per_user * bu - bi_sum
        bu_gradient = - 2.0 * user_residual + 2.0 * l_reg * bu
        bu -= learning_rate * bu_gradient

        movie_residual = rating_sum_per_movie - ratings_per_movie * mu - ratings_per_movie * bi - bu_sum
        bi_gradient = - 2.0 * movie_residual + 2.0 * l_reg * bi
        bi -= learning_rate * bi_gradient

        if it % 10 == 0:
            print("compute loss...")
            print(compute_loss(mat, mu, bu, bi, l_reg=l_reg))

    return bu, bi

Correlation_based_neighborhood_model.py

  • 2. More on Neighborhood Model: Basic Item-oriented Neighborhood Model

  • Explicit data(1 - 5 범위의 평가점수)만을 사용한 Neighborhood model

  • Rating Function
    $\hat{r}_{ui} = b_{ui} + \frac{\sum_{j \in S^{k}(i; u)} s_{ij}(r_{uj} - b_{uj})}{\sum_{j \in S^{k}(i; u)} s_{ij}}$

def predict_r_ui(mat, u, i, mu, S, Sk_iu, baseline_bu, baseline_bi):
    """Predict r_ui with the similarity-weighted item neighbourhood model.

    The prediction is the baseline ``b_ui`` plus the similarity-weighted mean
    of the baseline residuals ``r_uj - b_uj`` over the items in ``Sk_iu``.

    Args:
        mat: Sparse rating matrix indexed as ``mat[u, items]``.
        u: User index.
        i: Item index.
        mu: Global offset of the baseline.
        S: Sparse item-item similarity matrix.
        Sk_iu: Indices of the neighbour items of ``i``.
        baseline_bu: User biases, indexed as ``baseline_bu[u]``.
        baseline_bi: Item biases, indexed as ``baseline_bi[0, item]``.

    Returns:
        The prediction, with the shape of ``baseline_bu[u]``. When the
        similarities to all neighbours sum to zero the plain baseline
        ``b_ui`` is returned instead of dividing by zero.
    """
    bui = mu + baseline_bu[u] + baseline_bi[0, i]

    weights = S[i, Sk_iu].toarray().ravel()
    total_weight = weights.sum()
    if total_weight == 0:
        # No usable neighbour: the weighted mean is undefined, so fall back to
        # the baseline rather than returning NaN/inf.
        return bui

    buj = mu + baseline_bu[u] + baseline_bi[0, Sk_iu]
    residuals = mat[u, Sk_iu].toarray().ravel() - buj
    return bui + (weights * residuals).sum() / total_weight
  • Loss function
    Baseline의 Loss function과 동일

  • Shrunk coefficients matrix

    $s_{ij} = \frac{n_{ij}}{n_{ij} + \lambda_{2}} \rho_{ij}$

    • item간 similarity를 의미하는 pearson correlation 기반의 shrunk coefficients matrix 계산
    • $n_{ij}$ = number of users who rated both items i and j
    • 더 많은 수의 유저가 평가할 수록 유사도가 높다고 판단할 수 있기에 이를 반영하여 유사도 식 도출
def correlation_based_neighbourhood_model(mat, mat_file, l_reg2=100.0, k=250):
    """Predict every stored rating with the correlation-based item-kNN model.

    Builds the shrunk similarity ``s_ij = n_ij / (n_ij + l_reg2) * rho_ij``
    and, for each stored rating, predicts it from the ``k`` items most
    similar to its item.

    Args:
        mat: Sparse rating matrix (users x movies).
        mat_file: Unused here; kept for a signature parallel to the other
            models.
        l_reg2: Shrinkage constant lambda_2 of the similarity.
        k: Number of most-similar items used as neighbourhood.

    Returns:
        CSR matrix of predictions with the shape of the subsampled matrix and
        entries at the positions of the stored ratings.
    """
    # Keep only the top-left 1/128 x 1/128 corner so the run stays fast, then
    # drop the users and movies that are left without a single rating.
    mat = mat[0:mat.shape[0] // 128, 0:mat.shape[1] // 128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]

    print(mat.shape)
    no_users, no_movies = mat.shape

    # NOTE(review): the baseline biases should come from baseline_estimator;
    # they are drawn at random here, as in the original test setup.
    baseline_bu = np.random.rand(no_users, 1) * 2 - 1
    baseline_bi = np.random.rand(1, no_movies) * 2 - 1

    mu = mat.data[:].mean()

    # n_ij: number of users who rated both i and j. The indicator matrix is
    # forced to float so the shrink factors below are not truncated to 0 when
    # the ratings are stored with an integer dtype.
    indicator = sparse.csr_matrix(mat, dtype=np.float64, copy=True)
    indicator.data[:] = 1.0
    shrink = indicator.T.dot(indicator)
    shrink.data[:] = shrink.data / (shrink.data + l_reg2)

    # The shrinkage is element-wise; `*` between scipy sparse matrices would
    # be a matrix product, so use multiply() explicitly.
    S = sparse.csr_matrix(shrink.multiply(compute_sparse_correlation_matrix(mat)))

    print("Computation...")
    cx = mat.tocoo()
    neighbours = {}
    rows, cols, preds = [], [], []
    for u, i, _ in zip(cx.row, cx.col, cx.data):
        if i not in neighbours:
            # Flatten the row first so that [:k] really truncates to the k
            # most similar items (descending similarity).
            # NOTE(review): item i itself is not excluded from its own
            # neighbourhood - confirm this is intended.
            neighbours[i] = np.argsort(S[i].toarray().ravel())[::-1][:k]
        Sk_iu = neighbours[i]
        r_ui = predict_r_ui(mat, u, i, mu, S, Sk_iu, baseline_bu, baseline_bi)
        rows.append(u)
        cols.append(i)
        preds.append(r_ui[0])

    r_ui_pred = sparse.csr_matrix((preds, (rows, cols)), shape=mat.shape)

    # Sum of the signed prediction errors over the stored ratings.
    print((mat - r_ui_pred).sum())

    return r_ui_pred

Correlation_based_implicit_neighborhood_model.py

  • 2. More on Neighborhood Model: Final Neighborhood Model

  • explicit data와 implicit data(사용자가 아이템을 평가했는지에 대한 여부(0 or 1)를 나타내는 데이터) 모두 사용한 최종 neighborhood model

  • Rating Function

    $\hat{r}_{ui} = \mu + b_{u} + b_{i} + \lvert R^{k}(i; u) \rvert^{-\frac{1}{2}} \sum_{j \in R^{k}(i; u)}(r_{uj} - b_{uj})w_{ij} + \lvert N^{k}(i; u) \rvert^{-\frac{1}{2}} \sum_{j \in N^{k}(i; u)}c_{ij}$

    • Notation
      • $R(u)$ : the set of items rated by user u (explicit feedback)
      • $N(u)$ : the set of items for which user u provided implicit feedback
      • $S^{k}(i)$ : the k items most similar to item i
      • $R^{k}(i; u)$ : intersection of $R(u)$ and $S^{k}(i)$
      • $N^{k}(i; u)$ : intersection of $N(u)$ and $S^{k}(i)$
    • Parameter
      • $b_{u}$
      • $b_{i}$
      • $w_{ij}$ : global weights instead of user specific weights
      • $c_{ij}$ : offsets added to baseline estimates
def predict_r_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi):
    """Predict r_ui with the neighbourhood model using explicit and implicit data.

    Computes ``mu + b_u + b_i
    + |Rk|^-1/2 * sum_{j in Rk} (r_uj - b_uj) * w_ij
    + |Nk|^-1/2 * sum_{j in Nk} c_ij``.

    Args:
        mat: Sparse rating matrix indexed as ``mat[u, items]``.
        u: User index.
        i: Item index.
        mu: Global offset.
        bu: User biases, indexed as ``bu[u]``.
        bi: Item biases, indexed as ``bi[0, i]``.
        Rk_iu: Item indices used for the explicit (``w_ij``) term.
        wij: Item-item weights, indexed as ``wij[i][items]``.
        Nk_iu: Item indices used for the implicit (``c_ij``) term.
        cij: Item-item offsets, indexed as ``cij[i][items]``.
        baseline_bu: Fixed baseline user biases used in ``b_uj``.
        baseline_bi: Fixed baseline item biases used in ``b_uj``.

    Returns:
        The prediction, with the shape of ``bu[u]``. An empty ``Rk_iu`` or
        ``Nk_iu`` contributes nothing instead of dividing by zero.
    """
    prediction = mu + bu[u] + bi[0, i]

    if len(Rk_iu) > 0:
        buj = mu + baseline_bu[u] + baseline_bi[0, Rk_iu]
        residuals = mat[u, Rk_iu].toarray().ravel() - buj
        prediction = prediction + (residuals * wij[i][Rk_iu]).sum() / sqrt(len(Rk_iu))

    if len(Nk_iu) > 0:
        # The implicit offsets are summed over Nk_iu (not Rk_iu), matching the
        # normalisation by |Nk_iu|.
        prediction = prediction + cij[i][Nk_iu].sum() / sqrt(len(Nk_iu))

    return prediction

def compute_e_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi):
    """Return the prediction error ``mat[u, i] - predict_r_ui(...)`` for one rating."""
    predicted = predict_r_ui(
        mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi
    )
    observed = mat[u, i]
    return observed - predicted
  • Loss function

    $\min\limits_{b_{*}, w_{*}, c_{*}}\sum_{(u,i)\in K}\bigg(r_{ui} - \mu - b_{u} - b_{i} - \lvert N^{k}(i; u) \rvert^{-\frac{1}{2}} \sum_{j \in N^{k}(i; u)}c_{ij} - \lvert R^{k}(i; u) \rvert^{-\frac{1}{2}} \sum_{j \in R^{k}(i; u)}(r_{uj} - b_{uj})w_{ij}\bigg)^2 + \lambda_{4}\bigg(b_{u}^2 + b_{i}^2 + \sum_{j \in R^{k}(i; u)}w_{ij}^2 + \sum_{j \in N^{k}(i; u)}c_{ij}^2\bigg)$

def compute_loss(mat, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, l_reg=0.002):
    """Return the neighbourhood model's squared error, without and with penalty.

    For every stored rating the squared prediction error is accumulated, and
    the penalty adds ``l_reg * (b_u^2 + b_i^2 + sum w_ij^2 + sum c_ij^2)`` for
    that rating's own user, item and neighbour sets.

    Args:
        mat: Sparse rating matrix; its stored entries are the training set.
        mu, bu, bi, wij, cij, baseline_bu, baseline_bi: Model parameters,
            forwarded to ``predict_r_ui``.
        Rk_iu: Item indices of the explicit neighbourhood.
        Nk_iu: Item indices of the implicit neighbourhood.
        l_reg: Regularisation weight.

    Returns:
        Tuple ``(squared_error, squared_error + penalty)``.
    """
    # NOTE(review): one Rk_iu / Nk_iu pair is applied to every rating; the
    # training loop builds them per item, so the value is only indicative.
    loss = 0
    loss_reg = 0
    cx = mat.tocoo()
    for u, i, r_ui in zip(cx.row, cx.col, cx.data):
        r_ui_pred = predict_r_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi)
        loss += (r_ui - r_ui_pred) ** 2

        # Penalise only the parameters that take part in this rating; the
        # c_ij weights are the ones indexed by Nk_iu.
        bias_penalty = (bu[u] ** 2).sum() + bi[0, i] ** 2
        weight_penalty = (wij[i][Rk_iu] ** 2).sum()
        offset_penalty = (cij[i][Nk_iu] ** 2).sum()
        loss_reg += l_reg * (bias_penalty + weight_penalty + offset_penalty)

    return loss, loss + loss_reg
  • Parameter $b_{u}, b_{i}, w_{ij}, c_{ij}$ : Gradient Descent Update
def correlation_based_implicit_neighbourhood_model(mat, mat_file, l_reg=0.002, gamma=0.005, l_reg2=100.0, k=250):
    """Train the neighbourhood model with explicit and implicit feedback by SGD.

    Learns ``b_u``, ``b_i``, ``w_ij`` and ``c_ij`` over 200 epochs; the step
    size is multiplied by 0.99 after every epoch and the loss is printed on
    every 10th epoch.

    Args:
        mat: Sparse rating matrix (users x movies).
        mat_file: Forwarded unchanged to ``pre_processing``.
        l_reg: Regularisation weight.
        gamma: Initial SGD step size.
        l_reg2: Shrinkage constant lambda_2 of the item similarity.
        k: Number of most-similar items used as neighbourhood.

    Returns:
        Tuple ``(bu, bi, wij, cij)`` for the subsampled matrix.
    """
    # Keep only the top-left 1/128 x 1/128 corner so the run stays fast, then
    # drop the users and movies that are left without a single rating.
    mat = mat[0:mat.shape[0] // 128, 0:mat.shape[1] // 128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]

    print(mat.shape)
    no_users, no_movies = mat.shape

    # NOTE(review): the baseline biases should come from baseline_estimator;
    # they are drawn at random here, as in the original test setup.
    baseline_bu = np.random.rand(no_users, 1) * 2 - 1
    baseline_bi = np.random.rand(1, no_movies) * 2 - 1

    # The returned indices are not used below; the call is kept in case
    # pre_processing has side effects tied to mat_file - TODO confirm.
    bu_index, bi_index = pre_processing(mat, mat_file)

    # All parameters start uniformly at random in [-1, 1).
    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1
    wij = np.random.rand(no_movies, no_movies) * 2 - 1
    cij = np.random.rand(no_movies, no_movies) * 2 - 1

    mu = mat.data[:].mean()

    # n_ij: number of users who rated both i and j. The indicator matrix is
    # forced to float so the shrink factors below are not truncated to 0 when
    # the ratings are stored with an integer dtype.
    indicator = sparse.csr_matrix(mat, dtype=np.float64, copy=True)
    indicator.data[:] = 1.0
    shrink = indicator.T.dot(indicator)
    shrink.data[:] = shrink.data / (shrink.data + l_reg2)

    # The shrinkage is element-wise; `*` between scipy sparse matrices would
    # be a matrix product, so use multiply() explicitly.
    S = sparse.csr_matrix(shrink.multiply(compute_sparse_correlation_matrix(mat)))

    print("Train...")
    n_iter = 200
    cx = mat.tocoo()
    # S is fixed during training, so each item's neighbourhood is computed once.
    neighbours = {}
    for it in range(n_iter):
        t0 = time()
        for u, i, _ in zip(cx.row, cx.col, cx.data):
            if i not in neighbours:
                # Flatten the row first so that [:k] really truncates to the
                # k most similar items (descending similarity).
                neighbours[i] = np.argsort(S[i].toarray().ravel())[::-1][:k]
            # The same item set serves as explicit and implicit neighbourhood.
            Rk_iu = Nk_iu = neighbours[i]
            e_ui = compute_e_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi)

            bu[u] += gamma * (e_ui - l_reg * bu[u])
            bi[0, i] += gamma * (e_ui - l_reg * bi[0, i])

            buj = mu + baseline_bu[u] + baseline_bi[0, Rk_iu]
            wij[i][Rk_iu] += gamma * (1 / sqrt(len(Rk_iu)) * e_ui * (mat[u, Rk_iu].toarray().ravel() - buj) - l_reg * wij[i][Rk_iu])
            cij[i][Nk_iu] += gamma * (1 / sqrt(len(Nk_iu)) * e_ui - l_reg * cij[i][Nk_iu])
        gamma *= 0.99

        if it % 10 == 0:
            t1 = time()
            # "\\ " prints the same backslash as before without relying on an
            # invalid escape sequence.
            print(it, "\\ ", n_iter, "(%.2g sec)" % (t1 - t0))
            print("compute loss...")
            # NOTE(review): Rk_iu / Nk_iu here are those of the last rating
            # visited, so the printed loss is only indicative.
            print(compute_loss(mat, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, l_reg=l_reg))

    return bu, bi, wij, cij

svd_more_more.py

  • 3. More on Latent Factor Model: Modify2 (SVD++) = Final Latent Model

  • Added specific user latent vector ($p_{u}$)

  • Rating function

    $\hat{r}_{ui} = b_{ui} + q_{i}^{T}\bigg(p_{u} + \lvert N(u) \rvert^{-\frac{1}{2}} \sum_{j \in N(u)} y_{j} \bigg)$

    • Parameter
      • $b_{u}$
      • $b_{i}$
      • $q_{i}$ : item-factors vector
      • $p_{u}$ : user-factors vector
      • $y_{j}$ : implicit-feedback item-factors vector (characterizes user u through the items in $N(u)$)
def predict_r_ui(mat, u, i, mu, bu, bi, qi, pu, N_u, yj):
    """Predict r_ui with the SVD++ rule.

    Computes ``mu + b_u + b_i + q_i . (p_u + |N_u|^-1/2 * sum_{j in N_u} y_j)``.
    ``mat`` is accepted for a signature parallel to the other models but is
    not read.

    Args:
        mat: Rating matrix (unused).
        u: User index.
        i: Item index.
        mu: Global offset.
        bu: User biases, indexed as ``bu[u]``.
        bi: Item biases, indexed as ``bi[0, i]``.
        qi: Item factor matrix, one row per item.
        pu: User factor matrix, one row per user.
        N_u: Indices of the rows of ``yj`` that are summed for this user.
        yj: Implicit item factor matrix, one row per item.

    Returns:
        The prediction, with the shape of ``bu[u]``.
    """
    implicit_profile = yj[N_u].sum(0) / sqrt(len(N_u))
    user_factors = pu[u] + implicit_profile
    baseline = mu + bu[u] + bi[0, i]
    return baseline + np.dot(qi[i], user_factors)

def compute_e_ui(mat, u, i, mu, bu, bi, qi, pu, N_u, yj):
    """Return the prediction error ``mat[u, i] - predict_r_ui(...)`` for one rating."""
    predicted = predict_r_ui(mat, u, i, mu, bu, bi, qi, pu, N_u, yj)
    observed = mat[u, i]
    return observed - predicted
  • Loss function

    $\min\limits_{q_{*}, x_{*}, y_{*}, b_{*}}\sum_{(u,i)\in K}\bigg(r_{ui} - \mu - b_{u} - b_{i} - q_{i}^{T} \Big( \lvert R(u) \rvert^{-\frac{1}{2}} \sum_{j \in R(u)}(r_{uj} - b_{uj})x_{j} + \lvert N(u) \rvert^{-\frac{1}{2}} \sum_{j \in N(u)} y_{j} \Big) \bigg)^2 + \lambda_{4}\bigg( b_{u}^2 + b_{i}^2 + \|q_{i}\|^2 + \sum_{j \in R(u)}\|x_{j}\|^2 + \sum_{j \in N(u)}\|y_{j}\|^2 \bigg)$

def compute_loss(mat, mu, bu, bi, qi, pu, N_u, yj, l_reg6=0.005, l_reg7=0.015):
    """Return the SVD++ squared error, without and with L2 penalty.

    For every stored rating the squared prediction error is accumulated, and
    the penalty adds ``l_reg6 * (b_u^2 + b_i^2)`` and
    ``l_reg7 * (|q_i|^2 + |p_u|^2 + sum_{j in N_u} |y_j|^2)`` for that
    rating's own user and item.

    Args:
        mat: Sparse rating matrix; its stored entries are the training set.
        mu, bu, bi, qi, pu, yj: Model parameters, forwarded to
            ``predict_r_ui``.
        N_u: Indices of the rows of ``yj`` used for the implicit term.
        l_reg6: Regularisation weight of the biases.
        l_reg7: Regularisation weight of the factor vectors.

    Returns:
        Tuple ``(squared_error, squared_error + penalty)``.
    """
    # NOTE(review): one N_u is applied to every rating although the training
    # loop uses a different set per user, so the value is only indicative.
    loss = 0
    loss_reg = 0
    cx = mat.tocoo()
    for u, i, r_ui in zip(cx.row, cx.col, cx.data):
        r_ui_pred = predict_r_ui(mat, u, i, mu, bu, bi, qi, pu, N_u, yj)
        loss += (r_ui - r_ui_pred) ** 2

        # Penalise only the biases of this rating's user and item rather than
        # re-adding the norm of every bias for each rating.
        loss_reg += l_reg6 * ((bu[u] ** 2).sum() + bi[0, i] ** 2)
        loss_reg += l_reg7 * ((qi[i] ** 2).sum() + (pu[u] ** 2).sum() + (yj[N_u] ** 2).sum())

    return loss, loss + loss_reg
  • Parameter $b_{u}, b_{i}, q_{i}, p_{u}, y_{j}$ : Gradient Descent Update
def svd_more_more(mat, mat_file, gamma1=0.007, gamma2=0.007, gamma3=0.001, l_reg2=100, l_reg6=0.005, l_reg7=0.015, f=50):
    """Train the SVD++ latent factor model by SGD.

    Learns ``b_u``, ``b_i``, ``q_i``, ``p_u`` and ``y_j`` over 200 epochs;
    both step sizes are multiplied by 0.9 after every epoch and the loss is
    printed on every 10th epoch.

    Args:
        mat: Sparse rating matrix (users x movies).
        mat_file: Forwarded unchanged to ``pre_processing``.
        gamma1: Initial step size for the biases.
        gamma2: Initial step size for the factor vectors.
        gamma3: Unused; kept for a signature parallel to ``integrated_model``.
        l_reg2: Unused; kept for a signature parallel to ``integrated_model``.
        l_reg6: Regularisation weight of the biases.
        l_reg7: Regularisation weight of the factor vectors.
        f: Number of latent factors.

    Returns:
        Tuple ``(bu, bi, qi, pu, yj)`` for the subsampled matrix.
    """
    # Keep only the top-left 1/128 x 1/128 corner so the run stays fast, then
    # drop the users and movies that are left without a single rating.
    mat = mat[0:mat.shape[0] // 128, 0:mat.shape[1] // 128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]

    print(mat.shape)
    no_users, no_movies = mat.shape

    # Presumably bi_index[u] holds the movie indices rated by user u - verify
    # against pre_processing.
    bu_index, bi_index = pre_processing(mat, mat_file)

    # All parameters start uniformly at random in [-1, 1).
    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1
    qi = np.random.rand(no_movies, f) * 2 - 1
    pu = np.random.rand(no_users, f) * 2 - 1
    yj = np.random.rand(no_movies, f) * 2 - 1

    mu = mat.data[:].mean()

    print("Train...")
    n_iter = 200
    cx = mat.tocoo()
    for it in range(n_iter):
        for u, i, _ in zip(cx.row, cx.col, cx.data):
            N_u = bi_index[u]
            e_ui = compute_e_ui(mat, u, i, mu, bu, bi, qi, pu, N_u, yj)

            # The updates are applied one after another, so p_u and y_j see
            # the q_i that was just updated.
            bu[u] += gamma1 * (e_ui - l_reg6 * bu[u])
            bi[0, i] += gamma1 * (e_ui - l_reg6 * bi[0, i])
            qi[i] += gamma2 * (e_ui * (pu[u] + 1 / sqrt(len(N_u)) * yj[N_u].sum(0)) - l_reg7 * qi[i])
            pu[u] += gamma2 * (e_ui * qi[i] - l_reg7 * pu[u])
            yj[N_u] += gamma2 * (e_ui * 1 / sqrt(len(N_u)) * qi[i] - l_reg7 * yj[N_u])
        gamma1 *= 0.9
        gamma2 *= 0.9

        if it % 10 == 0:
            # "\\ " prints the same backslash as before without relying on an
            # invalid escape sequence.
            print(it, "\\ ", n_iter)
            print("compute loss...")
            # NOTE(review): N_u here is the set of the last rating visited, so
            # the printed loss is only indicative.
            print(compute_loss(mat, mu, bu, bi, qi, pu, N_u, yj, l_reg6=l_reg6, l_reg7=l_reg7))

    return bu, bi, qi, pu, yj

Integrated Model.py

  • Final Neighborhood Model + Final Latent Model

  • Rating Function

    $\hat{r}_{ui} = \mu + b_{u} + b_{i} + q_{i}^{T}\bigg(p_{u} + \lvert N(u) \rvert^{-\frac{1}{2}} \sum_{j \in N(u)} y_{j} \bigg) + \lvert R^{k}(i; u) \rvert^{-\frac{1}{2}} \sum_{j \in R^{k}(i; u)}(r_{uj} - b_{uj})w_{ij} + \lvert N^{k}(i; u) \rvert^{-\frac{1}{2}} \sum_{j \in N^{k}(i; u)}c_{ij}$

    Final Neighborhood Model의 rating function과 Final Latent Model의 rating function을 더한 값과 동일

    • Parameter
      • $b_{u}$
      • $b_{i}$
      • $w_{ij}$
      • $c_{ij}$
      • $q_{i}$
      • $p_{u}$
      • $y_{j}$
def predict_r_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, qi, pu, N_u, yj):
    """Predict r_ui with the integrated (latent factor + neighbourhood) model.

    Computes ``mu + b_u + b_i
    + q_i . (p_u + |N_u|^-1/2 * sum_{j in N_u} y_j)
    + |Rk|^-1/2 * sum_{j in Rk} (r_uj - b_uj) * w_ij
    + |Nk|^-1/2 * sum_{j in Nk} c_ij``.

    Args:
        mat: Sparse rating matrix indexed as ``mat[u, items]``.
        u: User index.
        i: Item index.
        mu: Global offset.
        bu: User biases, indexed as ``bu[u]``.
        bi: Item biases, indexed as ``bi[0, i]``.
        Rk_iu: Item indices used for the explicit (``w_ij``) term.
        wij: Item-item weights, indexed as ``wij[i][items]``.
        Nk_iu: Item indices used for the implicit (``c_ij``) term.
        cij: Item-item offsets, indexed as ``cij[i][items]``.
        baseline_bu: Fixed baseline user biases used in ``b_uj``.
        baseline_bi: Fixed baseline item biases used in ``b_uj``.
        qi: Item factor matrix, one row per item.
        pu: User factor matrix, one row per user.
        N_u: Indices of the rows of ``yj`` summed for this user.
        yj: Implicit item factor matrix, one row per item.

    Returns:
        The prediction, with the shape of ``bu[u]``. An empty ``N_u``,
        ``Rk_iu`` or ``Nk_iu`` contributes nothing instead of dividing by
        zero.
    """
    user_factors = pu[u]
    if len(N_u) > 0:
        user_factors = user_factors + yj[N_u].sum(0) / sqrt(len(N_u))

    prediction = mu + bu[u] + bi[0, i] + np.dot(qi[i], user_factors)

    if len(Rk_iu) > 0:
        buj = mu + baseline_bu[u] + baseline_bi[0, Rk_iu]
        residuals = mat[u, Rk_iu].toarray().ravel() - buj
        prediction = prediction + (residuals * wij[i][Rk_iu]).sum() / sqrt(len(Rk_iu))

    if len(Nk_iu) > 0:
        # The implicit offsets are summed over Nk_iu (not Rk_iu), matching the
        # normalisation by |Nk_iu|.
        prediction = prediction + cij[i][Nk_iu].sum() / sqrt(len(Nk_iu))

    return prediction

def compute_e_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, qi, pu, N_u, yj):
    """Return the prediction error ``mat[u, i] - predict_r_ui(...)`` for one rating."""
    predicted = predict_r_ui(
        mat, u, i, mu, bu, bi,
        Rk_iu, wij, Nk_iu, cij,
        baseline_bu, baseline_bi,
        qi, pu, N_u, yj,
    )
    observed = mat[u, i]
    return observed - predicted
  • Loss function
    Final Neighborhood Model의 loss function과 Final Latent Model의 loss function을 더한 값과 동일
def compute_loss(mat, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, qi, pu, N_u, yj, l_reg6=0.005, l_reg7=0.015, l_reg8=0.015):
    """Return the integrated model's squared error, without and with penalty.

    For every stored rating the squared prediction error is accumulated, and
    the penalty adds, for that rating's own user, item and index sets:
    ``l_reg6 * (b_u^2 + b_i^2)``,
    ``l_reg7 * (|q_i|^2 + |p_u|^2 + sum_{j in N_u} |y_j|^2)`` and
    ``l_reg8 * (sum_{j in Rk} w_ij^2 + sum_{j in Nk} c_ij^2)``.

    Args:
        mat: Sparse rating matrix; its stored entries are the training set.
        mu, bu, bi, wij, cij, baseline_bu, baseline_bi, qi, pu, yj: Model
            parameters, forwarded to ``predict_r_ui``.
        Rk_iu: Item indices of the explicit neighbourhood.
        Nk_iu: Item indices of the implicit neighbourhood.
        N_u: Indices of the rows of ``yj`` used for the latent implicit term.
        l_reg6: Regularisation weight of the biases.
        l_reg7: Regularisation weight of the factor vectors.
        l_reg8: Regularisation weight of ``w_ij`` and ``c_ij``.

    Returns:
        Tuple ``(squared_error, squared_error + penalty)``.
    """
    # NOTE(review): one Rk_iu / Nk_iu / N_u triple is applied to every rating;
    # the training loop builds them per item and per user, so the value is
    # only indicative.
    loss = 0
    loss_reg = 0
    cx = mat.tocoo()
    for u, i, r_ui in zip(cx.row, cx.col, cx.data):
        r_ui_pred = predict_r_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, qi, pu, N_u, yj)
        loss += (r_ui - r_ui_pred) ** 2

        # Penalise only the parameters that take part in this rating; the
        # c_ij weights are the ones indexed by Nk_iu.
        loss_reg += l_reg6 * ((bu[u] ** 2).sum() + bi[0, i] ** 2)
        loss_reg += l_reg8 * ((wij[i][Rk_iu] ** 2).sum() + (cij[i][Nk_iu] ** 2).sum())
        loss_reg += l_reg7 * ((qi[i] ** 2).sum() + (pu[u] ** 2).sum() + (yj[N_u] ** 2).sum())

    return loss, loss + loss_reg
  • Parameter $b_{u}, b_{i}, w_{ij}, c_{ij}, q_{i}, p_{u}, y_{j}$ : Gradient Descent Update
def integrated_model(mat, mat_file, gamma1=0.007, gamma2=0.007, gamma3=0.001, l_reg2=100, l_reg6=0.005, l_reg7=0.015, l_reg8=0.015, k=300, f=50):
    """Train the integrated (SVD++ plus neighbourhood) model by SGD.

    Learns ``b_u``, ``b_i``, ``q_i``, ``p_u``, ``y_j``, ``w_ij`` and ``c_ij``
    over 200 epochs; all three step sizes are multiplied by 0.9 after every
    epoch and the loss is printed on every 10th epoch.

    Args:
        mat: Sparse rating matrix (users x movies).
        mat_file: Forwarded unchanged to ``pre_processing``.
        gamma1: Initial step size for the biases.
        gamma2: Initial step size for the factor vectors.
        gamma3: Initial step size for ``w_ij`` and ``c_ij``.
        l_reg2: Shrinkage constant lambda_2 of the item similarity.
        l_reg6: Regularisation weight of the biases.
        l_reg7: Regularisation weight of the factor vectors.
        l_reg8: Regularisation weight of ``w_ij`` and ``c_ij``.
        k: Number of most-similar items used as neighbourhood.
        f: Number of latent factors.

    Returns:
        Tuple ``(bu, bi, qi, pu, yj, wij, cij)`` for the subsampled matrix.
    """
    # Keep only the top-left 1/128 x 1/128 corner so the run stays fast, then
    # drop the users and movies that are left without a single rating.
    mat = mat[0:mat.shape[0] // 128, 0:mat.shape[1] // 128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]

    print(mat.shape)
    no_users, no_movies = mat.shape

    # NOTE(review): the baseline biases should come from baseline_estimator;
    # they are drawn at random here, as in the original test setup.
    baseline_bu = np.random.rand(no_users, 1) * 2 - 1
    baseline_bi = np.random.rand(1, no_movies) * 2 - 1

    # Presumably bi_index[u] holds the movie indices rated by user u - verify
    # against pre_processing.
    bu_index, bi_index = pre_processing(mat, mat_file)

    # All parameters start uniformly at random in [-1, 1).
    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1
    wij = np.random.rand(no_movies, no_movies) * 2 - 1
    cij = np.random.rand(no_movies, no_movies) * 2 - 1
    qi = np.random.rand(no_movies, f) * 2 - 1
    pu = np.random.rand(no_users, f) * 2 - 1
    yj = np.random.rand(no_movies, f) * 2 - 1

    mu = mat.data[:].mean()

    # n_ij: number of users who rated both i and j. The indicator matrix is
    # forced to float so the shrink factors below are not truncated to 0 when
    # the ratings are stored with an integer dtype.
    indicator = sparse.csr_matrix(mat, dtype=np.float64, copy=True)
    indicator.data[:] = 1.0
    shrink = indicator.T.dot(indicator)
    shrink.data[:] = shrink.data / (shrink.data + l_reg2)

    # The shrinkage is element-wise; `*` between scipy sparse matrices would
    # be a matrix product, so use multiply() explicitly.
    S = sparse.csr_matrix(shrink.multiply(compute_sparse_correlation_matrix(mat)))

    print("Train...")
    n_iter = 200
    cx = mat.tocoo()
    # S is fixed during training, so each item's neighbourhood is computed once.
    neighbours = {}
    for it in range(n_iter):
        for u, i, _ in zip(cx.row, cx.col, cx.data):
            N_u = bi_index[u]
            if i not in neighbours:
                # Flatten the row first so that [:k] really truncates to the
                # k most similar items (descending similarity).
                neighbours[i] = np.argsort(S[i].toarray().ravel())[::-1][:k]
            # The same item set serves as explicit and implicit neighbourhood.
            Rk_iu = Nk_iu = neighbours[i]
            e_ui = compute_e_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, qi, pu, N_u, yj)

            # The updates are applied one after another, so p_u and y_j see
            # the q_i that was just updated.
            bu[u] += gamma1 * (e_ui - l_reg6 * bu[u])
            bi[0, i] += gamma1 * (e_ui - l_reg6 * bi[0, i])
            qi[i] += gamma2 * (e_ui * (pu[u] + 1 / sqrt(len(N_u)) * yj[N_u].sum(0)) - l_reg7 * qi[i])
            pu[u] += gamma2 * (e_ui * qi[i] - l_reg7 * pu[u])
            yj[N_u] += gamma2 * (e_ui * 1 / sqrt(len(N_u)) * qi[i] - l_reg7 * yj[N_u])

            buj = mu + baseline_bu[u] + baseline_bi[0, Rk_iu]
            wij[i][Rk_iu] += gamma3 * (1 / sqrt(len(Rk_iu)) * e_ui * (mat[u, Rk_iu].toarray().ravel() - buj) - l_reg8 * wij[i][Rk_iu])
            cij[i][Nk_iu] += gamma3 * (1 / sqrt(len(Nk_iu)) * e_ui - l_reg8 * cij[i][Nk_iu])
        gamma1 *= 0.9
        gamma2 *= 0.9
        gamma3 *= 0.9

        if it % 10 == 0:
            # "\\ " prints the same backslash as before without relying on an
            # invalid escape sequence.
            print(it, "\\ ", n_iter)
            print("compute loss...")
            # NOTE(review): Rk_iu / Nk_iu / N_u here are those of the last
            # rating visited, so the printed loss is only indicative.
            print(compute_loss(mat, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, qi, pu, N_u, yj, l_reg6=l_reg6, l_reg7=l_reg7, l_reg8=l_reg8))

    return bu, bi, qi, pu, yj, wij, cij
profile
2021 투빅스 추천시스템 세미나입니다.

0개의 댓글