Author: 이혜린

Setup
- Place the data files under ./Datasets/<filename>.
- In utils.py, change the data path to ./Datasets/.

Run
- rating_compiler.py
- baseline_estimator.py
- correlation_based_neighbourhood_model.py
- correlation_based_implicit_neighbourhood_model.py
- svd_more_more.py
- integrated_model.py
Loss function
import numpy as np
from scipy import sparse
from math import sqrt
from time import time
# pre_processing and compute_sparse_correlation_matrix are assumed to be defined in this repo's utils.py
from utils import pre_processing, compute_sparse_correlation_matrix

def compute_loss(mat, mu, bu, bi, l_reg=0.02):
    # Number of observed ratings per user (row) and per movie (column).
    no_users_entries = np.array((mat != 0).sum(1)).T.ravel()
    bu_rep = np.repeat(bu.ravel(), no_users_entries)
    no_movies_entries = np.array((mat != 0).sum(0)).ravel()
    bi_rep = np.repeat(bi.ravel(), no_movies_entries)

    # Subtract bi in column order (CSC), then mu, then bu in row order (CSR),
    # so that temp_mat.data ends up holding the residuals r_ui - mu - bu - bi.
    temp_mat = sparse.csc_matrix(mat).copy()
    temp_mat.data[:] -= bi_rep
    temp_mat.data[:] -= mu
    temp_mat = sparse.csr_matrix(temp_mat)
    temp_mat.data[:] -= bu_rep

    loss = (temp_mat.data ** 2).sum()
    loss_reg = l_reg * ((bu ** 2).sum() + (bi ** 2).sum())
    return loss, loss + loss_reg
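As a quick sanity check (a minimal sketch, not part of the original scripts, assuming compute_loss above is in scope): with zero biases the data term reduces to the squared deviation from the global mean, which is easy to verify on a small random matrix.

import numpy as np
from scipy import sparse

rng = np.random.default_rng(0)
mat = sparse.random(6, 8, density=0.5, format="csr", random_state=0)
mat.data[:] = rng.integers(1, 6, size=mat.nnz)  # ratings in 1..5
mu = mat.data.mean()
bu = np.zeros((6, 1))   # zero user biases
bi = np.zeros((1, 8))   # zero movie biases
loss, loss_with_reg = compute_loss(mat, mu, bu, bi, l_reg=0.02)
assert np.isclose(loss, ((mat.data - mu) ** 2).sum())
assert loss == loss_with_reg  # the regularizer vanishes at zero biases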
def baseline_estimator(mat, mat_file, l_reg=0.02, learning_rate=0.0000025):
    # Subsample the matrix to make computation faster, then drop empty rows/columns.
    mat = mat[0:mat.shape[0]//128, 0:mat.shape[1]//128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]
    print(mat.shape)
    no_users = mat.shape[0]
    no_movies = mat.shape[1]
    bu_index, bi_index = pre_processing(mat, mat_file)  # helper from utils.py

    # Random initialization in [-1, 1).
    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1

    mu = mat.data.mean()
    mat_sum1 = mat.sum(1)   # sum of ratings per user
    mat_sum0 = mat.sum(0)   # sum of ratings per movie
    no_users_entries = np.array((mat != 0).sum(1))
    no_movies_entries = np.array((mat != 0).sum(0))

    # Train
    print("Train...")
    n_iter = 200
    for it in range(n_iter):
        # Sum of the current bi over each user's rated movies, and of bu over each movie's raters.
        bi_sum = np.array(list(map(lambda x: bi.ravel()[x].sum(), bi_index))).reshape((no_users, 1))
        bu_sum = np.array(list(map(lambda x: bu.ravel()[x].sum(), bu_index))).reshape((1, no_movies))

        # Vectorized gradient step on the regularized squared error.
        bu_gradient = -2.0 * (mat_sum1 - no_users_entries * mu - no_users_entries * bu - bi_sum) + 2.0 * l_reg * bu
        bu -= learning_rate * bu_gradient
        bi_gradient = -2.0 * (mat_sum0 - no_movies_entries * mu - no_movies_entries * bi - bu_sum) + 2.0 * l_reg * bi
        bi -= learning_rate * bi_gradient

        if it % 10 == 0:
            print("compute loss...")
            print(compute_loss(mat, mu, bu, bi, l_reg=l_reg))
    return bu, bi
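For reference, the vectorized update above is the exact gradient of the regularized objective (a restatement in standard notation, not taken from the original code):

$$\min_{b_*} \sum_{(u,i) \in \mathcal{K}} \left(r_{ui} - \mu - b_u - b_i\right)^2 + \lambda \left(\sum_u b_u^2 + \sum_i b_i^2\right)$$

$$\frac{\partial L}{\partial b_u} = -2 \sum_{i \in R(u)} \left(r_{ui} - \mu - b_u - b_i\right) + 2\lambda b_u, \qquad \frac{\partial L}{\partial b_i} = -2 \sum_{u \in R(i)} \left(r_{ui} - \mu - b_u - b_i\right) + 2\lambda b_i

In the code, mat_sum1, no_users_entries * mu, no_users_entries * bu, and bi_sum are exactly the four per-user terms of the b_u gradient.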
2. More on Neighborhood Model
→ Basic Item-oriented Neighborhood Model
A neighborhood model that uses only explicit data (rating scores in the 1-5 range).
Rating Function
def predict_r_ui(mat, u, i, mu, S, Sk_iu, baseline_bu, baseline_bi):
    # Baseline estimates for item i and for each neighbor j in Sk_iu.
    bui = mu + baseline_bu[u] + baseline_bi[0, i]
    buj = mu + baseline_bu[u] + baseline_bi[0, Sk_iu]
    # Similarity-weighted average of the neighbors' baseline-adjusted ratings.
    return bui + (S[i, Sk_iu].toarray().ravel() * (mat[u, Sk_iu].toarray().ravel() - buj)).sum() / S[i, Sk_iu].sum()
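In formula form (k-NN notation as in Koren's neighborhood models; my restatement), this computes

$$\hat{r}_{ui} = b_{ui} + \frac{\sum_{j \in S^k(i;u)} s_{ij}\,(r_{uj} - b_{uj})}{\sum_{j \in S^k(i;u)} s_{ij}}, \qquad b_{ui} = \mu + b_u + b_i$$

where S^k(i;u) is the set Sk_iu of the k items most similar to item i.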
Loss function
Identical to the baseline's loss function.
Shrunk coefficients matrix
def correlation_based_neighbourhood_model(mat, mat_file, l_reg2=100.0, k=250):
    # Subsample the matrix to make computation faster, then drop empty rows/columns.
    mat = mat[0:mat.shape[0]//128, 0:mat.shape[1]//128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]
    print(mat.shape)
    no_users = mat.shape[0]
    no_movies = mat.shape[1]

    # We should call baseline_estimator(mat, mat_file) here,
    # but random biases are enough for a quick test.
    baseline_bu = np.random.rand(no_users, 1) * 2 - 1
    baseline_bi = np.random.rand(1, no_movies) * 2 - 1
    mu = mat.data.mean()

    # Similarity matrix (= shrunk correlation matrix):
    # co-rating counts shrunk by l_reg2, times the item-item correlation matrix.
    N = sparse.csr_matrix(mat).copy()
    N.data[:] = 1
    S = sparse.csr_matrix.dot(N.T, N)
    S.data[:] = S.data[:] / (S.data[:] + l_reg2)
    S = S * compute_sparse_correlation_matrix(mat)  # helper from utils.py

    # Computation
    print("Computation...")
    cx = mat.tocoo()
    r_ui_mat = []
    for u, i, v in zip(cx.row, cx.col, cx.data):
        # Indices of the k items most similar to i (descending similarity).
        Sk_iu = np.argsort(S[i, ].toarray().ravel())[::-1][:k]
        r_ui = predict_r_ui(mat, u, i, mu, S, Sk_iu, baseline_bu, baseline_bi)
        r_ui_mat.append((u, i, r_ui[0]))

    data = [x[2] for x in r_ui_mat]
    row = [x[0] for x in r_ui_mat]
    col = [x[1] for x in r_ui_mat]
    r_ui_pred = sparse.csr_matrix((data, (row, col)), shape=mat.shape)
    print((mat - r_ui_pred).sum())  # crude check: total residual over observed entries
    return r_ui_pred
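The shrinkage step s_ij = (n_ij / (n_ij + l_reg2)) * rho_ij can be illustrated on a toy matrix. This is a standalone sketch: a dense Pearson correlation stands in for compute_sparse_correlation_matrix, whose exact output is not assumed here.

import numpy as np
from scipy import sparse

mat = sparse.csr_matrix(np.array([
    [5, 3, 0, 1],
    [4, 0, 0, 1],
    [1, 1, 0, 5],
    [1, 0, 4, 4],
]))
l_reg2, k = 100.0, 2

N = mat.copy()
N.data[:] = 1                       # 0/1 support indicator
n_ij = (N.T @ N).toarray()          # number of users who rated both items
rho = np.nan_to_num(np.corrcoef(mat.toarray().T))  # stand-in item-item correlation
S = n_ij / (n_ij + l_reg2) * rho    # shrink weakly supported correlations toward 0

i = 0
Sk_i = np.argsort(S[i])[::-1][:k]   # the k items most similar to item i
print(Sk_i)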
2. More on Neighborhood Model
→ Final Neighborhood Model
The final neighborhood model, which uses both explicit data and implicit data (a 0/1 indicator of whether the user rated the item).
Rating Function
def predict_r_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi):
    # Baseline estimates b_uj for the explicit neighbors j.
    buj = mu + baseline_bu[u] + baseline_bi[0, Rk_iu]
    Rk_iu_sum = np.multiply(mat[u, Rk_iu].toarray().ravel() - buj, wij[i][Rk_iu]).sum()
    Nk_iu_sum = cij[i][Nk_iu].sum()
    return mu + bu[u] + bi[0, i] + Rk_iu_sum / sqrt(len(Rk_iu)) + Nk_iu_sum / sqrt(len(Nk_iu))

def compute_e_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi):
    return mat[u, i] - predict_r_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi)
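This is the neighborhood rule with learned weights from Koren (KDD 2008), restated here for readability:

$$\hat{r}_{ui} = \mu + b_u + b_i + |R^k(i;u)|^{-\frac{1}{2}} \sum_{j \in R^k(i;u)} (r_{uj} - b_{uj})\, w_{ij} + |N^k(i;u)|^{-\frac{1}{2}} \sum_{j \in N^k(i;u)} c_{ij}$$

Rk_iu and Nk_iu in the code are R^k(i;u) and N^k(i;u); in this implementation they are the same top-k similarity set.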
Loss function
def compute_loss(mat, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, l_reg=0.002):
    # Note: Rk_iu / Nk_iu are fixed by the caller rather than recomputed per (u, i),
    # so this is only an approximation of the true loss, used for progress logging.
    loss = 0
    loss_reg = 0
    cx = mat.tocoo()
    for u, i, v in zip(cx.row, cx.col, cx.data):
        r_ui_pred = predict_r_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi)
        Rk_iu_sum = (wij[i][Rk_iu] ** 2).sum()
        Nk_iu_sum = (cij[i][Nk_iu] ** 2).sum()
        loss += (v - r_ui_pred) ** 2
        loss_reg += l_reg * ((bu ** 2).sum() + (bi ** 2).sum() + Rk_iu_sum + Nk_iu_sum)
    return loss, loss + loss_reg
def correlation_based_implicit_neighbourhood_model(mat, mat_file, l_reg=0.002, gamma=0.005, l_reg2=100.0, k=250):
    # Subsample the matrix to make computation faster, then drop empty rows/columns.
    mat = mat[0:mat.shape[0]//128, 0:mat.shape[1]//128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]
    print(mat.shape)
    no_users = mat.shape[0]
    no_movies = mat.shape[1]

    # We should call baseline_estimator(mat, mat_file) here,
    # but random biases are enough for a quick test.
    baseline_bu = np.random.rand(no_users, 1) * 2 - 1
    baseline_bi = np.random.rand(1, no_movies) * 2 - 1
    bu_index, bi_index = pre_processing(mat, mat_file)

    # Init parameters
    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1
    wij = np.random.rand(no_movies, no_movies) * 2 - 1
    cij = np.random.rand(no_movies, no_movies) * 2 - 1
    mu = mat.data.mean()

    # Similarity matrix: co-rating counts shrunk by l_reg2, times the correlation matrix.
    N = sparse.csr_matrix(mat).copy()
    N.data[:] = 1
    S = sparse.csr_matrix.dot(N.T, N)
    S.data[:] = S.data[:] / (S.data[:] + l_reg2)
    S = S * compute_sparse_correlation_matrix(mat)

    # Train
    print("Train...")
    n_iter = 200
    cx = mat.tocoo()
    for it in range(n_iter):
        t0 = time()
        for u, i, v in zip(cx.row, cx.col, cx.data):
            # Use the k items most similar to i for both the explicit (R) and implicit (N) sets.
            Rk_iu = Nk_iu = np.argsort(S[i, ].toarray().ravel())[::-1][:k]
            e_ui = compute_e_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi)
            # Stochastic gradient updates on the prediction error e_ui.
            bu[u] += gamma * (e_ui - l_reg * bu[u])
            bi[0, i] += gamma * (e_ui - l_reg * bi[0, i])
            buj = mu + baseline_bu[u] + baseline_bi[0, Rk_iu]
            wij[i][Rk_iu] += gamma * (1 / sqrt(len(Rk_iu)) * e_ui * (mat[u, Rk_iu].toarray().ravel() - buj) - l_reg * wij[i][Rk_iu])
            cij[i][Nk_iu] += gamma * (1 / sqrt(len(Nk_iu)) * e_ui - l_reg * cij[i][Nk_iu])
        gamma *= 0.99   # learning-rate decay
        if it % 10 == 0:
            t1 = time()
            print(it, "/", n_iter, "(%.2g sec)" % (t1 - t0))
            print("compute loss...")
            print(compute_loss(mat, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, l_reg=l_reg))
    return bu, bi, wij, cij
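The inner-loop updates correspond to stochastic gradient steps on e_ui = r_ui - r̂_ui (my restatement; γ is gamma and λ is l_reg in the code):

$$b_u \leftarrow b_u + \gamma\,(e_{ui} - \lambda b_u), \qquad b_i \leftarrow b_i + \gamma\,(e_{ui} - \lambda b_i)$$

$$w_{ij} \leftarrow w_{ij} + \gamma\left(|R^k(i;u)|^{-\frac{1}{2}}\, e_{ui}\,(r_{uj} - b_{uj}) - \lambda w_{ij}\right), \qquad c_{ij} \leftarrow c_{ij} + \gamma\left(|N^k(i;u)|^{-\frac{1}{2}}\, e_{ui} - \lambda c_{ij}\right)$$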
3. More on Latent Factor Model
→ Modify2 (SVD++)
= Final Latent Model
Adds a user-specific latent vector (pu) to the prediction.
Rating function
def predict_r_ui(mat, u, i, mu, bu, bi, qi, pu, N_u, yj):
    # User factor pu[u] plus the normalized sum of implicit item factors yj.
    N_u_sum = yj[N_u].sum(0)
    return mu + bu[u] + bi[0, i] + np.dot(qi[i], pu[u] + N_u_sum / sqrt(len(N_u)))

def compute_e_ui(mat, u, i, mu, bu, bi, qi, pu, N_u, yj):
    return mat[u, i] - predict_r_ui(mat, u, i, mu, bu, bi, qi, pu, N_u, yj)
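This is the SVD++ prediction rule (restated):

$$\hat{r}_{ui} = \mu + b_u + b_i + q_i^{\top}\left(p_u + |N(u)|^{-\frac{1}{2}} \sum_{j \in N(u)} y_j\right)$$

N(u) (N_u in the code, taken from bi_index[u]) is the set of items user u has rated, i.e. the implicit feedback.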
Loss function
def compute_loss(mat, mu, bu, bi, qi, pu, N_u, yj, l_reg6=0.005, l_reg7=0.015):
    # Note: N_u is fixed by the caller, so this is only an approximation for logging.
    loss = 0
    loss_reg = 0
    cx = mat.tocoo()
    for u, i, v in zip(cx.row, cx.col, cx.data):
        r_ui_pred = predict_r_ui(mat, u, i, mu, bu, bi, qi, pu, N_u, yj)
        loss += (v - r_ui_pred) ** 2
        loss_reg += l_reg6 * ((bu ** 2).sum() + (bi ** 2).sum())
        loss_reg += l_reg7 * ((qi[i] ** 2).sum() + (pu[u] ** 2).sum() + (yj[N_u] ** 2).sum())
    return loss, loss + loss_reg
def svd_more_more(mat, mat_file, gamma1=0.007, gamma2=0.007, gamma3=0.001, l_reg2=100, l_reg6=0.005, l_reg7=0.015, f=50):
    # gamma3 and l_reg2 are unused here; they are kept for symmetry with integrated_model.
    # Subsample the matrix to make computation faster, then drop empty rows/columns.
    mat = mat[0:mat.shape[0]//128, 0:mat.shape[1]//128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]
    print(mat.shape)
    no_users = mat.shape[0]
    no_movies = mat.shape[1]
    bu_index, bi_index = pre_processing(mat, mat_file)

    # Init parameters in [-1, 1); f is the number of latent factors.
    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1
    qi = np.random.rand(no_movies, f) * 2 - 1
    pu = np.random.rand(no_users, f) * 2 - 1
    yj = np.random.rand(no_movies, f) * 2 - 1
    mu = mat.data.mean()

    # Train
    print("Train...")
    n_iter = 200
    cx = mat.tocoo()
    for it in range(n_iter):
        for u, i, v in zip(cx.row, cx.col, cx.data):
            N_u = bi_index[u]   # items rated by user u
            e_ui = compute_e_ui(mat, u, i, mu, bu, bi, qi, pu, N_u, yj)
            # Stochastic gradient updates
            bu[u] += gamma1 * (e_ui - l_reg6 * bu[u])
            bi[0, i] += gamma1 * (e_ui - l_reg6 * bi[0, i])
            qi[i] += gamma2 * (e_ui * (pu[u] + 1 / sqrt(len(N_u)) * yj[N_u].sum(0)) - l_reg7 * qi[i])
            pu[u] += gamma2 * (e_ui * qi[i] - l_reg7 * pu[u])
            yj[N_u] += gamma2 * (e_ui * 1 / sqrt(len(N_u)) * qi[i] - l_reg7 * yj[N_u])
        gamma1 *= 0.9   # learning-rate decay
        gamma2 *= 0.9
        if it % 10 == 0:
            print(it, "/", n_iter)
            print("compute loss...")
            print(compute_loss(mat, mu, bu, bi, qi, pu, N_u, yj, l_reg6=l_reg6, l_reg7=l_reg7))
    return bu, bi, qi, pu, yj
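A minimal smoke test for the predictor (a sketch assuming the SVD++ predict_r_ui above is in scope; all values below are made up):

import numpy as np
from scipy import sparse
from math import sqrt

rng = np.random.default_rng(1)
no_users, no_movies, f = 2, 6, 3
mat = sparse.csr_matrix(np.array([
    [5, 3, 0, 1, 0, 4],
    [4, 0, 0, 1, 2, 0],
]))
mu = mat.data.mean()
bu = np.zeros((no_users, 1))
bi = np.zeros((1, no_movies))
qi = rng.standard_normal((no_movies, f)) * 0.1
pu = rng.standard_normal((no_users, f)) * 0.1
yj = rng.standard_normal((no_movies, f)) * 0.1

u = 0
N_u = mat[u].indices   # items rated by user u (the implicit feedback set)
print(predict_r_ui(mat, u, N_u[0], mu, bu, bi, qi, pu, N_u, yj))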
4. Integrated Model
→ Final Neighborhood Model + Final Latent Model
Rating Function
The sum of the Final Neighborhood Model's rating function and the Final Latent Model's rating function.
def predict_r_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, qi, pu, N_u, yj):
    # Neighborhood part
    buj = mu + baseline_bu[u] + baseline_bi[0, Rk_iu]
    Rk_iu_sum = np.multiply(mat[u, Rk_iu].toarray().ravel() - buj, wij[i][Rk_iu]).sum()
    Nk_iu_sum = cij[i][Nk_iu].sum()
    # Latent factor part
    N_u_sum = yj[N_u].sum(0)
    return (mu + bu[u] + bi[0, i]
            + np.dot(qi[i], pu[u] + N_u_sum / sqrt(len(N_u)))
            + Rk_iu_sum / sqrt(len(Rk_iu)) + Nk_iu_sum / sqrt(len(Nk_iu)))

def compute_e_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, qi, pu, N_u, yj):
    return mat[u, i] - predict_r_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, qi, pu, N_u, yj)
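The combined rule is exactly the sum of the two model parts around the shared baseline (restated):

$$\hat{r}_{ui} = \mu + b_u + b_i + q_i^{\top}\left(p_u + |N(u)|^{-\frac{1}{2}} \sum_{j \in N(u)} y_j\right) + |R^k(i;u)|^{-\frac{1}{2}} \sum_{j \in R^k(i;u)} (r_{uj} - b_{uj})\, w_{ij} + |N^k(i;u)|^{-\frac{1}{2}} \sum_{j \in N^k(i;u)} c_{ij}$$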
Loss function
The sum of the Final Neighborhood Model's loss function and the Final Latent Model's loss function.

def compute_loss(mat, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, qi, pu, N_u, yj, l_reg6=0.005, l_reg7=0.015, l_reg8=0.015):
    loss = 0
    loss_reg = 0
    cx = mat.tocoo()
    for u, i, v in zip(cx.row, cx.col, cx.data):
        r_ui_pred = predict_r_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, qi, pu, N_u, yj)
        Rk_iu_sum = (wij[i][Rk_iu] ** 2).sum()
        Nk_iu_sum = (cij[i][Nk_iu] ** 2).sum()
        loss += (v - r_ui_pred) ** 2
        loss_reg += l_reg6 * ((bu ** 2).sum() + (bi ** 2).sum())
        loss_reg += l_reg8 * (Rk_iu_sum + Nk_iu_sum)
        loss_reg += l_reg7 * ((qi[i] ** 2).sum() + (pu[u] ** 2).sum() + (yj[N_u] ** 2).sum())
    return loss, loss + loss_reg
def integrated_model(mat, mat_file, gamma1=0.007, gamma2=0.007, gamma3=0.001, l_reg2=100, l_reg6=0.005, l_reg7=0.015, l_reg8=0.015, k=300, f=50):
    # Subsample the matrix to make computation faster, then drop empty rows/columns.
    mat = mat[0:mat.shape[0]//128, 0:mat.shape[1]//128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]
    print(mat.shape)
    no_users = mat.shape[0]
    no_movies = mat.shape[1]

    # We should call baseline_estimator(mat, mat_file) here,
    # but random biases are enough for a quick test.
    baseline_bu = np.random.rand(no_users, 1) * 2 - 1
    baseline_bi = np.random.rand(1, no_movies) * 2 - 1
    bu_index, bi_index = pre_processing(mat, mat_file)

    # Init parameters
    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1
    wij = np.random.rand(no_movies, no_movies) * 2 - 1
    cij = np.random.rand(no_movies, no_movies) * 2 - 1
    qi = np.random.rand(no_movies, f) * 2 - 1
    pu = np.random.rand(no_users, f) * 2 - 1
    yj = np.random.rand(no_movies, f) * 2 - 1
    mu = mat.data.mean()

    # Similarity matrix: co-rating counts shrunk by l_reg2, times the correlation matrix.
    N = sparse.csr_matrix(mat).copy()
    N.data[:] = 1
    S = sparse.csr_matrix.dot(N.T, N)
    S.data[:] = S.data[:] / (S.data[:] + l_reg2)
    S = S * compute_sparse_correlation_matrix(mat)

    # Train
    print("Train...")
    n_iter = 200
    cx = mat.tocoo()
    for it in range(n_iter):
        for u, i, v in zip(cx.row, cx.col, cx.data):
            N_u = bi_index[u]   # items rated by user u
            # Use the k items most similar to i for both the explicit (R) and implicit (N) sets.
            Rk_iu = Nk_iu = np.argsort(S[i, ].toarray().ravel())[::-1][:k]
            e_ui = compute_e_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, qi, pu, N_u, yj)
            # Latent factor updates
            bu[u] += gamma1 * (e_ui - l_reg6 * bu[u])
            bi[0, i] += gamma1 * (e_ui - l_reg6 * bi[0, i])
            qi[i] += gamma2 * (e_ui * (pu[u] + 1 / sqrt(len(N_u)) * yj[N_u].sum(0)) - l_reg7 * qi[i])
            pu[u] += gamma2 * (e_ui * qi[i] - l_reg7 * pu[u])
            yj[N_u] += gamma2 * (e_ui * 1 / sqrt(len(N_u)) * qi[i] - l_reg7 * yj[N_u])
            # Neighborhood updates
            buj = mu + baseline_bu[u] + baseline_bi[0, Rk_iu]
            wij[i][Rk_iu] += gamma3 * (1 / sqrt(len(Rk_iu)) * e_ui * (mat[u, Rk_iu].toarray().ravel() - buj) - l_reg8 * wij[i][Rk_iu])
            cij[i][Nk_iu] += gamma3 * (1 / sqrt(len(Nk_iu)) * e_ui - l_reg8 * cij[i][Nk_iu])
        gamma1 *= 0.9   # learning-rate decay
        gamma2 *= 0.9
        gamma3 *= 0.9
        if it % 10 == 0:
            print(it, "/", n_iter)
            print("compute loss...")
            print(compute_loss(mat, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, qi, pu, N_u, yj, l_reg6=l_reg6, l_reg7=l_reg7, l_reg8=l_reg8))
    return bu, bi, qi, pu, yj, wij, cij