Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
T
Tuned_AsymmetricSVD
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Winnie Uyen Nguyen
Tuned_AsymmetricSVD
Commits
e7016544
Commit
e7016544
authored
2 years ago
by
Winnie Uyen Nguyen
Browse files
Options
Downloads
Patches
Plain Diff
Upload Tuning techniques for the algorithm
parent
b80760ea
Branches
main
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
TuningAsym_SVD.py
+204
-0
204 additions, 0 deletions
TuningAsym_SVD.py
with
204 additions
and
0 deletions
TuningAsym_SVD.py
0 → 100644
+
204
−
0
View file @
e7016544
import
pandas
as
pd
import
numpy
as
np
from
sklearn.metrics
import
fbeta_score
,
make_scorer
from
sklearn.model_selection
import
GridSearchCV
from
sklearn.base
import
BaseEstimator
,
ClassifierMixin
import
numpy
as
np
# Column layout of the MovieLens u.data file (tab separated, no header row).
names = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv(
    '/Users/winnie/Desktop/CS488/incremental-approsvd/ml-100k/u.data',
    sep='\t',
    names=names,
)

# The number of distinct ids fixes the shape of the ratings matrix.
n_users = df.user_id.unique().shape[0]
n_items = df.item_id.unique().shape[0]
print("n_users", n_users)
print("n_items", n_items)

# Create r_{ui}, our ratings matrix: one row per user, one column per item,
# 0 where no rating is present.
ratings = np.zeros((n_users, n_items))

# ratings[user_id - 1, item_id - 1] = rating (ids are shifted to 0-based).
# Rows whose shifted id falls past the matrix edge are skipped.
for row in df.itertuples():
    user_idx = row.user_id - 1
    item_idx = row.item_id - 1
    if user_idx < n_users and item_idx < n_items:
        ratings[user_idx, item_idx] = row.rating
# Split into training and test sets.
# Move up to ``n_test`` ratings of each user from the training set to the test set.
def train_test_split(ratings, n_test=10, rng=None):
    """Split a user x item ratings matrix into disjoint train and test matrices.

    For every user (row), up to ``n_test`` rated items (non-zero cells) are
    chosen at random without replacement; those cells are zeroed in the
    training copy and copied into the test matrix.

    Parameters
    ----------
    ratings : numpy.ndarray
        2-D matrix in which 0 means "no rating".
    n_test : int, default=10
        Maximum number of ratings held out per user.  A user with fewer rated
        items contributes all of them; a user with none contributes nothing.
    rng : object with a ``choice`` method, optional
        Random source, e.g. ``numpy.random.default_rng(seed)``.  When omitted
        the global ``numpy.random`` state is used.

    Returns
    -------
    (train, test) : tuple of numpy.ndarray
        Two matrices with the shape of ``ratings``; no cell is non-zero in
        both.

    Raises
    ------
    ValueError
        If ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError("n_test must be non-negative")
    chooser = np.random if rng is None else rng

    test = np.zeros(ratings.shape)
    train = ratings.copy()
    for user in range(ratings.shape[0]):
        rated_items = ratings[user, :].nonzero()[0]
        # Never ask for more samples than exist: choice(replace=False) would
        # raise for users with fewer than n_test ratings.
        n_held_out = min(n_test, rated_items.size)
        if n_held_out == 0:
            continue
        test_ratings = chooser.choice(rated_items, size=n_held_out, replace=False)
        # Held-out cells become "missing" (0) in the training matrix.
        train[user, test_ratings] = 0.
        test[user, test_ratings] = ratings[user, test_ratings]

    # Sanity check: test and training are truly disjoint.
    assert np.all((train * test) == 0)
    return train, test
train, test = train_test_split(ratings)

# Indicator matrix built from the training set: 1 where a rating is present,
# 0 where it is missing.  A vectorised comparison replaces the per-element
# Python lambda; the values and integer dtype are the same.
indicating_mat = (train != 0).astype(int)

# Boolean form of the indicator matrix (True where a training rating exists).
mask = indicating_mat == 1
from
sklearn.metrics
import
mean_squared_error
from
sklearn.metrics
import
mean_absolute_error
def get_rmse(pred, actual):
    """Return the root-mean-squared error over the rated cells of ``actual``.

    Parameters
    ----------
    pred : numpy.ndarray
        Predicted ratings, indexable with the positions of ``actual``.
    actual : numpy.ndarray
        Reference ratings; cells equal to 0 are treated as missing and are
        left out of the error.

    Returns
    -------
    float
        The RMSE, which is also printed with the label ``"rmse"``.
    """
    # Only cells that carry a real rating take part in the error.
    rated = actual.nonzero()
    pred = pred[rated].flatten()
    actual = actual[rated].flatten()
    # ``squared=False`` was deprecated in scikit-learn 1.4 and removed in 1.6;
    # taking the square root of the MSE works on every version.
    rmse = np.sqrt(
        mean_squared_error(
            actual, pred, sample_weight=None, multioutput='uniform_average'
        )
    )
    print("rmse", rmse)
    return rmse
def get_mae(pred, actual):
    """Return the mean absolute error over the rated cells of ``actual``.

    Cells of ``actual`` equal to 0 are treated as missing: neither they nor
    the matching cells of ``pred`` contribute.  The result is printed with
    the label ``"mae"`` and returned.
    """
    # Positions that hold a real (non-zero) reference rating.
    observed = actual.nonzero()
    y_pred = pred[observed].flatten()
    y_true = actual[observed].flatten()
    mae = mean_absolute_error(
        y_true,
        y_pred,
        sample_weight=None,
        multioutput='uniform_average',
    )
    print("mae", mae)
    return mae
class MyClassifier(BaseEstimator, ClassifierMixin):
    """Asymmetric-SVD style rating predictor trained by stochastic gradient descent.

    A rating of user ``u`` for item ``i`` is estimated as::

        mu + b_u[u] + b_i[i]
           + q_i[i] . ( R[u] * sum_j (r_uj - mu - b_u[u] - b_i[j]) * x_j[j]
                      + N[u] * sum_j y_j[j] )

    where both sums run over the items ``j`` that user ``u`` rated in the
    training matrix handed to :meth:`fit`.

    Parameters
    ----------
    df : pandas.DataFrame
        Ratings table.  Its ``user_id`` and ``item_id`` columns give the
        matrix size, and its first three columns are read as
        (user id, item id, rating) with 1-based ids to fill ``self.ratings``.
    k : int, default=1
        Number of latent factors.
    lrate : float, default=0
        SGD learning rate.  With the default of 0 no parameter changes
        during training.
    n_epochs : int, default=15
        Number of full passes over the training ratings made by :meth:`fit`.

    Notes
    -----
    All training statistics (mask, norms, residuals) are derived from the
    matrix passed to :meth:`fit`, so the estimator does not depend on any
    module-level state.
    """
    # NOTE(review): the model predicts real-valued ratings, yet it inherits
    # ClassifierMixin (whose default ``score`` is accuracy).  Kept so existing
    # callers are unaffected; RegressorMixin looks like the better fit.

    def __init__(self, df, k=1, lrate=0, n_epochs=15):
        self.df = df
        self.n_users = df.user_id.unique().shape[0]
        self.n_items = df.item_id.unique().shape[0]
        # Create r_{ui}, the full ratings matrix.  It is kept as a public
        # attribute; training itself only reads the matrix given to fit().
        self.ratings = np.zeros((self.n_users, self.n_items))
        for row in df.itertuples():
            # row[0] is the DataFrame index; ids are 1-based.
            self.ratings[row[1] - 1, row[2] - 1] = row[3]
        self.k = k
        self.lrate = lrate
        self.n_epochs = n_epochs
        # L2 regularisation strength applied to every parameter update.
        self._lambda = 0.001

    @staticmethod
    def _inverse_row_norm(matrix):
        """Return ``1 / ||row||_2`` per row, using 0 for all-zero rows."""
        norms = np.linalg.norm(matrix, axis=1)
        inverse = np.zeros(norms.shape, dtype=float)
        # Rows without any rating would otherwise give inf and poison every
        # prediction for that user with NaN.
        np.divide(1.0, norms, out=inverse, where=norms > 0)
        return inverse

    def _baseline_residuals(self, u, rated, mu, b_i, b_u):
        """Return ``r_uj - mu - b_u[u] - b_i[j]`` for each item ``j`` in ``rated``."""
        return self.train[u, rated] - mu - b_u[u] - b_i[rated]

    def _user_factor(self, u):
        """Return the k-vector that is dotted with ``q_i[i]`` for user ``u``."""
        explicit = self.get_item_item(u, self.global_bias, self.b_i, self.b_u, self.x_j)
        implicit = self.N[u] * self.y_j[self.mask[u, :], :].sum(axis=0)
        return explicit + implicit

    def get_item_item(self, u, mu, b_i, b_u, x_j):
        """Return ``R[u] * sum_j (r_uj - mu - b_u[u] - b_i[j]) * x_j[j]``.

        Parameters
        ----------
        u : int
            Row index of the user in the training matrix.
        mu : float
            Global mean rating.
        b_i, b_u : numpy.ndarray
            Item and user bias vectors.
        x_j : numpy.ndarray
            Item factor matrix of shape (n_item, k) weighting the residuals.

        Returns
        -------
        numpy.ndarray
            Vector of length ``k``; all zeros when the user has no training
            ratings.
        """
        rated = np.flatnonzero(self.mask[u, :])
        residuals = self._baseline_residuals(u, rated, mu, b_i, b_u)
        return residuals.dot(x_j[rated, :]) * self.R[u]

    # Asymmetric SVD rating estimate equation (predict)
    def predict_one(self, u, i):
        """Return the estimated rating of user ``u`` for item ``i``."""
        return (
            self.global_bias
            + self.b_i[i]
            + self.b_u[u]
            + self.q_i[i, :].dot(self._user_factor(u))
        )

    def svdasy_step(self):
        """Run one SGD epoch: visit every training rating once, in random order."""
        users, items = self.non_zeros
        size = len(users)
        order = np.random.permutation(size)
        for count, idx in enumerate(order, start=1):
            # Progress report every 4096 samples.
            if count % 0x1000 == 0:
                print(count, size)
            user = users[idx]
            item = items[idx]
            error = self.train[user, item] - self.predict_one(user, item)

            # Bias updates.
            self.b_u[user] += self.lrate * (error - self._lambda * self.b_u[user])
            self.b_i[item] += self.lrate * (error - self._lambda * self.b_i[item])

            # Per-user quantities, recomputed with the freshly updated biases.
            rated = np.flatnonzero(self.mask[user, :])
            residuals = self._baseline_residuals(
                user, rated, self.global_bias, self.b_i, self.b_u
            )
            item_item = self.R[user] * residuals.dot(self.x_j[rated, :])
            implicit = self.N[user] * self.y_j[rated, :].sum(axis=0)

            # Update for q_i (item vector).
            self.q_i[item, :] += self.lrate * (
                error * (item_item + implicit) - self._lambda * self.q_i[item, :]
            )
            q_item = self.q_i[item, :]

            # Update for each x_j: the derivative of the prediction with
            # respect to x_j is R[u] * (r_uj - baseline_uj) * q_i, one scalar
            # weight per rated item j.
            self.x_j[rated, :] += self.lrate * (
                error * self.R[user] * np.outer(residuals, q_item)
                - self._lambda * self.x_j[rated, :]
            )

            # Update for each y_j: the derivative is N[u] * q_i for every
            # rated item j.
            self.y_j[rated, :] += self.lrate * (
                error * self.N[user] * q_item - self._lambda * self.y_j[rated, :]
            )

    def fit(self, train, test):
        """Learn biases and factor matrices from ``train``.

        Parameters
        ----------
        train : numpy.ndarray
            User x item training ratings, 0 meaning "missing".
        test : numpy.ndarray
            Held-out ratings; only stored on the instance as ``self.test``.

        Returns
        -------
        MyClassifier
            ``self``.

        Raises
        ------
        ValueError
            If ``train`` contains no ratings at all.
        """
        train = np.asarray(train)
        self.test = test
        self.train = train
        # Derive the mask from the matrix actually being fitted so that the
        # rows of mask / N / R always line up with the rows of ``train``.
        self.mask = train != 0
        if not self.mask.any():
            raise ValueError("train contains no ratings")
        # Average of all ratings which are not zero in the training matrix.
        self.global_bias = np.mean(train[self.mask])
        self.non_zeros = train.nonzero()
        # N[u] = 1 / sqrt(number of items rated by u): the L2 norm of a 0/1
        # row is the square root of its count.
        self.N = self._inverse_row_norm(self.mask.astype(float))
        # NOTE(review): R uses the norm of the rating *values*, not of the
        # rating count; Koren's formulation uses |R(u)|^-1/2 -- confirm intent.
        self.R = self._inverse_row_norm(train)
        self.n_user, self.n_item = train.shape
        # Biases start at zero.
        self.b_u = np.zeros(self.n_user)
        self.b_i = np.zeros(self.n_item)
        # q_i: item factors.
        self.q_i = np.random.normal(scale=1. / self.k, size=(self.n_item, self.k))
        # x_j: item factors weighting the explicit-rating residuals.
        self.x_j = np.random.normal(scale=1. / self.k, size=(self.n_item, self.k))
        # y_j: item factors for implicit feedback.
        self.y_j = np.random.normal(scale=1. / self.k, size=(self.n_item, self.k))
        for _ in range(self.n_epochs):
            self.svdasy_step()
        return self

    def predict(self, X):
        """Return a float copy of ``X`` with every non-zero cell replaced by its prediction.

        Row ``u`` of ``X`` is scored with the parameters of row ``u`` of the
        training matrix, so ``X`` must use the same user ordering as the
        matrix given to :meth:`fit`.  Cells equal to 0 stay 0.
        """
        X = np.asarray(X)
        n_user, n_item = X.shape
        predictions = np.array(X, dtype=float)
        for user in range(n_user):
            wanted = np.flatnonzero(X[user, :])
            if wanted.size == 0:
                continue
            # The user factor is shared by all items of this user; compute it
            # once instead of once per cell.
            user_vec = self._user_factor(user)
            predictions[user, wanted] = (
                self.global_bias
                + self.b_u[user]
                + self.b_i[wanted]
                + self.q_i[wanted, :].dot(user_vec)
            )
        return predictions
# Hyper-parameter grid explored by the search.
params = {
    "k": [50, 100, 150],
    "lrate": [0.001, 0.002],
}


def rmse_score(estimator, X, y):
    """Scorer: negated RMSE of the estimator on the held-out ratings ``y``.

    The estimator only predicts cells that are non-zero in the matrix it is
    given, so it is asked to predict the held-out cells (``y``) directly;
    ``X`` is not used.  The value is negated because GridSearchCV treats
    larger scores as better.
    """
    return -get_rmse(estimator.predict(y), y)


def mae_score(estimator, X, y):
    """Scorer: negated MAE of the estimator on the held-out ratings ``y``."""
    return -get_mae(estimator.predict(y), y)


score_dictionary = {'rmse': rmse_score, 'mae': mae_score}

# The model holds one parameter set per user row, so it cannot score users it
# was not fitted on.  Use a single split in which every user appears on both
# sides: fitting uses ``train`` and scoring uses the ratings held out in
# ``test``.
all_users = np.arange(train.shape[0])
gs = GridSearchCV(
    MyClassifier(df),
    param_grid=params,
    scoring=score_dictionary,
    cv=[(all_users, all_users)],
    n_jobs=20,
    refit=False,
)
gs.fit(train, test)

# With multi-metric scoring and refit=False there is no ``best_params_``;
# read the best RMSE candidate (rank 1) from ``cv_results_`` instead.
best_rmse_index = int(np.argmin(gs.cv_results_['rank_test_rmse']))
print(gs.cv_results_['params'][best_rmse_index])
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment