Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
T
Tuned_AsymmetricSVD
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Winnie Uyen Nguyen
Tuned_AsymmetricSVD
Commits
964b6727
Commit
964b6727
authored
2 years ago
by
Winnie Uyen Nguyen
Browse files
Options
Downloads
Patches
Plain Diff
Tuned Asymmetric SVD model
parent
e7016544
Branches
main
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
FinalResultAsym_SVD.py
+170
-0
170 additions, 0 deletions
FinalResultAsym_SVD.py
with
170 additions
and
0 deletions
FinalResultAsym_SVD.py
0 → 100644
+
170
−
0
View file @
964b6727
import
pandas
as
pd
import
numpy
as
np
from
sklearn.metrics
import
fbeta_score
,
make_scorer
from
sklearn.model_selection
import
GridSearchCV
from
sklearn.base
import
BaseEstimator
,
ClassifierMixin
import
numpy
as
np
# Column layout of the tab-separated MovieLens 100k ratings file.
names = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv(
    '/Users/winnie/Desktop/CS488/incremental-approsvd/ml-100k/u.data',
    sep='\t',
    names=names,
)

# Number of distinct users / items that appear in the file.
n_users = len(df.user_id.unique())
n_items = len(df.item_id.unique())
print("n_users", n_users)
print("n_items", n_items)

# Dense ratings matrix r_{ui}: one row per user, one column per item,
# 0 meaning "no rating".  Ids in the file are 1-based, hence the "- 1".
ratings = np.zeros((n_users, n_items))
for row in df.itertuples():
    user_idx = row.user_id - 1
    item_idx = row.item_id - 1
    # Rows whose id falls outside the matrix are skipped.
    if user_idx >= n_users or item_idx >= n_items:
        continue
    ratings[user_idx, item_idx] = row.rating
# Split into training and test sets by moving a fixed number of ratings
# per user out of the training matrix and into the test matrix.
def train_test_split(ratings, n_test=10):
    """Hold out ``n_test`` randomly chosen ratings of every user as test data.

    Parameters
    ----------
    ratings : 2-D numpy array, one row per user, 0 meaning "no rating".
    n_test : number of nonzero ratings to move to the test set for each
        user (default 10, the value that used to be hard-coded).

    Returns
    -------
    (train, test) : two arrays with the same shape as ``ratings``.  Every
        nonzero entry of ``ratings`` ends up in exactly one of them.

    Raises
    ------
    ValueError
        If some user has fewer than ``n_test`` nonzero ratings.

    Notes
    -----
    Sampling uses numpy's global random state (``np.random.choice``), so
    call ``np.random.seed`` beforehand for a reproducible split.  A user
    with exactly ``n_test`` ratings keeps no training rating at all.
    """
    test = np.zeros(ratings.shape)
    train = ratings.copy()
    for user in range(ratings.shape[0]):
        rated_items = ratings[user, :].nonzero()[0]
        if len(rated_items) < n_test:
            raise ValueError(
                "user {} has only {} ratings; cannot hold out {}".format(
                    user, len(rated_items), n_test
                )
            )
        held_out = np.random.choice(rated_items, size=n_test, replace=False)
        train[user, held_out] = 0.
        test[user, held_out] = ratings[user, held_out]

    # Internal sanity check: no cell may be nonzero in both matrices.
    assert np.all((train * test) == 0)
    return train, test
train, test = train_test_split(ratings)

# 0/1 indicator of which (user, item) cells hold a training rating.
# A vectorized comparison replaces np.vectorize(lambda ...), which calls
# a Python function once per matrix cell.
indicating_mat = (train != 0).astype(int)

# Boolean version of the indicator, used to select rows of the factor
# matrices for the items a user has rated.
mask = indicating_mat == 1
from
sklearn.metrics
import
mean_squared_error
from
sklearn.metrics
import
mean_absolute_error
def get_rmse(pred, actual):
    """Return the root-mean-squared error over the observed ratings.

    Only positions where ``actual`` is nonzero are compared; zero entries
    of ``actual`` mean "no rating" and are ignored.

    The value is computed directly with numpy instead of
    ``mean_squared_error(..., squared=False)``: that keyword was
    deprecated and later removed in newer scikit-learn releases.

    Parameters
    ----------
    pred : array of predicted ratings, same shape as ``actual``.
    actual : array of true ratings, 0 where no rating exists.

    Returns
    -------
    float : the RMSE (also printed with the label "rmse").

    Raises
    ------
    ValueError
        If ``actual`` has no nonzero entry, so there is nothing to score.
    """
    observed = actual.nonzero()
    if observed[0].size == 0:
        raise ValueError("actual contains no nonzero ratings to evaluate")
    residuals = actual[observed] - pred[observed]
    rmse = float(np.sqrt(np.mean(residuals ** 2)))
    print("rmse", rmse)
    return rmse
def get_mae(pred, actual):
    """Return the mean absolute error over the observed ratings.

    Only positions where ``actual`` is nonzero are scored; zeros in
    ``actual`` mean "no rating" and are left out.  The result is printed
    with the label "mae" and returned.
    """
    # Positions that actually carry a rating.
    rated = actual.nonzero()
    predicted_values = pred[rated].flatten()
    true_values = actual[rated].flatten()
    mae = mean_absolute_error(
        true_values,
        predicted_values,
        sample_weight=None,
        multioutput='uniform_average',
    )
    print("mae", mae)
    return mae
def get_item_item(u, mu, b_i, b_u, x_j):
    """Return the explicit-feedback part of user ``u``'s latent vector.

    For every item j flagged in ``indicating_mat[u]`` the baseline
    residual ``ratings[u, j] - mu - b_u[u] - b_i[j]`` weights the row
    ``x_j[j]``; the weighted rows are summed and scaled by ``R[u]``.

    The sum is a single vector-matrix product instead of a Python loop,
    and the result length comes from ``x_j`` rather than the global ``k``.
    Results can differ from the loop only by floating-point rounding.

    Parameters
    ----------
    u : user row index.
    mu : global mean rating.
    b_i : per-item bias vector.
    b_u : per-user bias vector.
    x_j : 2-D array of item factors, one row per item.

    Returns
    -------
    1-D array with ``x_j.shape[1]`` entries (all zeros if the user has
    no flagged item).

    Reads the module-level ``indicating_mat``, ``ratings`` and ``R``.
    """
    rated = indicating_mat[u, :].nonzero()[0]
    # NOTE(review): this reads ``ratings``, not ``train``.  It is only
    # leak-free while ``indicating_mat`` is derived from the training
    # matrix, so held-out cells are never selected.
    residuals = ratings[u, rated] - mu - b_u[u] - b_i[rated]
    return residuals.dot(x_j[rated, :]) * R[u]
def predict_one(u, i, b_i, b_u, q_i, x_j, y_j):
    """Predict the rating of user ``u`` for item ``i``.

    The prediction is the baseline ``global_bias + b_i[i] + b_u[u]`` plus
    the dot product of the item factors ``q_i[i]`` with a user vector
    built from two parts: the explicit part returned by
    ``get_item_item`` and ``N[u]`` times the sum of the ``y_j`` rows
    selected by ``mask[u]``.

    Reads the module-level ``global_bias``, ``N`` and ``mask``.
    """
    explicit_part = get_item_item(u, global_bias, b_i, b_u, x_j)
    implicit_part = N[u] * y_j[mask[u, :], :].sum(axis=0)
    user_vector = explicit_part + implicit_part
    baseline = global_bias + b_i[i] + b_u[u]
    return baseline + q_i[i, :].T.dot(user_vector)
def svdasy_step():
    """Run one stochastic-gradient pass over all observed training ratings.

    The observed (user, item) pairs in ``non_zeros`` are visited in a
    random order.  For each pair the prediction error is computed and the
    module-level parameters ``b_u``, ``b_i``, ``q_i``, ``x_j`` and ``y_j``
    are updated in place with learning rate ``lrate`` and L2 weight
    ``_lambda``.  A progress line is printed every 0x1000 samples.
    Returns None.

    Changes from the previous version:
    * ``x_j`` now receives the gradient of the prediction with respect to
      each row, ``error * R[user] * residual_j * q_i[item]``.  The old
      code used ``error * item_item * q_i[item]``, which applied the same
      (non-gradient) step to every row.
    * Rows are indexed with the index array itself rather than the tuple
      returned by ``.nonzero()``, which added a spurious leading axis.
    """
    n_samples = len(non_zeros[0])
    order = np.random.permutation(n_samples)
    for seen, sample in enumerate(order, start=1):
        if seen % 0x1000 == 0:
            print(seen, n_samples)

        user = non_zeros[0][sample]
        item = non_zeros[1][sample]

        pred = predict_one(user, item, b_i, b_u, q_i, x_j, y_j)
        error = train[user, item] - pred

        # Bias terms.
        b_u[user] += lrate * (error - _lambda * b_u[user])
        b_i[item] += lrate * (error - _lambda * b_i[item])

        # User vector, recomputed because the biases just changed.
        item_item = get_item_item(user, global_bias, b_i, b_u, x_j)
        rated = indicating_mat[user, :].nonzero()[0]
        user_vector = item_item + N[user] * y_j[mask[user, :], :].sum(axis=0)

        # Item factors q_i.
        q_i[item, :] += lrate * (error * user_vector - _lambda * q_i[item, :])

        # Explicit item factors x_j: each row is weighted by its own
        # baseline residual.  ``train`` equals ``ratings`` on these cells
        # as long as ``indicating_mat`` is derived from ``train``.
        residuals = (
            train[user, rated] - global_bias - b_u[user] - b_i[rated]
        )
        x_j[rated, :] += lrate * (
            error * R[user] * residuals[:, np.newaxis] * q_i[item, :]
            - _lambda * x_j[rated, :]
        )

        # Implicit item factors y_j: same step for every rated item.
        y_grad = error * N[user] * q_i[item, :]
        y_j[rated, :] += lrate * (y_grad - _lambda * y_j[rated, :])
# History of [train, test] metric pairs, one entry per call to predict().
data_rmse = []
data_mae = []


def predict():
    """Predict every (user, item) rating and record train/test errors.

    Uses the same formula as ``predict_one``, but the user vector is
    computed once per user and applied to all items with one matrix
    product, instead of being recomputed for every single item.

    Appends ``[train_rmse, test_rmse]`` to ``data_rmse`` and
    ``[train_mae, test_mae]`` to ``data_mae``, prints both histories and
    returns the full prediction matrix of shape ``(n_user, n_item)``.
    """
    predictions = np.zeros([n_user, n_item])
    for user in range(n_user):
        item_item = get_item_item(user, global_bias, b_i, b_u, x_j)
        user_vector = item_item + N[user] * y_j[mask[user, :], :].sum(axis=0)
        predictions[user, :] = (
            global_bias + b_i + b_u[user] + q_i.dot(user_vector)
        )

    data_rmse.append([get_rmse(predictions, train),
                      get_rmse(predictions, test)])
    data_mae.append([get_mae(predictions, train),
                     get_mae(predictions, test)])
    print(data_rmse, data_mae)
    # TODO: optionally append the metrics to './asym.csv'.
    return predictions
# Matrix dimensions, taken from the training matrix.
n_user, n_item = train.shape

# Hyperparameters.
k = 50            # width of every factor matrix
steps = 200       # number of calls to svdasy_step()
lrate = 0.001     # learning rate
_lambda = 0.001   # regularisation weight

# Parameters: bias vectors start at zero.
b_u = np.zeros(n_user)
b_i = np.zeros(n_item)

# Three (n_item, k) factor matrices, drawn in the order q_i, x_j, y_j
# from a zero-mean normal distribution with standard deviation 1/k.
q_i, x_j, y_j = (
    np.random.normal(scale=1. / k, size=(n_item, k)) for _ in range(3)
)

# Mean of the observed (nonzero) training ratings.
global_bias = train[train != 0].mean()

# Row and column indices of every observed training rating.
non_zeros = np.nonzero(train)

# Per-user scaling factors.  indicating_mat rows are 0/1, so their norm
# is the square root of the number of rated items (same as
# np.power(indicating_mat.sum(1), -0.5)).
N = 1. / np.linalg.norm(indicating_mat, axis=1)
# NOTE(review): R uses the norm of the rating values themselves, i.e.
# 1 / sqrt(sum of squared ratings), not 1 / sqrt(count) -- confirm this
# is the intended normalisation.
R = 1. / np.linalg.norm(train, axis=1)

# Training loop: evaluate on the first step and then every tenth one.
for i in range(steps):
    print(i)
    svdasy_step()
    if not i % 10:
        predict()
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment