Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
bolsonaro
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Luc Giffon
bolsonaro
Commits
d7865314
Commit
d7865314
authored
5 years ago
by
Luc Giffon
Browse files
Options
Downloads
Patches
Plain Diff
now the model can make predictions: todo: manage result recording
parent
b62b7df7
No related branches found
Branches containing commit
No related tags found
Tags containing commit
2 merge requests
!3
clean scripts
,
!2
Luc manage normalization
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
code/bolsonaro/models/omp_forest_regressor.py
+43
-7
43 additions, 7 deletions
code/bolsonaro/models/omp_forest_regressor.py
code/bolsonaro/trainer.py
+1
-0
1 addition, 0 deletions
code/bolsonaro/trainer.py
code/train.py
+4
-1
4 additions, 1 deletion
code/train.py
with
48 additions
and
8 deletions
code/bolsonaro/models/omp_forest_regressor.py
+
43
−
7
View file @
d7865314
...
@@ -34,6 +34,9 @@ class OmpForestRegressor(BaseEstimator):
...
@@ -34,6 +34,9 @@ class OmpForestRegressor(BaseEstimator):
def
models_parameters
(
self
):
def
models_parameters
(
self
):
return
self
.
_models_parameters
return
self
.
_models_parameters
def score_regressor(self, X, y):
    """
    Score the wrapped regressor on (`X`, `y`).

    :param X: forwarded unchanged to `self._regressor.score`.
    :param y: forwarded unchanged to `self._regressor.score`.
    :return: whatever `self._regressor.score(X, y)` returns.
    """
    base_regressor = self._regressor
    return base_regressor.score(X, y)
def
_train_forest
(
self
,
X_train
,
y_train
):
def
_train_forest
(
self
,
X_train
,
y_train
):
self
.
_regressor
.
fit
(
X_train
,
y_train
)
self
.
_regressor
.
fit
(
X_train
,
y_train
)
forest
=
self
.
_regressor
.
estimators_
forest
=
self
.
_regressor
.
estimators_
...
@@ -51,24 +54,57 @@ class OmpForestRegressor(BaseEstimator):
...
@@ -51,24 +54,57 @@ class OmpForestRegressor(BaseEstimator):
:return:
:return:
"""
"""
self
.
_logger
.
debug
(
"
Forest make prediction on X_train
"
)
self
.
_logger
.
debug
(
"
Forest make prediction on X_train
"
)
D
=
np
.
array
([
tree
.
predict
(
X_train
)
for
tree
in
self
.
_forest
]).
T
D
=
self
.
_forest_
predict
ion
(
X_train
)
if
self
.
_models_parameters
.
normalize
:
if
self
.
_models_parameters
.
normalize
:
self
.
_logger
.
debug
(
"
Compute norm of predicted vectors on X_train
"
)
self
.
_logger
.
debug
(
"
Compute norm of predicted vectors on X_train
"
)
self
.
_forest_norms
=
np
.
linalg
.
norm
(
D
,
axis
=
0
)
self
.
_forest_norms
=
np
.
linalg
.
norm
(
D
,
axis
=
0
)
D
/=
self
.
_forest_norms
D
/=
self
.
_forest_norms
omp
=
OrthogonalMatchingPursuit
(
omp
=
OrthogonalMatchingPursuit
(
n_nonzero_coefs
=
self
.
_models_parameters
.
extracted_forest_size
,
n_nonzero_coefs
=
self
.
_models_parameters
.
extracted_forest_size
,
fit_intercept
=
False
,
normalize
=
False
)
fit_intercept
=
False
,
normalize
=
False
)
self
.
_logger
.
debug
(
"
Apply orthogonal maching pursuit on forest for {} extracted trees.
"
self
.
_logger
.
debug
(
"
Apply orthogonal maching pursuit on forest for {} extracted trees.
"
.
format
(
self
.
_models_parameters
.
extracted_forest_size
))
.
format
(
self
.
_models_parameters
.
extracted_forest_size
))
omp
.
fit
(
D
,
y_train
)
omp
.
fit
(
D
,
y_train
)
weights
=
omp
.
coef_
# why not to use directly the omp estimator and bypass it using the coefs?
weights
=
omp
.
coef_
# question: why not use the omp estimator directly instead of bypassing it via its coefs?
return
weights
return
weights
def
predict
(
self
):
def
_forest_prediction
(
self
,
X
):
raise
NotImplementedError
(
"
TODO: implement predict function
"
)
return
np
.
array
([
tree
.
predict
(
X
)
for
tree
in
self
.
_forest
]).
T
# todo don't forget to deal with the normalize parameter
# should the norm used on train or the new norms be used for normalization?
def predict(self, X):
    """
    Apply the OMPForestRegressor to X.

    :param X: samples handed to `self._forest_prediction`.
    :return: the matrix product of the (optionally rescaled) forest
        prediction matrix with `self.weights`.
    """
    tree_outputs = self._forest_prediction(X)

    normalize = self._models_parameters.normalize
    if normalize:
        # In-place division by the norms stored in `self._forest_norms`.
        tree_outputs /= self._forest_norms

    return tree_outputs @ self.weights
def score(self, X, y, metric="mse"):
    """
    Evaluate OMPForestRegressor on (`X`, `y`) using `metric`.

    :param X: samples passed unchanged to `self.predict`.
    :param y: target values, subtracted element-wise from the predictions.
    :param metric: name of the evaluation metric; only "mse" is handled.
    :return: the mean of the squared differences between the predictions
        and `y`.
    :raises ValueError: if `metric` is anything other than "mse".
    """
    predictions = self.predict(X)

    if metric == "mse":
        evaluation = np.mean(np.square(predictions - y))
    else:
        # Fill the placeholder so the error names the rejected metric
        # (the message used to be emitted with a literal "{}").
        raise ValueError("Metric value {} is not known.".format(metric))

    return evaluation
\ No newline at end of file
This diff is collapsed.
Click to expand it.
code/bolsonaro/trainer.py
+
1
−
0
View file @
d7865314
...
@@ -17,6 +17,7 @@ class Trainer(object):
...
@@ -17,6 +17,7 @@ class Trainer(object):
# why is this function named iterate?
# why is this function named iterate?
self
.
_logger
.
info
(
'
Training model using train set...
'
)
self
.
_logger
.
info
(
'
Training model using train set...
'
)
begin_time
=
time
.
time
()
begin_time
=
time
.
time
()
# todo: OMP may be running with X_dev or Y_dev
model
.
fit
(
self
.
_dataset
.
X_train
,
self
.
_dataset
.
y_train
)
model
.
fit
(
self
.
_dataset
.
X_train
,
self
.
_dataset
.
y_train
)
end_time
=
time
.
time
()
end_time
=
time
.
time
()
...
...
This diff is collapsed.
Click to expand it.
code/train.py
+
4
−
1
View file @
d7865314
...
@@ -20,7 +20,7 @@ if __name__ == "__main__":
...
@@ -20,7 +20,7 @@ if __name__ == "__main__":
load_dotenv
(
find_dotenv
())
load_dotenv
(
find_dotenv
())
default_dataset_name
=
'
boston
'
default_dataset_name
=
'
boston
'
default_normalize
=
Fals
e
default_normalize
=
Tru
e
default_forest_size
=
100
default_forest_size
=
100
default_extracted_forest_size
=
10
default_extracted_forest_size
=
10
# the models will be stored in a directory structure like: models/{experiment_id}/seeds/{seed_nb}/extracted_forest_size/{nb_extracted_trees}
# the models will be stored in a directory structure like: models/{experiment_id}/seeds/{seed_nb}/extracted_forest_size/{nb_extracted_trees}
...
@@ -100,3 +100,6 @@ if __name__ == "__main__":
...
@@ -100,3 +100,6 @@ if __name__ == "__main__":
model
=
ModelFactory
.
build
(
dataset
.
task
,
model_parameters
)
model
=
ModelFactory
.
build
(
dataset
.
task
,
model_parameters
)
trainer
.
iterate
(
model
,
sub_models_dir
)
trainer
.
iterate
(
model
,
sub_models_dir
)
print
(
model
.
score
(
dataset
.
X_test
,
dataset
.
y_test
))
print
(
model
.
score_regressor
(
dataset
.
X_test
,
dataset
.
y_test
))
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment