Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
bolsonaro
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Luc Giffon
bolsonaro
Merge requests
!20
Resolve "integration-sota"
Code
Review changes
Check out branch
Download
Patches
Plain diff
Expand sidebar
Merged
Resolve "integration-sota"
15-integration-sota
into
master
Overview
0
Commits
25
Pipelines
0
Changes
11
Merged
Resolve "integration-sota"
Charly Lamothe
requested to merge
15-integration-sota
into
master
Mar 6, 2020
Overview
0
Commits
25
Pipelines
0
Changes
11
0
0
Merge request reports
Compare
master
version 15
baf8cb3c
Mar 10, 2020
version 14
7562c0c1
Mar 10, 2020
version 13
96ff6093
Mar 10, 2020
version 12
34070d2c
Mar 6, 2020
version 11
6483c0dc
Mar 6, 2020
version 10
138660cb
Mar 6, 2020
version 9
731cee0a
Mar 6, 2020
version 8
86c4cf10
Mar 6, 2020
version 7
bf240b77
Mar 6, 2020
version 6
1194ee2f
Mar 6, 2020
version 5
0363926f
Mar 6, 2020
version 4
46a4a8b0
Mar 6, 2020
version 3
ca5d0080
Mar 6, 2020
version 2
94668904
Mar 6, 2020
version 1
125817c1
Mar 6, 2020
master (base)
and
version 10
latest version
462e76fa
25 commits,
Mar 10, 2020
version 15
baf8cb3c
16 commits,
Mar 10, 2020
version 14
7562c0c1
15 commits,
Mar 10, 2020
version 13
96ff6093
14 commits,
Mar 10, 2020
version 12
34070d2c
13 commits,
Mar 6, 2020
version 11
6483c0dc
12 commits,
Mar 6, 2020
version 10
138660cb
11 commits,
Mar 6, 2020
version 9
731cee0a
10 commits,
Mar 6, 2020
version 8
86c4cf10
8 commits,
Mar 6, 2020
version 7
bf240b77
7 commits,
Mar 6, 2020
version 6
1194ee2f
6 commits,
Mar 6, 2020
version 5
0363926f
5 commits,
Mar 6, 2020
version 4
46a4a8b0
4 commits,
Mar 6, 2020
version 3
ca5d0080
3 commits,
Mar 6, 2020
version 2
94668904
2 commits,
Mar 6, 2020
version 1
125817c1
1 commit,
Mar 6, 2020
11 files
+
275
−
79
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
Files
11
code/bolsonaro/models/ensemble_selection_forest_regressor.py
0 → 100644
+
90
−
0
View file @ 138660cb
Edit in single-file editor
Open in Web IDE
from
sklearn.metrics
import
mean_squared_error
from
sklearn.base
import
BaseEstimator
from
sklearn.tree
import
DecisionTreeRegressor
from
abc
import
abstractmethod
,
ABCMeta
import
numpy
as
np
from
tqdm
import
tqdm
class EnsembleSelectionForestRegressor(BaseEstimator, metaclass=ABCMeta):
    """
    'Ensemble selection from libraries of models' by Rich Caruana et al

    Greedily picks ``extracted_forest_size`` already-fitted estimators out of
    ``library`` so that the average of their predictions minimises
    ``score_metric`` on a validation set.

    The metric is treated as an error (lower is better) and is always called
    as ``score_metric(predictions, targets)``.
    """

    def __init__(self, models_parameters, library, score_metric=mean_squared_error):
        """
        :param models_parameters: object exposing ``extracted_forest_size``,
            the number of estimators to select.
        :param library: iterable of fitted estimators exposing ``predict``.
        :param score_metric: callable ``(predictions, targets) -> score``
            where a lower score is better.
        """
        self._models_parameters = models_parameters
        self._library = library
        self._extracted_forest_size = self._models_parameters.extracted_forest_size
        self._score_metric = score_metric

    @property
    def models_parameters(self):
        """Parameters object given at construction time."""
        return self._models_parameters

    @property
    def library(self):
        """Library of candidate estimators given at construction time."""
        return self._library

    def fit(self, X_train, y_train, X_val, y_val):
        """
        Select the ensemble by forward greedy selection on the validation set.

        The estimator with the lowest validation score seeds the ensemble;
        then, at each step, the remaining estimator whose addition gives the
        lowest score for the averaged prediction is added (each estimator is
        used at most once). The result is stored in ``self._ensemble_selected``.

        :param X_train: unused, kept for interface compatibility.
        :param y_train: unused, kept for interface compatibility.
        :param X_val: validation inputs used to score the candidates.
        :param y_val: validation targets used to score the candidates.
        :raises ValueError: if the library is empty.
        """
        candidates = list(self._library)
        if not candidates:
            raise ValueError('Cannot select an ensemble from an empty library.')

        # Every estimator is fixed, so predict on the validation set only once
        # instead of once per candidate per selection round.
        candidate_predictions = [np.asarray(estimator.predict(X_val))
                                 for estimator in candidates]
        individual_scores = [self._score_metric(prediction, y_val)
                             for prediction in candidate_predictions]

        # Lower is better (consistent with the greedy loop below), hence argmin.
        seed_index = int(np.argmin(np.asarray(individual_scores)))
        self._ensemble_selected = [candidates.pop(seed_index)]
        # Running sum of the selected predictions; copied so that the array
        # returned by the estimator is never aliased.
        prediction_sum = np.array(candidate_predictions.pop(seed_index), dtype=float)

        for _ in range(self._extracted_forest_size - 1):
            if not candidates:
                # Library exhausted: nothing left to add to the ensemble.
                break

            ensemble_size = len(self._ensemble_selected) + 1
            best_index = 0
            best_score = np.inf
            for index, prediction in enumerate(candidate_predictions):
                averaged = (prediction_sum + prediction) / ensemble_size
                candidate_score = self._score_metric(averaged, y_val)
                if candidate_score < best_score:
                    best_index = index
                    best_score = candidate_score

            self._ensemble_selected.append(candidates.pop(best_index))
            prediction_sum = prediction_sum + candidate_predictions.pop(best_index)

    def score(self, X, y):
        """
        Score the selected ensemble on ``(X, y)`` with the configured metric.

        :return: ``score_metric(ensemble_predictions, y)``.
        """
        predictions = self.predict_base_estimator(X)
        return self._score_metric(predictions, y)

    def predict_base_estimator(self, X):
        """
        Predict with the selected ensemble.

        :return: the mean, over the selected estimators, of their predictions
            for ``X``.
        """
        predictions = [tree.predict(X) for tree in self._ensemble_selected]
        return np.mean(np.array(predictions), axis=0)

    @staticmethod
    def generate_library(X_train, y_train, random_state=None):
        """
        Fit one ``DecisionTreeRegressor`` per hyper-parameter combination.

        :param X_train: training inputs every tree is fitted on.
        :param y_train: training targets every tree is fitted on.
        :param random_state: forwarded to every ``DecisionTreeRegressor``.
        :return: list of fitted trees, in grid order (the last parameter
            listed below varies fastest).
        """
        from itertools import product

        criterion_arr = ["mse"]  # other candidates: "friedman_mse", "mae"
        splitter_arr = ["best"]  # other candidate: "random"
        depth_arr = list(range(5, 20))
        min_samples_split_arr = list(range(2, 20))
        min_samples_leaf_arr = list(range(2, 20))
        max_features_arr = ["sqrt"]  # other candidates: "auto", "log2"

        # Same iteration order as nesting the loops in this argument order.
        parameter_grid = list(product(criterion_arr, splitter_arr, depth_arr,
                                      min_samples_split_arr, min_samples_leaf_arr,
                                      max_features_arr))

        library = list()
        with tqdm(total=len(parameter_grid)) as bar:
            bar.set_description('Generating library')
            for (criterion, splitter, depth, min_samples_split,
                 min_samples_leaf, max_features) in parameter_grid:
                t = DecisionTreeRegressor(criterion=criterion, splitter=splitter,
                                          max_depth=depth,
                                          min_samples_split=min_samples_split,
                                          min_samples_leaf=min_samples_leaf,
                                          max_features=max_features,
                                          random_state=random_state)
                t.fit(X_train, y_train)
                library.append(t)
                bar.update(1)
        return library
Loading