Luc Giffon / bolsonaro · Merge request !19

WIP: Resolve "Adding new datasets"

Merged. Charly Lamothe requested to merge 17-adding-new-datasets into master 5 years ago.
Commits: 20 · Pipelines: 0 · Changes: 62
Merge request reports

Compare master (base) with latest version dd5e9cde (20 commits, 5 years ago).
Earlier diff versions: version 6 802fc322 (19 commits), version 5 5f15d2a5 (18 commits), version 4 a955fda6 (5 commits), version 3 3507558e (4 commits), version 2 8d911afd (3 commits), version 1 69f71671 (2 commits).

62 files changed: +393 −205
code/bolsonaro/models/ensemble_selection_forest_regressor.py (new file, +90 −0)
from sklearn.metrics import mean_squared_error
from sklearn.base import BaseEstimator
from sklearn.tree import DecisionTreeRegressor
from abc import abstractmethod, ABCMeta

import numpy as np
from tqdm import tqdm


class EnsembleSelectionForestRegressor(BaseEstimator, metaclass=ABCMeta):
    """
    'Ensemble Selection from Libraries of Models' by Rich Caruana et al.
    """

    def __init__(self, models_parameters, library, score_metric=mean_squared_error):
        self._models_parameters = models_parameters
        self._library = library
        self._extracted_forest_size = self._models_parameters.extracted_forest_size
        self._score_metric = score_metric

    @property
    def models_parameters(self):
        return self._models_parameters

    @property
    def library(self):
        return self._library

    def fit(self, X_train, y_train, X_val, y_val):
        # Score every estimator of the library on the validation set.
        scores_list = list()
        for estimator in self._library:
            val_score = self._score_metric(estimator.predict(X_val), y_val)
            scores_list.append(val_score)

        class_list = list(self._library)
        # The score metric is an error (mean squared error by default), so the
        # estimator seeding the ensemble is the one with the lowest validation score.
        m = np.argmin(np.asarray(scores_list))
        self._ensemble_selected = [class_list[m]]
        temp_pred = class_list[m].predict(X_val)
        del class_list[m]
        # Greedily add the estimator whose inclusion most improves the averaged
        # validation prediction, until the requested ensemble size is reached.
        for k in range(self._extracted_forest_size - 1):
            candidate_index = 0
            best_score = np.inf
            for j in range(len(class_list)):
                temp_pred = np.vstack((temp_pred, class_list[j].predict(X_val)))
                temp_mean = np.mean(temp_pred, axis=0)
                temp_score = self._score_metric(temp_mean, y_val)
                if temp_score < best_score:
                    candidate_index = j
                    best_score = temp_score
                # Drop the candidate's predictions before trying the next one.
                temp_pred = np.delete(temp_pred, -1, 0)
            self._ensemble_selected.append(class_list[candidate_index])
            temp_pred = np.vstack((temp_pred, class_list[candidate_index].predict(X_val)))
            del class_list[candidate_index]
    def score(self, X, y):
        predictions = self.predict_base_estimator(X)
        return self._score_metric(predictions, y)

    def predict_base_estimator(self, X):
        # Average the predictions of the selected estimators.
        predictions = list()
        for tree in self._ensemble_selected:
            predictions.append(tree.predict(X))
        mean_predictions = np.mean(np.array(predictions), axis=0)
        return mean_predictions
    @staticmethod
    def generate_library(X_train, y_train, random_state=None):
        criterion_arr = ["mse"]  # , "friedman_mse", "mae"]
        splitter_arr = ["best"]  # , "random"]
        depth_arr = [i for i in range(5, 20, 1)]
        min_samples_split_arr = [i for i in range(2, 20, 1)]
        min_samples_leaf_arr = [i for i in range(2, 20, 1)]
        max_features_arr = ["sqrt"]  # ["auto", "sqrt", "log2"]

        library = list()
        with tqdm(total=len(criterion_arr) * len(splitter_arr) *
                  len(depth_arr) * len(min_samples_split_arr) * len(min_samples_leaf_arr) *
                  len(max_features_arr)) as bar:
            bar.set_description('Generating library')
            # Fit one decision tree per hyper-parameter combination.
            for criterion in criterion_arr:
                for splitter in splitter_arr:
                    for depth in depth_arr:
                        for min_samples_split in min_samples_split_arr:
                            for min_samples_leaf in min_samples_leaf_arr:
                                for max_features in max_features_arr:
                                    t = DecisionTreeRegressor(
                                        criterion=criterion, splitter=splitter, max_depth=depth,
                                        min_samples_split=min_samples_split,
                                        min_samples_leaf=min_samples_leaf,
                                        max_features=max_features, random_state=random_state)
                                    t.fit(X_train, y_train)
                                    library.append(t)
                                    bar.update(1)
        return library
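
For reference, a minimal usage sketch of the class above (not part of the diff). It assumes an sklearn version contemporary with this MR where criterion="mse" is still accepted, an import path matching the file location, and a stand-in DummyModelsParameters object exposing the one attribute the class reads; those names are hypothetical, and generate_library fits a few thousand small trees, so this is slow on purpose-built data only.

import numpy as np
from sklearn.model_selection import train_test_split

# Assumed import path, mirroring code/bolsonaro/models/ensemble_selection_forest_regressor.py.
from bolsonaro.models.ensemble_selection_forest_regressor import EnsembleSelectionForestRegressor


class DummyModelsParameters:
    # Stand-in for the project's models_parameters object; only the attribute
    # read by EnsembleSelectionForestRegressor is provided here.
    extracted_forest_size = 10


# Synthetic regression data.
rng = np.random.RandomState(42)
X = rng.uniform(size=(500, 5))
y = X.sum(axis=1) + rng.normal(scale=0.1, size=500)
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42)

# Build the tree library, run the greedy ensemble selection, then evaluate.
library = EnsembleSelectionForestRegressor.generate_library(X_train, y_train, random_state=42)
model = EnsembleSelectionForestRegressor(DummyModelsParameters(), library)
model.fit(X_train, y_train, X_val, y_val)
print(model.score(X_val, y_val))  # mean squared error of the selected ensemble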