Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
bolsonaro
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Luc Giffon
bolsonaro
Merge requests
!12
Resolve "integration-sota"
Code
Review changes
Check out branch
Download
Patches
Plain diff
Merged
Resolve "integration-sota"
15-integration-sota
into
master
Overview
0
Commits
8
Pipelines
0
Changes
9
Merged
Charly Lamothe
requested to merge
15-integration-sota
into
master
5 years ago
Overview
0
Commits
8
Pipelines
0
Changes
17
Expand
Closes
#15 (closed)
Edited
5 years ago
by
Charly Lamothe
0
0
Merge request reports
Compare
version 2
version 7
c86fc38d
5 years ago
version 6
29a11860
5 years ago
version 5
0a97ff64
5 years ago
version 4
be5bc24a
5 years ago
version 3
31724b30
5 years ago
version 2
9d68b04f
5 years ago
version 1
59e65276
5 years ago
master (base)
and
latest version
latest version
41ec448d
8 commits,
5 years ago
version 7
c86fc38d
7 commits,
5 years ago
version 6
29a11860
6 commits,
5 years ago
version 5
0a97ff64
5 commits,
5 years ago
version 4
be5bc24a
4 commits,
5 years ago
version 3
31724b30
3 commits,
5 years ago
version 2
9d68b04f
2 commits,
5 years ago
version 1
59e65276
1 commit,
5 years ago
Show latest version
17 files
+
696
−
277
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
Files
17
Search (e.g. *.vue) (Ctrl+P)
code/bolsonaro/models/kmeans_forest_regressor.py
+
18
−
30
Options
@@ -16,75 +16,63 @@ class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta):
On extreme pruning of random forest ensembles for ral-time predictive applications
'
, by Khaled Fawagreh, Mohamed Medhat Gaber and Eyad Elyan.
"""
def
__init__
(
self
,
models_parameters
):
def
__init__
(
self
,
models_parameters
,
score_metric
=
mean_squared_error
):
self
.
_models_parameters
=
models_parameters
self
.
_
regress
or
=
RandomForestRegressor
(
n_estimators
=
self
.
_models_parameters
.
hyperparameters
[
'
n_estimators
'
]
,
random_state
=
models_parameters
.
seed
,
n_jobs
=-
1
)
self
.
_
estimat
or
=
RandomForestRegressor
(
**
self
.
_models_parameters
.
hyperparameters
,
random_state
=
self
.
_
models_parameters
.
seed
,
n_jobs
=-
1
)
self
.
_extracted_forest_size
=
self
.
_models_parameters
.
extracted_forest_size
self
.
_score_metric
=
score_metric
@property
def
models_parameters
(
self
):
return
self
.
_models_parameters
def
fit
(
self
,
X_train
,
y_train
,
X_val
,
y_val
,
score_metric
=
mean_squared_error
):
self
.
_
regress
or
.
fit
(
X_train
,
y_train
)
def
fit
(
self
,
X_train
,
y_train
,
X_val
,
y_val
):
self
.
_
estimat
or
.
fit
(
X_train
,
y_train
)
predictions
=
list
()
for
tree
in
self
.
_
regress
or
.
estimators_
:
for
tree
in
self
.
_
estimat
or
.
estimators_
:
predictions
.
append
(
tree
.
predict
(
X_train
))
predictions
=
np
.
array
(
predictions
)
kmeans
=
KMeans
(
n_clusters
=
self
.
_extracted_forest_size
,
random_state
=
self
.
_models_parameters
.
seed
).
fit
(
predictions
)
labels
=
np
.
array
(
kmeans
.
labels_
)
# for each cluster select the best tree on the validation set
"""
pruned_forest = list()
for c in range(self._extracted_forest_size):
index = np.where(labels == c)[0]
cluster = list()
for i in index:
y_val_pred = self._regressor.estimators_[i].predict(X_val)
tree_pred = score_metric(y_val, y_val_pred)
cluster.append(tree_pred)
best_tree_index = np.argmax(cluster)
pruned_forest.append(self._regressor.estimators_[index[best_tree_index]])
"""
# For each cluster select the best tree on the validation set
extracted_forest_sizes
=
list
(
range
(
self
.
_extracted_forest_size
))
with
tqdm_joblib
(
tqdm
(
total
=
self
.
_extracted_forest_size
,
disable
=
Fals
e
))
as
prune_forest_job_pb
:
with
tqdm_joblib
(
tqdm
(
total
=
self
.
_extracted_forest_size
,
disable
=
Tru
e
))
as
prune_forest_job_pb
:
pruned_forest
=
Parallel
(
n_jobs
=-
1
)(
delayed
(
self
.
_prune_forest_job
)(
prune_forest_job_pb
,
extracted_forest_sizes
[
i
],
labels
,
X_val
,
y_val
,
score_metric
)
extracted_forest_sizes
[
i
],
labels
,
X_val
,
y_val
,
self
.
_
score_metric
)
for
i
in
range
(
self
.
_extracted_forest_size
))
self
.
_
regress
or
.
estimators_
=
pruned_forest
self
.
_
estimat
or
.
estimators_
=
pruned_forest
def
_prune_forest_job
(
self
,
prune_forest_job_pb
,
c
,
labels
,
X_val
,
y_val
,
score_metric
):
index
=
np
.
where
(
labels
==
c
)[
0
]
with
tqdm_joblib
(
tqdm
(
total
=
len
(
index
),
disable
=
Fals
e
))
as
cluster_job_pb
:
with
tqdm_joblib
(
tqdm
(
total
=
len
(
index
),
disable
=
Tru
e
))
as
cluster_job_pb
:
cluster
=
Parallel
(
n_jobs
=-
1
)(
delayed
(
self
.
_cluster_job
)(
cluster_job_pb
,
index
[
i
],
X_val
,
y_val
,
score_metric
)
for
i
in
range
(
len
(
index
)))
best_tree_index
=
np
.
argmax
(
cluster
)
prune_forest_job_pb
.
update
()
return
self
.
_
regress
or
.
estimators_
[
index
[
best_tree_index
]]
return
self
.
_
estimat
or
.
estimators_
[
index
[
best_tree_index
]]
def
_cluster_job
(
self
,
cluster_job_pb
,
i
,
X_val
,
y_val
,
score_metric
):
y_val_pred
=
self
.
_
regress
or
.
estimators_
[
i
].
predict
(
X_val
)
y_val_pred
=
self
.
_
estimat
or
.
estimators_
[
i
].
predict
(
X_val
)
tree_pred
=
score_metric
(
y_val
,
y_val_pred
)
cluster_job_pb
.
update
()
return
tree_pred
def
predict
(
self
,
X
):
return
self
.
_
regress
or
.
predict
(
X
)
return
self
.
_
estimat
or
.
predict
(
X
)
def
score
(
self
,
X
,
y
):
predictions
=
list
()
for
tree
in
self
.
_
regress
or
.
estimators_
:
for
tree
in
self
.
_
estimat
or
.
estimators_
:
predictions
.
append
(
tree
.
predict
(
X
))
predictions
=
np
.
array
(
predictions
)
mean_predictions
=
np
.
mean
(
predictions
,
axis
=
0
)
score
=
mean_squared_error
(
mean_predictions
,
y
)
score
=
self
.
_score_metric
(
mean_predictions
,
y
)
return
score
def
predict_base_estimator
(
self
,
X
):
return
self
.
_
regress
or
.
predict
(
X
)
return
self
.
_
estimat
or
.
predict
(
X
)
Loading