Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
bolsonaro
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Luc Giffon
bolsonaro
Merge requests
!23
Resolve "integration-sota"
Code
Review changes
Check out branch
Download
Patches
Plain diff
Merged
Resolve "integration-sota"
15-integration-sota
into
master
Overview
0
Commits
23
Pipelines
0
Changes
5
Merged
Charly Lamothe
requested to merge
15-integration-sota
into
master
5 years ago
Overview
0
Commits
23
Pipelines
0
Changes
5
Expand
Closes
#15 (closed)
0
0
Merge request reports
Viewing commit
bab07f41
Prev
Next
Show latest version
5 files
+
148
−
74
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
Files
5
Search (e.g. *.vue) (Ctrl+P)
bab07f41
similarityforest now handle classification
· bab07f41
Luc Giffon
authored
5 years ago
code/bolsonaro/models/kmeans_forest_regressor.py
+
48
−
34
Options
import
time
from
bolsonaro.models.utils
import
score_metric_mse
,
score_metric_indicator
,
aggregation_classification
,
aggregation_regression
from
bolsonaro.utils
import
tqdm_joblib
from
sklearn.ensemble
import
RandomForestRegressor
@@ -53,72 +54,85 @@ class KmeansForest(BaseEstimator, metaclass=ABCMeta):
lst_pruned_forest
.
append
(
self
.
_estimator
.
estimators_
[
index_trees_cluster
[
best_tree_index
]])
self
.
_selected_trees
=
lst_pruned_forest
self
.
_estimator
.
estimators_
=
lst_pruned_forest
#
self._estimator.estimators_ = lst_pruned_forest
def score(self, X, y):
    """
    Evaluate the pruned forest on (X, y).

    Delegates prediction to ``self.predict`` (which aggregates the votes of
    the selected trees) instead of re-implementing the per-tree prediction
    loop locally, so scoring always matches what ``predict`` returns.

    :param X: input samples, shape (n_samples, n_features)
    :param y: ground-truth targets, shape (n_samples,)
    :return: scalar score of the aggregated predictions
    """
    final_predictions = self.predict(X)
    # _score_metric returns one score per predictor row; the aggregated
    # prediction is a single row, so [0] extracts the scalar score.
    score = self._score_metric(final_predictions, y)[0]
    return score
def predict(self, X):
    """
    Predict with the pruned forest by aggregating the selected trees only.

    Iterates over ``self._selected_trees`` (the trees kept by the k-means
    pruning step) rather than the full underlying forest; use
    ``predict_base_estimator`` for the unpruned forest's prediction.

    :param X: input samples, shape (n_samples, n_features)
    :return: aggregated predictions, shape (n_samples,)
    """
    # One row of predictions per selected tree.
    predictions = np.empty((len(self._selected_trees), X.shape[0]))
    for idx_tree, tree in enumerate(self._selected_trees):
        predictions[idx_tree, :] = tree.predict(X)
    # Task-specific aggregation (mean for regression, vote for classification).
    final_predictions = self._aggregate(predictions)
    return final_predictions
def predict_base_estimator(self, X):
    """
    Predict with the full (unpruned) underlying forest.

    :param X: input samples, shape (n_samples, n_features)
    :return: predictions of the wrapped base estimator
    """
    return self._estimator.predict(X)
def _get_best_tree_index(self, y_preds, y_true):
    """
    Return the index of the best-scoring predictor among the rows of y_preds.

    :param y_preds: per-tree predictions, shape (nb_trees, nb_samples)
    :param y_true: ground truth, shape (1, nb_samples)
    :return: integer index of the best tree
    """
    score = self._score_metric(y_preds, y_true)
    # _best is task-specific: argmin for error metrics (e.g. MSE),
    # argmax for accuracy-like metrics.
    best_tree_index = self._best(score)
    return best_tree_index
@abstractmethod
def _score_metric(self, y_preds, y_true):
    """
    Get the score of each predictor in y_preds.

    y_preds.shape == (nb_trees, nb_sample)
    y_true.shape == (1, nb_sample)

    :param y_preds: per-tree predictions
    :param y_true: ground-truth targets
    :return: one score per predictor row
    """
    pass
@staticmethod
@abstractmethod
def _best(array):
    """
    Return the index of the best element in array.

    Subclasses choose the direction: argmin for error metrics,
    argmax for accuracy-like metrics.

    :param array: 1-D array of per-predictor scores
    :return: integer index of the best score
    """
    pass
@abstractmethod
def _aggregate(self, predictions):
    """
    Aggregate the votes of the predictors in predictions.

    predictions shape: (nb_trees, nb_samples)

    :param predictions: per-tree predictions
    :return: aggregated prediction, shape (nb_samples,)
    """
    pass
class KMeansForestRegressor(KmeansForest, metaclass=ABCMeta):
    """
    K-means-pruned random forest specialized for regression.

    Supplies the regression-specific pieces of the KmeansForest template:
    mean aggregation, per-tree MSE scoring, and "lower is better" selection.
    """

    def _aggregate(self, predictions):
        # Mean of the trees' predictions, delegated to the shared helper
        # so aggregation is consistent across the code base.
        return aggregation_regression(predictions)

    def _score_metric(self, y_preds, y_true):
        """
        Per-predictor mean squared error (lower is better).

        :param y_preds: predictions, shape (nb_trees, nb_samples) or (nb_samples,)
        :param y_true: ground truth, shape (1, nb_samples) or (nb_samples,)
        :return: one MSE value per predictor row
        """
        # Normalize both arrays to 2-D (nb_predictors, nb_samples).
        if len(y_true.shape) == 1:
            y_true = y_true[np.newaxis, :]
        if len(y_preds.shape) == 1:
            y_preds = y_preds[np.newaxis, :]
        assert y_preds.shape[1] == y_true.shape[1], "Number of examples to compare should be the same in y_preds and y_true"
        return score_metric_mse(y_preds, y_true)

    @staticmethod
    def _best(array):
        # MSE: the best tree is the one with the lowest error.
        return np.argmin(array)
class KMeansForestClassifier(KmeansForest, metaclass=ABCMeta):
    """
    K-means-pruned random forest specialized for classification.

    Supplies the classification-specific pieces of the KmeansForest template:
    vote aggregation, per-tree accuracy scoring, and "higher is better"
    selection.
    """

    def _aggregate(self, predictions):
        # Vote aggregation, delegated to the shared helper so aggregation
        # is consistent across the code base.
        return aggregation_classification(predictions)

    def _score_metric(self, y_preds, y_true):
        """
        Per-predictor indicator score (accuracy-like; higher is better).

        :param y_preds: predictions, shape (nb_trees, nb_samples) or (nb_samples,)
        :param y_true: ground truth, shape (1, nb_samples) or (nb_samples,)
        :return: one score per predictor row
        """
        # Normalize both arrays to 2-D (nb_predictors, nb_samples).
        if len(y_true.shape) == 1:
            y_true = y_true[np.newaxis, :]
        if len(y_preds.shape) == 1:
            y_preds = y_preds[np.newaxis, :]
        assert y_preds.shape[1] == y_true.shape[1], "Number of examples to compare should be the same in y_preds and y_true"
        return score_metric_indicator(y_preds, y_true)

    @staticmethod
    def _best(array):
        # Accuracy: the best tree is the one with the highest score.
        return np.argmax(array)
Loading