Skip to content
Snippets Groups Projects
Commit f6ffcd39 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Added equiv possibility for the HPS

parent ce7290d1
No related branches found
No related tags found
No related merge requests found
...@@ -22,13 +22,13 @@ Classification: ...@@ -22,13 +22,13 @@ Classification:
nb_folds: 2 nb_folds: 2
nb_class: 2 nb_class: 2
classes: classes:
type: ["monoview"] type: ["monoview", "multiview"]
algos_monoview: ["adaboost",] algos_monoview: ["adaboost",]
algos_multiview: ["weighted_linear_early_fusion"] algos_multiview: ["weighted_linear_early_fusion"]
stats_iter: 2 stats_iter: 2
metrics: ["accuracy_score", "f1_score"] metrics: ["accuracy_score", "f1_score"]
metric_princ: "f1_score" metric_princ: "f1_score"
hps_type: "randomized_search" hps_type: "randomized_search-equiv"
hps_iter: 2 hps_iter: 2
......
...@@ -264,5 +264,9 @@ The figure below represents the duration of the execution on a personal computer ...@@ -264,5 +264,9 @@ The figure below represents the duration of the execution on a personal computer
The duration is in seconds, and we used 2,5,10,15,20 as values for ``nb_folds`` and 2,5,10,20,30,50,100 for ``hps_iter`` with two monoview classifiers and one multiview classifier on simulated data. The duration is in seconds, and we used 2,5,10,15,20 as values for ``nb_folds`` and 2,5,10,20,30,50,100 for ``hps_iter`` with two monoview classifiers and one multiview classifier on simulated data.
.. note::
In order to compensate for the fact that multiview classifiers have more complex problems to solve, it is possible to use ``"randomized_search-equiv"`` as the HPS optimization method. It allows
``hps_iter`` draws for the monoview classifiers and ``hps_iter * nb_view`` draws for the multiview ones.
...@@ -187,7 +187,7 @@ def getHPs(classifierModule, hyper_param_search, nIter, classifier_module_name, ...@@ -187,7 +187,7 @@ def getHPs(classifierModule, hyper_param_search, nIter, classifier_module_name,
logging.debug( logging.debug(
"Start:\t " + hyper_param_search + " best settings with " + str( "Start:\t " + hyper_param_search + " best settings with " + str(
nIter) + " iterations for " + classifier_module_name) nIter) + " iterations for " + classifier_module_name)
classifierHPSearch = getattr(hyper_parameter_search, hyper_param_search) classifierHPSearch = getattr(hyper_parameter_search, hyper_param_search.split("-")[0])
clKWARGS, testFoldsPreds = classifierHPSearch(X_train, y_train, "monoview", clKWARGS, testFoldsPreds = classifierHPSearch(X_train, y_train, "monoview",
random_state, random_state,
outputFileName, outputFileName,
......
...@@ -13,7 +13,7 @@ from .. import metrics ...@@ -13,7 +13,7 @@ from .. import metrics
def search_best_settings(dataset_var, labels, classifier_module, classifier_name, def search_best_settings(dataset_var, labels, classifier_module, classifier_name,
metrics, learning_indices, i_k_folds, random_state, metrics, learning_indices, i_k_folds, random_state,
directory, views_indices=None, nb_cores=1, directory, views_indices=None, nb_cores=1,
searching_tool="randomized_search", n_iter=1, searching_tool="randomized_search-equiv", n_iter=1,
classifier_config=None): classifier_config=None):
"""Used to select the right hyper-parameter optimization function """Used to select the right hyper-parameter optimization function
to optimize hyper parameters""" to optimize hyper parameters"""
...@@ -22,12 +22,12 @@ def search_best_settings(dataset_var, labels, classifier_module, classifier_name ...@@ -22,12 +22,12 @@ def search_best_settings(dataset_var, labels, classifier_module, classifier_name
output_file_name = directory output_file_name = directory
thismodule = sys.modules[__name__] thismodule = sys.modules[__name__]
if searching_tool is not "None": if searching_tool is not "None":
searching_tool_method = getattr(thismodule, searching_tool) searching_tool_method = getattr(thismodule, searching_tool.split("-")[0])
best_settings, test_folds_preds = searching_tool_method( best_settings, test_folds_preds = searching_tool_method(
dataset_var, labels, "multiview", random_state, output_file_name, dataset_var, labels, "multiview", random_state, output_file_name,
classifier_module, classifier_name, i_k_folds, classifier_module, classifier_name, i_k_folds,
nb_cores, metrics, n_iter, classifier_config, nb_cores, metrics, n_iter, classifier_config,
learning_indices=learning_indices, view_indices=views_indices,) learning_indices=learning_indices, view_indices=views_indices, equivalent_draws=searching_tool.endswith("equiv"))
else: else:
best_settings = classifier_config best_settings = classifier_config
return best_settings # or well set clasifier ? return best_settings # or well set clasifier ?
...@@ -108,7 +108,8 @@ def get_test_folds_preds(X, y, cv, estimator, framework, available_indices=None) ...@@ -108,7 +108,8 @@ def get_test_folds_preds(X, y, cv, estimator, framework, available_indices=None)
def randomized_search(X, y, framework, random_state, output_file_name, classifier_module, def randomized_search(X, y, framework, random_state, output_file_name, classifier_module,
classifier_name, folds=4, nb_cores=1, metric=["accuracy_score", None], classifier_name, folds=4, nb_cores=1, metric=["accuracy_score", None],
n_iter=30, classifier_kwargs =None, learning_indices=None, view_indices=None): n_iter=30, classifier_kwargs =None, learning_indices=None, view_indices=None,
equivalent_draws=True):
estimator = getattr(classifier_module, classifier_name)(random_state=random_state, estimator = getattr(classifier_module, classifier_name)(random_state=random_state,
**classifier_kwargs) **classifier_kwargs)
params_dict = estimator.genDistribs() params_dict = estimator.genDistribs()
...@@ -132,7 +133,8 @@ def randomized_search(X, y, framework, random_state, output_file_name, classifie ...@@ -132,7 +133,8 @@ def randomized_search(X, y, framework, random_state, output_file_name, classifie
cv=folds, random_state=random_state, cv=folds, random_state=random_state,
learning_indices=learning_indices, learning_indices=learning_indices,
view_indices=view_indices, view_indices=view_indices,
framework = framework) framework = framework,
equivalent_draws=equivalent_draws)
random_search.fit(X, y) random_search.fit(X, y)
best_params = random_search.best_params_ best_params = random_search.best_params_
if "random_state" in best_params: if "random_state" in best_params:
...@@ -157,7 +159,8 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): ...@@ -157,7 +159,8 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV):
def __init__(self, estimator, param_distributions, n_iter=10, def __init__(self, estimator, param_distributions, n_iter=10,
refit=True, n_jobs=1, scoring=None, cv=None, refit=True, n_jobs=1, scoring=None, cv=None,
random_state=None, learning_indices=None, view_indices=None, framework="monoview"): random_state=None, learning_indices=None, view_indices=None, framework="monoview",
equivalent_draws=True):
super(MultiviewCompatibleRandomizedSearchCV, self).__init__(estimator, super(MultiviewCompatibleRandomizedSearchCV, self).__init__(estimator,
n_iter=n_iter, n_iter=n_iter,
param_distributions=param_distributions, param_distributions=param_distributions,
...@@ -167,6 +170,7 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): ...@@ -167,6 +170,7 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV):
self.framework = framework self.framework = framework
self.available_indices = learning_indices self.available_indices = learning_indices
self.view_indices = view_indices self.view_indices = view_indices
self.equivalent_draws = equivalent_draws
def fit(self, X, y=None, groups=None, **fit_params): def fit(self, X, y=None, groups=None, **fit_params):
if self.framework == "monoview": if self.framework == "monoview":
...@@ -180,6 +184,8 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV): ...@@ -180,6 +184,8 @@ class MultiviewCompatibleRandomizedSearchCV(RandomizedSearchCV):
candidate_params = list(self._get_param_iterator()) candidate_params = list(self._get_param_iterator())
base_estimator = clone(self.estimator) base_estimator = clone(self.estimator)
results = {} results = {}
if self.equivalent_draws:
self.n_iter = self.n_iter*X.nb_view
self.cv_results_ = dict(("param_"+param_name, []) for param_name in candidate_params[0].keys()) self.cv_results_ = dict(("param_"+param_name, []) for param_name in candidate_params[0].keys())
self.cv_results_["mean_test_score"] = [] self.cv_results_["mean_test_score"] = []
for candidate_param_idx, candidate_param in enumerate(candidate_params): for candidate_param_idx, candidate_param in enumerate(candidate_params):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment