diff --git a/config_files/config.yml b/config_files/config.yml index 33c5469e8a209f553a1cd3eb726b6a3d3add7499..22a51e7214341a2df96002f19cf33d1456b68876 100644 --- a/config_files/config.yml +++ b/config_files/config.yml @@ -22,8 +22,8 @@ Classification: nb_folds: 2 nb_class: 2 classes: - type: ["multiview"] - algos_monoview: ["decision_tree", "adaboost"] + type: ["monoview","multiview"] + algos_monoview: ["all"] algos_multiview: ["all"] stats_iter: 2 metrics: ["accuracy_score", "f1_score"] @@ -145,7 +145,7 @@ weighted_linear_early_fusion: splitter: ["best"] entropy_fusion: - classifier_names: ["decison_tree"] + classifier_names: [["decision_tree"]] classifier_configs: decision_tree: max_depth: [1] @@ -153,7 +153,7 @@ entropy_fusion: splitter: ["best"] disagree_fusion: - classifier_names: ["decison_tree"] + classifier_names: [["decision_tree"]] classifier_configs: decision_tree: max_depth: [1] @@ -162,7 +162,7 @@ disagree_fusion: double_fault_fusion: - classifier_names: ["decison_tree"] + classifier_names: [["decision_tree"]] classifier_configs: decision_tree: max_depth: [1] @@ -170,7 +170,7 @@ double_fault_fusion: splitter: ["best"] difficulty_fusion: - classifier_names: ["decison_tree"] + classifier_names: [["decision_tree"]] classifier_configs: decision_tree: max_depth: [1] @@ -178,7 +178,7 @@ difficulty_fusion: splitter: ["best"] scm_late_fusion: - classifier_names: ["decison_tree"] + classifier_names: [["decision_tree"]] p: 0.1 max_rules: 10 model_type: 'conjunction' @@ -189,7 +189,7 @@ scm_late_fusion: splitter: ["best"] majority_voting_fusion: - classifier_names: ["decison_tree"] + classifier_names: [["decision_tree", "decision_tree", "decision_tree"]] classifier_configs: decision_tree: max_depth: [1] @@ -197,7 +197,7 @@ majority_voting_fusion: splitter: ["best"] bayesian_inference_fusion: - classifier_names: ["decison_tree"] + classifier_names: [["decision_tree", "decision_tree", "decision_tree"]] classifier_configs: decision_tree: max_depth: [1] @@ -205,7 +205,7 @@ bayesian_inference_fusion: splitter: ["best"] weighted_linear_late_fusion: - classifier_names: ["decison_tree"] + classifier_names: [["decision_tree", "decision_tree", "decision_tree"]] classifier_configs: decision_tree: max_depth: [1] diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py index 9bf64f7a67b4f460bfe43a88103ea0f03fbada13..cc637d2757c1b04aae97e7b634ab852d04fdecb2 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py @@ -48,7 +48,7 @@ def exec_monoview_multicore(directory, name, labels_names, classification_indice def exec_monoview(directory, X, Y, name, labels_names, classificationIndices, KFolds, nbCores, databaseType, path, randomState, hyper_param_search="randomized_search", - metrics=[["accuracy_score", None]], nIter=30, view_name="", **args): + metrics=[["accuracy_score", None]], n_iter=30, view_name="", **args): logging.debug("Start:\t Loading data") kwargs, \ t_start, \ @@ -82,7 +82,7 @@ def exec_monoview(directory, X, Y, name, labels_names, classificationIndices, classifierModule = getattr(monoview_classifiers, classifier_name) classifier_class_name = classifierModule.classifier_class_name clKWARGS, testFoldsPreds = getHPs(classifierModule, hyper_param_search, - nIter, classifier_name, classifier_class_name, + n_iter, classifier_name, 
classifier_class_name, X_train, y_train, randomState, outputFileName, KFolds, nbCores, metrics, kwargs) @@ -116,7 +116,7 @@ def exec_monoview(directory, X, Y, name, labels_names, classificationIndices, stringAnalysis, \ imagesAnalysis, \ metricsScores = execute(name, classificationIndices, KFolds, nbCores, - hyper_parameter_search, metrics, nIter, feat, classifier_name, + hyper_parameter_search, metrics, n_iter, feat, classifier_name, clKWARGS, labels_names, X.shape, y_train, y_train_pred, y_test, y_test_pred, t_end, randomState, classifier, outputFileName) diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py index e079dae7b54fda07d3e036bf21d0d762f6f47d30..c52e0bfd831960642fefce6358c95ecfc3db3a80 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/adaboost.py @@ -48,6 +48,7 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): [estim.predict(X) for estim in self.estimators_]) self.metrics = np.array([self.plotted_metric.score(pred, y) for pred in self.staged_predict(X)]) + return self def canProbas(self): """Used to know if the classifier can return label probabilities""" diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy.py index 969b157149b294c59a9850c1a0ecdb147ed2b248..a55d54ae7f8acf59acf4171c37a8ac9489743671 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/min_cq_graalpy.py @@ -30,7 +30,7 @@ class MinCQGraalpy(RegularizedBinaryMinCqClassifier, BaseMonoviewClassifier): def canProbas(self): """Used to know if the classifier can return label probabilities""" - return True + return False def set_params(self, **params): self.mu = params["mu"] diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm.py index ab9afe67c6bc35f9f2d368d66420e9e760a267b7..4988a2a7e15a421e74043ab6d465a6156488e930 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/scm.py @@ -62,7 +62,7 @@ class SCM(scm, BaseMonoviewClassifier): def canProbas(self): """Used to know if the classifier can return label probabilities""" - return True + return False def getInterpret(self, directory, y_test): interpretString = "Model used : " + str(self.model_) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py index 9d22a6d457487b43bd3457c72802a1530fca340c..bf68fa9b1680a91779c5884d361ba888d6f1c941 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py @@ -141,9 +141,9 @@ def exec_multiview(directory, dataset_var, name, classification_indices, k_folds logging.debug("Done:\t Fitting classifier") logging.debug("Start:\t Predicting") - train_labels = classifier.predict(dataset_var, predict_indices=learning_indices, + train_labels = classifier.predict(dataset_var, example_indices=learning_indices, 
view_indices=views_indices) - test_labels = classifier.predict(dataset_var, predict_indices=validation_indices, + test_labels = classifier.predict(dataset_var, example_indices=validation_indices, view_indices=views_indices) full_labels = np.zeros(labels.shape, dtype=int) - 100 for train_index, index in enumerate(learning_indices): diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py index 6e017ba764c0eb697f49ffb1bb89011e2b1976b9..02956fa810e828b35f7e66f04d3b2ad748abd269 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py @@ -19,7 +19,7 @@ class MultiviewResult(object): multiview_classifier_module = getattr(multiview_classifiers, self.classifier_name) multiview_classifier = getattr(multiview_classifier_module, - multiview_classifier_module.classifier_class_name)() + multiview_classifier_module.classifier_class_name)(42) return multiview_classifier.short_name @@ -32,6 +32,7 @@ class BaseMultiviewClassifier(BaseEstimator, ClassifierMixin): def __init__(self, random_state): self.random_state = random_state self.short_name = self.__class__.__name__ + self.weird_strings = {} def genBestParams(self, detector): return dict((param_name, detector.best_params_[param_name]) @@ -102,7 +103,8 @@ class ConfigGenerator(): self.distribs[classifier_name] = dict((param_name, param_distrib) for param_name, param_distrib in zip(classifier_class().param_names, - classifier_class().distribs)) + classifier_class().distribs) + if param_name!="random_state") def rvs(self, random_state=None): config_sample = {} @@ -117,11 +119,19 @@ class ConfigGenerator(): return config_sample -def get_available_monoview_classifiers(): - classifiers_names = [module_name +def get_available_monoview_classifiers(need_probas=False): + available_classifiers = [module_name for module_name in dir(monoview_classifiers) if not module_name.startswith("__")] - return classifiers_names + if need_probas: + proba_classifiers = [] + for module_name in available_classifiers: + module = getattr(monoview_classifiers, module_name) + can_probas = getattr(module, module.classifier_class_name)().canProbas() + if can_probas: + proba_classifiers.append(module_name) + available_classifiers = proba_classifiers + return available_classifiers def get_monoview_classifier(classifier_name): classifier_module = getattr(monoview_classifiers, classifier_name) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/diversity_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/diversity_utils.py index 12adf384a462b064ea9042c6327ebd225f3e5d5a..7b40beaff1366dae890b5d65e99a7cbbea452809 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/diversity_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/diversity_utils.py @@ -8,11 +8,11 @@ import numpy as np from ...multiview.multiview_utils import ConfigGenerator, \ get_examples_views_indices, get_available_monoview_classifiers, \ BaseMultiviewClassifier -from .fusion_utils import BaseLateFusionClassifier +from .fusion_utils import BaseFusionClassifier class DiversityFusionClassifier(BaseMultiviewClassifier, - BaseLateFusionClassifier): + BaseFusionClassifier): """This is the base class for all the diversity fusion based classifiers.""" def 
__init__(self, random_state=None, classifier_names=None, @@ -27,37 +27,36 @@ class DiversityFusionClassifier(BaseMultiviewClassifier, self.estimator_pool = monoview_estimators self.classifier_configs = classifier_configs - def fit(self, X, y, train_indices=None, views_indices=None): - train_indices, views_indices = get_examples_views_indices(X, - train_indices, - views_indices) + def fit(self, X, y, train_indices=None, view_indices=None): + train_indices, view_indices = get_examples_views_indices(X, + train_indices, + view_indices) if self.estimator_pool is None: self.estimator_pool = [] for classifier_idx, classifier_name in enumerate(self.classifier_names): self.estimator_pool.append([]) - estimator = self.init_monoview_estimator(classifier_name) - for idx, view_idx in enumerate(views_indices): + estimator = self.init_monoview_estimator(classifier_name, self.classifier_configs) + for idx, view_idx in enumerate(view_indices): estimator.fit(X.get_v(view_idx, train_indices), y[train_indices]) self.estimator_pool[classifier_idx].append(estimator) else: pass #TODO - self.monoview_estimators = self.choose_combination(X, y, train_indices, views_indices) + self.choose_combination(X, y, train_indices, view_indices) return self - def predict(self, X, example_indices=None, views_indices=None): + def predict(self, X, example_indices=None, view_indices=None): """Just a weighted majority vote""" - example_indices, views_indices = get_examples_views_indices(X, - example_indices, - views_indices) + example_indices, view_indices = get_examples_views_indices(X, + example_indices, + view_indices) nb_class = X.get_nb_class(example_indices) votes = np.zeros((len(example_indices), nb_class), dtype=float) monoview_predictions = [monoview_estimator.predict(X.get_v(view_idx, example_indices)) for view_idx, monoview_estimator - in zip(views_indices, self.monoview_estimators)] + in zip(view_indices, self.monoview_estimators)] for idx, example_index in enumerate(example_indices): for monoview_estimator_index, monoview_prediciton in enumerate(monoview_predictions): - votes[idx, monoview_prediciton[ - example_index]] += 1 + votes[idx, int(monoview_prediciton[idx])] += 1 predicted_labels = np.argmax(votes, axis=1) return predicted_labels @@ -94,7 +93,7 @@ class GlobalDiversityFusionClassifier(DiversityFusionClassifier): X, examples_indices, view_indices) for combinationsIndex, combination in enumerate(combinations): combis[combinationsIndex] = combination - div_measure[combinationsIndex] = self.diversity_score( + div_measure[combinationsIndex] = self.diversity_measure( classifiers_decisions, combination, y[examples_indices]) @@ -123,9 +122,9 @@ class CoupleDiversityFusionClassifier(DiversityFusionClassifier): (view_index_1, classifier_index_1), ( view_index_2, classifier_index_2) = binome couple_diversity = np.mean( - self.diversity_score( - classifiers_decisions[view_index_1, classifier_index_1], - classifiers_decisions[view_index_2, classifier_index_2], + self.diversity_measure( + classifiers_decisions[classifier_index_1, view_index_1], + classifiers_decisions[classifier_index_2, view_index_2], y[examples_indices]) ) couple_diversities[binome_index] = couple_diversity diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/fusion_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/fusion_utils.py index 47a5f15f6eacf3802afdc56af8429d8d38717de4..7c269dfc033dd2ccb9a34420cf76b9319703e1b1 100644 --- 
a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/fusion_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/fusion_utils.py @@ -4,13 +4,14 @@ import inspect from ...multiview.multiview_utils import get_monoview_classifier -class BaseLateFusionClassifier(): +class BaseFusionClassifier(): - def init_monoview_estimator(self, classifier_name, classifier_index=None): + def init_monoview_estimator(self, classifier_name, classifier_config, + classifier_index=None): if classifier_index is not None: - classifier_configs = self.classifier_configs[classifier_name] + classifier_configs = classifier_config[classifier_name] else: - classifier_configs = self.classifier_configs + classifier_configs = classifier_config if classifier_configs is not None and classifier_name in classifier_configs: if 'random_state' in inspect.getfullargspec( get_monoview_classifier(classifier_name).__init__).args: diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/late_fusion_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/late_fusion_utils.py index 189e16804eb90545cbbc1964859d4559415086c6..f768ecc695877b3fe17665b641df276855cb2631 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/late_fusion_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/additions/late_fusion_utils.py @@ -4,90 +4,138 @@ from scipy.stats import uniform from ...multiview.multiview_utils import BaseMultiviewClassifier, get_available_monoview_classifiers, get_monoview_classifier, get_examples_views_indices, ConfigGenerator -from .fusion_utils import BaseLateFusionClassifier +from .fusion_utils import BaseFusionClassifier + + +class ClassifierDistribution: + + def __init__(self, seed=42, available_classifiers=None): + self.random_state = np.random.RandomState(seed) + self.available_classifiers = available_classifiers + + def draw(self, nb_view): + return self.random_state.choice(self.available_classifiers, + size=nb_view, replace=True) class ClassifierCombinator: - def __init__(self, nb_view): - self.nb_view = nb_view - self.available_classifiers = get_available_monoview_classifiers() + def __init__(self, need_probas=False): + self.available_classifiers = get_available_monoview_classifiers(need_probas) def rvs(self, random_state=None): - classifier_names = random_state.choice(self.available_classifiers, - size=self.nb_view, replace=True) - return classifier_names + return ClassifierDistribution(seed=random_state.randint(2 ** 30), + available_classifiers=self.available_classifiers) -class MultipleConfigGenerator(ConfigGenerator): - def __init__(self, nb_view): - super(MultipleConfigGenerator, self).__init__(get_available_monoview_classifiers()) - self.nb_view = nb_view - self.multiple_distribs = [self.distribs for _ in range(nb_view)] +class ConfigDistribution: - def rvs(self, random_state=None): - config_samples = [super(MultipleConfigGenerator, self).rvs(random_state) - for _ in range(self.nb_view)] + def __init__(self, seed=42, available_classifiers=None): + self.random_state = np.random.RandomState(seed) + self.config_generator = ConfigGenerator(available_classifiers) + + def draw(self, nb_view): + config_samples = [self.config_generator.rvs(self.random_state) + for _ in range(nb_view)] return config_samples + +class MultipleConfigGenerator: + + def __init__(self): + self.available_classifiers = get_available_monoview_classifiers() + 
+ def rvs(self, random_state=None): + return ConfigDistribution(seed=random_state.randint(2 ** 30), + available_classifiers=self.available_classifiers) + + +class WeightDistribution: + + def __init__(self, seed=42, distribution_type="uniform"): + self.random_state = np.random.RandomState(seed) + self.distribution_type = distribution_type + + def draw(self, nb_view): + if self.distribution_type=="uniform": + return self.random_state.random_sample(nb_view) + + class WeightsGenerator: - def __init__(self, nb_view): - self.nb_view=nb_view - self.uniform = uniform(loc=0, state=1) + def __init__(self, distribution_type="uniform"): + self.distribution_type = distribution_type def rvs(self, random_state=None): - return np.array([uniform.rvs(random_state=random_state) - for _ in range(self.nb_view)]) + return WeightDistribution(seed=random_state.randint(2 ** 30), + distribution_type=self.distribution_type) -class LateFusionClassifier(BaseMultiviewClassifier, BaseLateFusionClassifier): +class LateFusionClassifier(BaseMultiviewClassifier, BaseFusionClassifier): def __init__(self, random_state=None, classifier_names=None, - classifier_configs=None, nb_cores=1, nb_view=None, weights=None): + classifier_configs=None, nb_cores=1, weights=None): super(LateFusionClassifier, self).__init__(random_state) - self.verif_clf_views(classifier_names, nb_view) - print(classifier_names) - self.nb_view = len(classifier_names) self.classifiers_names = classifier_names self.classifier_configs = classifier_configs - self.monoview_estimators = [self.init_monoview_estimator(classifier_name, classifier_index) - for classifier_index, classifier_name - in enumerate(self.classifiers_names)] self.nb_cores = nb_cores - self.accuracies = np.zeros(len(classifier_names)) - self.needProbas = False - if weights is None: - self.weights = np.ones(nb_view)/nb_view - else: - self.weights = weights + self.weights = weights self.param_names = ["classifier_names", "classifier_configs", "weights"] - self.distribs =[ClassifierCombinator(self.nb_view), - MultipleConfigGenerator(self.nb_view), - WeightsGenerator(nb_view)] + self.distribs =[ClassifierCombinator(need_probas=self.need_probas), + MultipleConfigGenerator(), + WeightsGenerator()] + + def fit(self, X, y, train_indices=None, view_indices=None): + self.init_params(X.nb_view) - def fit(self, X, y, train_indices=None, views_indices=None): - train_indices, views_indices = get_examples_views_indices(X, + train_indices, view_indices = get_examples_views_indices(X, train_indices, - views_indices) - self.monoview_estimators = [monoview_estimator.fit(X.get_v(view_index, train_indices), y[train_indices]) for view_index, monoview_estimator in zip(views_indices, self.monoview_estimators)] + view_indices) + self.monoview_estimators = [monoview_estimator.fit(X.get_v(view_index, train_indices), + y[train_indices]) + for view_index, monoview_estimator + in zip(view_indices, + self.monoview_estimators)] return self - def verif_clf_views(self, classifier_names, nb_view): - if classifier_names is None: - if nb_view is None: - raise AttributeError(self.__class__.__name__+" must have either classifier_names or nb_views provided.") - else: - self.classifiers_names = self.get_classifiers(get_available_monoview_classifiers(), nb_view) + def init_params(self, nb_view): + if self.weights is None: + self.weights = np.ones(nb_view) / nb_view + elif isinstance(self.weights, WeightDistribution): + self.weights = self.weights.draw(nb_view) else: - if nb_view is None: - self.classifiers_names = classifier_names - else: - if 
len(classifier_names)==nb_view: - self.classifiers_names = classifier_names - else: - warnings.warn("nb_view and classifier_names not matching, choosing nb_view random classifiers in classifier_names.", UserWarning) - self.classifiers_names = self.get_classifiers(classifier_names, nb_view) + self.weights = self.weights/np.sum(self.weights) + + if isinstance(self.classifiers_names, ClassifierDistribution): + self.classifiers_names = self.classifiers_names.draw(nb_view) + elif self.classifiers_names is None: + self.classifiers_names = ["decision_tree" for _ in range(nb_view)] + + if isinstance(self.classifier_configs, ConfigDistribution): + self.classifier_configs = self.classifier_configs.draw(nb_view) + + self.monoview_estimators = [ + self.init_monoview_estimator(classifier_name, + self.classifier_configs[classifier_index], + classifier_index=classifier_index) + for classifier_index, classifier_name + in enumerate(self.classifiers_names)] + + # def verif_clf_views(self, classifier_names, nb_view): + # if classifier_names is None: + # if nb_view is None: + # raise AttributeError(self.__class__.__name__+" must have either classifier_names or nb_views provided.") + # else: + # self.classifiers_names = self.get_classifiers(get_available_monoview_classifiers(), nb_view) + # else: + # if nb_view is None: + # self.classifiers_names = classifier_names + # else: + # if len(classifier_names)==nb_view: + # self.classifiers_names = classifier_names + # else: + # warnings.warn("nb_view and classifier_names not matching, choosing nb_view random classifiers in classifier_names.", UserWarning) + # self.classifiers_names = self.get_classifiers(classifier_names, nb_view) def get_classifiers(self, classifiers_names, nb_choices): diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/bayesian_inference_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/bayesian_inference_fusion.py index 5da30a4a28f37be46ea3a0de05ed5270eebee10d..94f58adb78bea60b3edc8573b82df2eb65e7c497 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/bayesian_inference_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/bayesian_inference_fusion.py @@ -9,15 +9,18 @@ classifier_class_name = "BayesianInferenceClassifier" class BayesianInferenceClassifier(LateFusionClassifier): def __init__(self, random_state, classifier_names=None, - classifier_configs=None, nb_view=None, nb_cores=1): + classifier_configs=None, nb_view=None, nb_cores=1, weights=None): + self.need_probas=True super(BayesianInferenceClassifier, self).__init__(random_state=random_state, classifier_names=classifier_names, classifier_configs=classifier_configs, nb_cores=nb_cores, - nb_view=nb_view) + weights=weights) - def predict(self, X, example_indices=None, views_indices=None): - example_indices, views_indices = get_examples_views_indices(X, example_indices, views_indices) + def predict(self, X, example_indices=None, view_indices=None): + example_indices, views_indices = get_examples_views_indices(X, + example_indices, + view_indices) if sum(self.weights) != 1.0: self.weights = self.weights / sum(self.weights) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/fat_late_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/fat_late_fusion.py index cb6b810f3628f977d9090211aa5bdb227576c529..b93e79a4fc5713eb9adc9e363be949eac89e35f6 100644 --- 
a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/fat_late_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fat_late_fusion/fat_late_fusion.py @@ -67,9 +67,6 @@ class FatLateFusionClass: votes = np.zeros((len(usedIndices), DATASET.get("Metadata").attrs["nbClass"]), dtype=float) for usedIndex, exampleIndex in enumerate(usedIndices): for monoviewDecisionIndex, monoviewDecision in enumerate(self.monoviewDecisions): - print(monoviewDecision[exampleIndex]) - print(self.weights[monoviewDecisionIndex]) - print(votes[usedIndex, monoviewDecision[exampleIndex]]) votes[usedIndex, monoviewDecision[exampleIndex]] += self.weights[monoviewDecisionIndex] predictedLabels = np.argmax(votes, axis=1) return predictedLabels diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/majority_voting_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/majority_voting_fusion.py index eed9638ee1d2aec59a80ef51150d6ad3c9f0f976..4f0f18e578089534807b8bcd0ebe3d46c41a58de 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/majority_voting_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/majority_voting_fusion.py @@ -11,29 +11,30 @@ class VotingIndecision(Exception): class MajorityVoting(LateFusionClassifier): def __init__(self, random_state, classifier_names=None, - classifier_configs=None, nb_view=None, nb_cores=1): + classifier_configs=None, weights=None, nb_cores=1): + self.need_probas=False super(MajorityVoting, self).__init__(random_state=random_state, classifier_names=classifier_names, classifier_configs=classifier_configs, nb_cores=nb_cores, - nb_view=nb_view) + weights=weights) - def predict(self, X, example_indices=None, views_indices=None): + def predict(self, X, example_indices=None, view_indices=None): examples_indices, views_indices = get_examples_views_indices(X, example_indices, - views_indices) + view_indices) n_examples = len(examples_indices) votes = np.zeros((n_examples, X.get_nb_class(example_indices)), dtype=float) - monoview_decisions = np.zeros((len(examples_indices), self.nb_view), dtype=int) + monoview_decisions = np.zeros((len(examples_indices), X.nb_view), dtype=int) for index, view_index in enumerate(views_indices): - monoview_decisions[:, index] = self.monoviewClassifiers[index].predict( + monoview_decisions[:, index] = self.monoview_estimators[index].predict( X.get_v(view_index, examples_indices)) for example_index in range(n_examples): for view_index, feature_classification in enumerate(monoview_decisions[example_index, :]): votes[example_index, feature_classification] += self.weights[view_index] nb_maximum = len(np.where(votes[example_index] == max(votes[example_index]))[0]) - if nb_maximum == self.nb_view: + if nb_maximum == X.nb_view: raise VotingIndecision("Majority voting can't decide, each classifier has voted for a different class") predicted_labels = np.argmax(votes, axis=1) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/scm_late_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/scm_late_fusion.py index 2e3fae5d703dee2dc186847dab8896fd52a21299..a8ec6bb2063760101b5be106141f9245843527fc 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/scm_late_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/scm_late_fusion.py @@ -47,16 +47,17 @@ class 
DecisionStumpSCMNew(BaseEstimator, ClassifierMixin): class SCMLateFusionClassifier(LateFusionClassifier): def __init__(self, random_state=None, classifier_names=None, - classifier_configs=None, nb_cores=1, nb_view=1, - p=1, max_attributes=5, order=1, model_type="conjunction"): + classifier_configs=None, nb_cores=1, + p=1, max_rules=5, order=1, model_type="conjunction", weights=None): + self.need_probas=False super(SCMLateFusionClassifier, self).__init__(random_state=random_state, classifier_names=classifier_names, classifier_configs=classifier_configs, - nb_cores=nb_cores, - nb_view=nb_view) + nb_cores=nb_cores, + weights=weights) self.scm_classifier = None self.p = p - self.max_attributes = max_attributes + self.max_rules = max_rules self.order = order self.model_type = model_type self.param_names+=["model_type", "max_rules", "p", "order"] @@ -67,7 +68,7 @@ class SCMLateFusionClassifier(LateFusionClassifier): def fit(self, X, y, train_indices=None, view_indices=None): super(SCMLateFusionClassifier, self).fit(X, y, train_indices=train_indices, - views_indices=view_indices) + view_indices=view_indices) self.scm_fusion_fit(X, y, train_indices=train_indices, view_indices=view_indices) return self @@ -75,7 +76,7 @@ class SCMLateFusionClassifier(LateFusionClassifier): example_indices, view_indices = get_examples_views_indices(X, example_indices, view_indices) - monoview_decisions = np.zeros((len(example_indices), self.nb_view), + monoview_decisions = np.zeros((len(example_indices), X.nb_view), dtype=int) for index, view_index in enumerate(view_indices): monoview_decision = self.monoview_estimators[index].predict( @@ -88,11 +89,11 @@ class SCMLateFusionClassifier(LateFusionClassifier): def scm_fusion_fit(self, X, y, train_indices=None, view_indices=None): train_indices, view_indices = get_examples_views_indices(X, train_indices, view_indices) - self.scm_classifier = DecisionStumpSCMNew(p=self.p, max_rules=self.max_attributes, model_type=self.model_type, - random_state=self.randomState) - monoview_decisions = np.zeros((len(train_indices), self.nb_view), dtype=int) + self.scm_classifier = DecisionStumpSCMNew(p=self.p, max_rules=self.max_rules, model_type=self.model_type, + random_state=self.random_state) + monoview_decisions = np.zeros((len(train_indices), X.nb_view), dtype=int) for index, view_index in enumerate(view_indices): - monoview_decisions[:, index] = self.monoviewClassifiers[index].predict( + monoview_decisions[:, index] = self.monoview_estimators[index].predict( X.get_v(view_index, train_indices)) features = self.generate_interactions(monoview_decisions) features = np.array([np.array([feat for feat in feature]) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py index 66cbdf292bc55025056d997d22bac4c5dfbad36f..46a5cb9844870dd5a58b646786b7a90d8dc93b9e 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py @@ -2,18 +2,19 @@ import numpy as np import inspect # from ..utils.dataset import get_v + from multiview_platform.mono_multi_view_classifiers.multiview.multiview_utils import BaseMultiviewClassifier from multiview_platform.mono_multi_view_classifiers.multiview.multiview_utils import get_examples_views_indices from 
multiview_platform.mono_multi_view_classifiers.multiview.multiview_utils import ConfigGenerator from multiview_platform.mono_multi_view_classifiers.multiview.multiview_utils import get_available_monoview_classifiers - +from multiview_platform.mono_multi_view_classifiers.multiview_classifiers.additions.fusion_utils import BaseFusionClassifier from multiview_platform.mono_multi_view_classifiers import monoview_classifiers classifier_class_name = "WeightedLinearEarlyFusion" -class WeightedLinearEarlyFusion(BaseMultiviewClassifier): +class WeightedLinearEarlyFusion(BaseMultiviewClassifier, BaseFusionClassifier): def __init__(self, random_state=None, view_weights=None, monoview_classifier_name="decision_tree", @@ -44,7 +45,7 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier): monoview_classifier_class = getattr(monoview_classifier_module, monoview_classifier_module.classifier_class_name) self.monoview_classifier = monoview_classifier_class() - self.set_monoview_classifier_config(monoview_classifier_name, + self.monoview_classifier = self.init_monoview_estimator(monoview_classifier_name, monoview_classifier_config) return self @@ -59,8 +60,8 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier): self.monoview_classifier.fit(X, y[train_indices]) return self - def predict(self, X, predict_indices=None, view_indices=None): - _, X = self.transform_data_to_monoview(X, predict_indices, view_indices) + def predict(self, X, example_indices=None, view_indices=None): + _, X = self.transform_data_to_monoview(X, example_indices, view_indices) predicted_labels = self.monoview_classifier.predict(X) return predicted_labels @@ -88,11 +89,11 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier): , axis=1) return monoview_data - def set_monoview_classifier_config(self, monoview_classifier_name, monoview_classifier_config): - if monoview_classifier_name in monoview_classifier_config: - self.monoview_classifier.set_params(**monoview_classifier_config[monoview_classifier_name]) - else: - self.monoview_classifier.set_params(**monoview_classifier_config) + # def set_monoview_classifier_config(self, monoview_classifier_name, monoview_classifier_config): + # if monoview_classifier_name in monoview_classifier_config: + # self.monoview_classifier.set_params(**monoview_classifier_config[monoview_classifier_name]) + # else: + # self.monoview_classifier.set_params(**monoview_classifier_config) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_late_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_late_fusion.py index fdefbdda1ea479f1e35eb6d0486e0b994a020266..9b64d9651d319a5bb79625ce1a9565838b61b991 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_late_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_late_fusion.py @@ -8,15 +8,15 @@ classifier_class_name = "WeightedLinearLateFusion" class WeightedLinearLateFusion(LateFusionClassifier): def __init__(self, random_state, classifier_names=None, - classifier_configs=None, nb_view=None, nb_cores=1): + classifier_configs=None, weights=None, nb_cores=1): + self.need_probas=True super(WeightedLinearLateFusion, self).__init__(random_state=random_state, classifier_names=classifier_names, classifier_configs=classifier_configs, - nb_cores=nb_cores, - nb_view=nb_view) + nb_cores=nb_cores, weights=weights) - def predict(self, X, example_indices=None, views_indices=None): - example_indices, views_indices = 
get_examples_views_indices(X, example_indices, views_indices) + def predict(self, X, example_indices=None, view_indices=None): + example_indices, views_indices = get_examples_views_indices(X, example_indices, view_indices) view_scores = [] for index, viewIndex in enumerate(views_indices): view_scores.append(np.array(self.monoview_estimators[index].predict_proba( diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis.py index 6dcbb4364d9861bbb2661405df6a8079a3dc1e10..a94f8e1ed9a8fb838a5ea897eb7ba4f540abb73b 100644 --- a/multiview_platform/mono_multi_view_classifiers/result_analysis.py +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis.py @@ -147,22 +147,22 @@ def getExampleErrorsBiclass(groud_truth, results): Returns ------- - exampleErrors : dict of np.array + example_errors : dict of np.array For each classifier, has an entry with a `np.array` over the examples, with a 1 if the examples was well-classified, a 0 if not and if it's multiclass classification, a -100 if the examples was not seen during the one versus one classification. """ - exampleErrors = {} + example_errors = {} for classifierResult in results: - errorOnExamples = np.equal(classifierResult.full_labels_pred, + error_on_examples = np.equal(classifierResult.full_labels_pred, groud_truth).astype(int) unseenExamples = np.where(groud_truth == -100)[0] - errorOnExamples[unseenExamples] = -100 - exampleErrors[classifierResult.get_classifier_name()] = { - "errorOnExamples": errorOnExamples} + error_on_examples[unseenExamples] = -100 + example_errors[classifierResult.get_classifier_name()] = { + "error_on_examples": error_on_examples} - return exampleErrors + return example_errors def get_fig_size(nb_results, min_size=15, multiplier=1.0, bar_width=0.35): @@ -383,7 +383,7 @@ def iterCmap(statsIter): def publish2Dplot(data, classifiers_names, nbClassifiers, nbExamples, nbCopies, fileName, minSize=10, - width_denominator=2.0, height_denominator=20.0, statsIter=1): + width_denominator=2.0, height_denominator=20.0, stats_iter=1): r"""Used to generate a 2D plot of the errors. Parameters @@ -407,7 +407,7 @@ def publish2Dplot(data, classifiers_names, nbClassifiers, nbExamples, nbCopies, To obtain the image width, the number of classifiers will be divided by this number. height_denominator : float, optional, default: 1.0 To obtain the image width, the number of examples will be divided by this number. - statsIter : int, optional, default: 1 + stats_iter : int, optional, default: 1 The number of statistical iterations realized. 
Returns @@ -417,26 +417,26 @@ def publish2Dplot(data, classifiers_names, nbClassifiers, nbExamples, nbCopies, figHeight = max(nbExamples / height_denominator, minSize) figKW = {"figsize": (figWidth, figHeight)} fig, ax = plt.subplots(nrows=1, ncols=1, **figKW) - cmap, norm = iterCmap(statsIter) + cmap, norm = iterCmap(stats_iter) cax = plt.imshow(data, interpolation='none', cmap=cmap, norm=norm, aspect='auto') plt.title('Errors depending on the classifier') ticks = np.arange(nbCopies / 2 - 0.5, nbClassifiers * nbCopies, nbCopies) labels = classifiers_names plt.xticks(ticks, labels, rotation="vertical") - cbar = fig.colorbar(cax, ticks=[-100 * statsIter / 2, 0, statsIter]) + cbar = fig.colorbar(cax, ticks=[-100 * stats_iter / 2, 0, stats_iter]) cbar.ax.set_yticklabels(['Unseen', 'Always Wrong', 'Always Right']) fig.tight_layout() fig.savefig(fileName + "error_analysis_2D.png", bbox_inches="tight", transparent=True) plt.close() -def publishErrorsBarPlot(errorOnExamples, nbClassifiers, nbExamples, fileName): +def publishErrorsBarPlot(error_on_examples, nbClassifiers, nbExamples, fileName): r"""Used to generate a barplot of the muber of classifiers that failed to classify each examples Parameters ---------- - errorOnExamples : np.array of shape `(nbExamples,)` + error_on_examples : np.array of shape `(nbExamples,)` An array counting how many classifiers failed to classifiy each examples. classifiers_names : list of str The names of the classifiers. @@ -452,7 +452,7 @@ def publishErrorsBarPlot(errorOnExamples, nbClassifiers, nbExamples, fileName): """ fig, ax = plt.subplots() x = np.arange(nbExamples) - plt.bar(x, errorOnExamples) + plt.bar(x, error_on_examples) plt.ylim([0, nbClassifiers]) plt.title("Number of classifiers that failed to classify each example") fig.savefig(fileName + "error_analysis_bar.png", transparent=True) @@ -466,7 +466,7 @@ def gen_error_data(example_errors, base_file_name, nbCopies=2): ---------- example_errors : dict of dicts of np.arrays A dictionary conatining all the useful data. Organized as : - `example_errors[<classifier_name>]["errorOnExamples"]` is a np.array of ints with a + `example_errors[<classifier_name>]["error_on_examples"]` is a np.array of ints with a - 1 if the classifier `<classifier_name>` classifier well the example, - 0 if it fail to classify the example, - -100 if it did not classify the example (multiclass one versus one). @@ -490,44 +490,44 @@ def gen_error_data(example_errors, base_file_name, nbCopies=2): data : np.array of shape `(nbClassifiers, nbExamples)` A matrix with zeros where the classifier failed to classifiy the example, ones where it classified it well and -100 if the example was not classified. - errorOnExamples : np.array of shape `(nbExamples,)` + error_on_examples : np.array of shape `(nbExamples,)` An array counting how many classifiers failed to classifiy each examples. 
""" nbClassifiers = len(example_errors) - nbExamples = len(list(example_errors.values())[0]["errorOnExamples"]) + nbExamples = len(list(example_errors.values())[0]["error_on_examples"]) classifiers_names = example_errors.keys() data = np.zeros((nbExamples, nbClassifiers * nbCopies)) temp_data = np.zeros((nbExamples, nbClassifiers)) - for classifierIndex, (classifier_name, errorOnExamples) in enumerate( + for classifierIndex, (classifier_name, error_on_examples) in enumerate( example_errors.items()): for iter_index in range(nbCopies): - data[:, classifierIndex * nbCopies + iter_index] = errorOnExamples[ - "errorOnExamples"] - temp_data[:, classifierIndex] = errorOnExamples["errorOnExamples"] - errorOnExamples = -1 * np.sum(data, axis=1) / nbCopies + nbClassifiers + data[:, classifierIndex * nbCopies + iter_index] = error_on_examples[ + "error_on_examples"] + temp_data[:, classifierIndex] = error_on_examples["error_on_examples"] + error_on_examples = -1 * np.sum(data, axis=1) / nbCopies + nbClassifiers np.savetxt(base_file_name + "2D_plot_data.csv", data, delimiter=",") np.savetxt(base_file_name + "bar_plot_data.csv", temp_data, delimiter=",") - return nbClassifiers, nbExamples, nbCopies, classifiers_names, data, errorOnExamples + return nbClassifiers, nbExamples, nbCopies, classifiers_names, data, error_on_examples -def publishExampleErrors(exampleErrors, directory, databaseName, labels_names): +def publishExampleErrors(example_errors, directory, databaseName, labels_names): logging.debug("Start:\t Biclass Label analysis figure generation") base_file_name = directory + time.strftime( "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + "_vs_".join( labels_names) + "-" - nbClassifiers, nbExamples, nCopies, classifiers_names, data, errorOnExamples = gen_error_data( - exampleErrors, + nbClassifiers, nbExamples, nCopies, classifiers_names, data, error_on_examples = gen_error_data( + example_errors, base_file_name) publish2Dplot(data, classifiers_names, nbClassifiers, nbExamples, nCopies, base_file_name) - publishErrorsBarPlot(errorOnExamples, nbClassifiers, nbExamples, + publishErrorsBarPlot(error_on_examples, nbClassifiers, nbExamples, base_file_name) logging.debug("Done:\t Biclass Label analysis figures generation") @@ -604,8 +604,8 @@ def analyze_biclass(results, benchmark_argument_dictionaries, stats_iter, metric biclass_results[iteridex][ str(classifierPositive) + str(classifierNegative)] = { - "metricsScores": metrics_scores, - "exampleErrors": example_errors} + "metrics_scores": metrics_scores, + "example_errors": example_errors} logging.debug("Done:\t Analzing all biclass resuls") return results, biclass_results @@ -628,8 +628,8 @@ def gen_metrics_scores_multiclass(results, true_labels, metrics, train_indices, test_indices, multiclass_test_indices = classification_indices for classifier_name, resultDictionary in iter_results.items(): - if not "metricsScores" in resultDictionary: - results[iter_index][classifier_name]["metricsScores"] = {} + if not "metrics_scores" in resultDictionary: + results[iter_index][classifier_name]["metrics_scores"] = {} train_score = metric_module.score(true_labels[train_indices], resultDictionary["labels"][ train_indices], @@ -653,7 +653,7 @@ def get_error_on_labels_multiclass(multiclass_results, multiclass_labels): for classifier_name, classifier_results in iter_results.items(): error_on_examples = classifier_results["labels"] == multiclass_labels multiclass_results[iter_index][classifier_name][ - "errorOnExamples"] = error_on_examples.astype(int) + 
"error_on_examples"] = error_on_examples.astype(int) logging.debug("Done:\t Getting errors on each example for each classifier") @@ -671,11 +671,11 @@ def publishMulticlassScores(multiclass_results, metrics, stats_iter, direcories, classifiers_names = np.array([classifier_name for classifier_name in multiclass_results[iter_index].keys()]) train_scores = np.array([multiclass_results[iter_index][ - classifier_name]["metricsScores"][ + classifier_name]["metrics_scores"][ metric[0]][0] for classifier_name in classifiers_names]) validationScores = np.array([multiclass_results[iter_index][ - classifier_name]["metricsScores"][ + classifier_name]["metrics_scores"][ metric[0]][1] for classifier_name in classifiers_names]) @@ -703,14 +703,14 @@ def publishMulticlassExmapleErrors(multiclass_results, directories, base_file_name = directory + time.strftime( "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" - nbClassifiers, nbExamples, nCopies, classifiers_names, data, errorOnExamples = gen_error_data( + nbClassifiers, nbExamples, nCopies, classifiers_names, data, error_on_examples = gen_error_data( multiclassResult, base_file_name) publish2Dplot(data, classifiers_names, nbClassifiers, nbExamples, nCopies, base_file_name) - publishErrorsBarPlot(errorOnExamples, nbClassifiers, nbExamples, + publishErrorsBarPlot(error_on_examples, nbClassifiers, nbExamples, base_file_name) logging.debug("Done:\t Multiclass Label analysis figure generation") @@ -793,7 +793,7 @@ def publish_iter_biclass_metrics_scores(iter_results, directory, labels_dictiona if exc.errno != errno.EEXIST: raise - for metricName, scores in iterResult["metricsScores"].items(): + for metricName, scores in iterResult["metrics_scores"].items(): trainMeans, trainSTDs = numpy_mean_and_std(scores["train_scores"]) testMeans, testSTDs = numpy_mean_and_std(scores["test_scores"]) @@ -803,24 +803,22 @@ def publish_iter_biclass_metrics_scores(iter_results, directory, labels_dictiona stats_iter) + "_iter-" + metricName + ".png" nbResults = names.shape[0] - plotMetricScores(train_scores=trainMeans, test_scores=testMeans, - names=names, nbResults=nbResults, - metricName=metricName, fileName=fileName, - tag=" averaged", + plotMetricScores(trainMeans, testMeans, names, nbResults, + metricName, fileName, tag=" averaged", train_STDs=trainSTDs, test_STDs=testSTDs) results+=[[classifiersName, metricName, testMean, testSTD] for classifiersName, testMean, testSTD in zip(names, testMeans, testSTDs)] return results def gen_error_dat_glob(combi_results, stats_iter, base_file_name): - nbExamples = combi_results["errorOnExamples"].shape[1] - nbClassifiers = combi_results["errorOnExamples"].shape[0] - data = np.transpose(combi_results["errorOnExamples"]) - errorOnExamples = -1 * np.sum(data, axis=1) + (nbClassifiers * stats_iter) + nbExamples = combi_results["error_on_examples"].shape[1] + nbClassifiers = combi_results["error_on_examples"].shape[0] + data = np.transpose(combi_results["error_on_examples"]) + error_on_examples = -1 * np.sum(data, axis=1) + (nbClassifiers * stats_iter) np.savetxt(base_file_name + "clf_errors.csv", data, delimiter=",") - np.savetxt(base_file_name + "example_errors.csv", errorOnExamples, + np.savetxt(base_file_name + "example_errors.csv", error_on_examples, delimiter=",") - return nbExamples, nbClassifiers, data, errorOnExamples + return nbExamples, nbClassifiers, data, error_on_examples def publish_iter_biclass_example_errors(iter_results, directory, labels_dictionary, @@ -836,13 +834,13 @@ def publish_iter_biclass_example_errors(iter_results, 
directory, labels_dictiona logging.debug( "Start:\t Global biclass label analysis figure generation") - nbExamples, nbClassifiers, data, errorOnExamples = gen_error_dat_glob( + nbExamples, nbClassifiers, data, error_on_examples = gen_error_dat_glob( combiResults, stats_iter, base_file_name) publish2Dplot(data, classifiers_names, nbClassifiers, nbExamples, 1, base_file_name, stats_iter=stats_iter) - publishErrorsBarPlot(errorOnExamples, nbClassifiers * stats_iter, + publishErrorsBarPlot(error_on_examples, nbClassifiers * stats_iter, nbExamples, base_file_name) logging.debug( @@ -853,7 +851,7 @@ def publish_iter_multiclass_metrics_scores(iter_multiclass_results, classifiers_ data_base_name, directory, stats_iter, min_size=10): results = [] - for metric_name, scores in iter_multiclass_results["metricsScores"].items(): + for metric_name, scores in iter_multiclass_results["metrics_scores"].items(): trainMeans, trainSTDs = numpy_mean_and_std(scores["train_scores"]) testMeans, testSTDs = numpy_mean_and_std(scores["test_scores"]) @@ -863,10 +861,8 @@ def publish_iter_multiclass_metrics_scores(iter_multiclass_results, classifiers_ "%Y_%m_%d-%H_%M_%S") + "-" + data_base_name + "-Mean_on_" + str( stats_iter) + "_iter-" + metric_name + ".png" - plotMetricScores(train_scores=trainMeans, test_scores=testMeans, - names=classifiers_names, nbResults=nb_results, - metricName=metric_name, fileName=file_name, - tag=" averaged multiclass", + plotMetricScores(trainMeans, testMeans, classifiers_names, nb_results, + metric_name, file_name, tag=" averaged multiclass", train_STDs=trainSTDs, test_STDs=testSTDs) results+=[[classifiers_name, metric_name,testMean, testSTD] for classifiers_name, testMean, testSTD in zip(classifiers_names, testMeans, testSTDs)] @@ -895,7 +891,7 @@ def gen_classifiers_dict(results, metrics): classifiers_dict = dict((classifier_name, classifierIndex) for classifierIndex, classifier_name in enumerate( - results[0][list(results[0].keys())[0]]["metricsScores"][metrics[0][0]][ + results[0][list(results[0].keys())[0]]["metrics_scores"][metrics[0][0]][ "classifiers_names"])) return classifiers_dict, len(classifiers_dict) @@ -904,19 +900,19 @@ def add_new_labels_combination(iterBiclassResults, labelsComination, nbClassifiers, nbExamples): if labelsComination not in iterBiclassResults: iterBiclassResults[labelsComination] = {} - iterBiclassResults[labelsComination]["metricsScores"] = {} + iterBiclassResults[labelsComination]["metrics_scores"] = {} - iterBiclassResults[labelsComination]["errorOnExamples"] = np.zeros( + iterBiclassResults[labelsComination]["error_on_examples"] = np.zeros( (nbClassifiers, nbExamples), dtype=int) return iterBiclassResults -def add_new_metric(iter_biclass_results, metric, labels_comination, nb_classifiers, +def add_new_metric(iter_biclass_results, metric, labels_combination, nb_classifiers, stats_iter): - if metric[0] not in iter_biclass_results[labels_comination]["metrics_scores"]: - iter_biclass_results[labels_comination]["metrics_scores"][metric[0]] = { + if metric[0] not in iter_biclass_results[labels_combination]["metrics_scores"]: + iter_biclass_results[labels_combination]["metrics_scores"][metric[0]] = { "train_scores": np.zeros((nb_classifiers, stats_iter)), "test_scores": @@ -953,11 +949,10 @@ def analyzebiclass_iter(biclass_results, metrics, stats_iter, directory, iter_biclass_results[labelsComination]["metrics_scores"][ metric[0]]["test_scores"][ classifiers_dict[classifier_name], iter_index] = testScore - - for classifier_name, errorOnExample in results[ + 
for classifier_name, error_on_example in results[ "example_errors"].items(): iter_biclass_results[labelsComination]["error_on_examples"][ - classifiers_dict[classifier_name], :] += errorOnExample[ + classifiers_dict[classifier_name], :] += error_on_example[ "error_on_examples"] results = publish_iter_biclass_metrics_scores( diff --git a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py index 2ab4003f543607ffe69be5c8897e9864c54b2534..7a4cb476135799607048a343b5f222cf82b4c773 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py @@ -17,7 +17,8 @@ class Dataset(): def __init__(self, views=None, labels=None, are_sparse=False, file_name="dataset.hdf5", view_names=None, path="", - hdf5_file=None, labels_names=None): + hdf5_file=None, labels_names=None, is_temp=False): + self.is_temp = is_temp if hdf5_file is not None: self.dataset=hdf5_file self.init_attrs() @@ -55,6 +56,12 @@ class Dataset(): dataset_file.close() self.update_hdf5_dataset(os.path.join(path, file_name)) + def rm(self): + filename = self.dataset.filename + self.dataset.close() + if self.is_temp: + os.remove(filename) + def get_view_name(self, view_idx): return self.dataset["View"+str(view_idx)].attrs["name"] @@ -142,6 +149,7 @@ class Dataset(): if hasattr(self, 'dataset'): self.dataset.close() self.dataset = h5py.File(path, 'r') + self.is_temp = True self.init_attrs() def filter(self, labels, label_names, example_indices, view_names, path): @@ -267,6 +275,7 @@ class Dataset(): + def datasets_already_exist(pathF, name, nbCores): """Used to check if it's necessary to copy datasets""" allDatasetExist = True @@ -393,7 +402,7 @@ def copy_hdf5(pathF, name, nbCores): datasetFile.copy("/" + dataset, newDataSet["/"]) newDataSet.close() -def delete_HDF5(benchmarkArgumentsDictionaries, nbCores, DATASET): +def delete_HDF5(benchmarkArgumentsDictionaries, nbCores, dataset): """Used to delete temporary copies at the end of the benchmark""" if nbCores > 1: logging.debug("Start:\t Deleting " + str( @@ -403,10 +412,8 @@ def delete_HDF5(benchmarkArgumentsDictionaries, nbCores, dataset): for coreIndex in range(nbCores): os.remove(args["Base"]["pathf"] + args["Base"]["name"] + str(coreIndex) + ".hdf5") - filename = DATASET.filename - DATASET.close() - if "_temp_" in filename: - os.remove(filename) + if dataset.is_temp: + dataset.rm() def confirm(resp=True, timeout=15): diff --git a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py index 0aec7f32fdf001264e18ff87aaa7af73a9529119..a3f2e1d1d480a3bac9f12ac83931549741d4a757 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py @@ -125,6 +125,72 @@ class DatasetError(Exception): def __init__(self, *args, **kwargs): Exception.__init__(self, *args, **kwargs) +def get_classic_db_hdf5(views, path_f, name_DB, nb_class, asked_labels_names, + random_state, full=False, add_noise=False, noise_std=0.15, + path_for_new="../data/"): + """Used to load a hdf5 database""" + if full: + dataset_file = h5py.File(path_f + name_DB + ".hdf5", "r") + dataset = Dataset(hdf5_file=dataset_file) + dataset_name = name_DB + labels_dictionary = dict((label_index, label_name) + for label_index, label_name + in enumerate(dataset.get_label_names())) + else: 
dataset_file = h5py.File(path_f + name_DB + ".hdf5", "r") + dataset = Dataset(hdf5_file=dataset_file) + labels_dictionary = dataset.select_views_and_labels(nb_labels=nb_class, + selected_label_names=asked_labels_names, + view_names=views, random_state=random_state, + path_for_new=path_for_new) + dataset_name = dataset.get_name() + + if add_noise: + dataset.add_gaussian_noise(random_state, path_for_new, noise_std) + dataset_name = dataset.get_name() + else: + pass + return dataset, labels_dictionary, dataset_name + + +def get_classic_db_csv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, + random_state, full=False, add_noise=False, noise_std=0.15, + delimiter=",", path_for_new="../data/"): + # TODO : Update this one + labels_names = np.genfromtxt(pathF + nameDB + "-labels-names.csv", + dtype='str', delimiter=delimiter) + datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w") + labels = np.genfromtxt(pathF + nameDB + "-labels.csv", delimiter=delimiter) + labelsDset = datasetFile.create_dataset("Labels", labels.shape, data=labels) + labelsDset.attrs["names"] = [labelName.encode() for labelName in + labels_names] + viewFileNames = [viewFileName for viewFileName in + os.listdir(pathF + "Views/")] + for viewIndex, viewFileName in enumerate(os.listdir(pathF + "Views/")): + viewFile = pathF + "Views/" + viewFileName + if viewFileName[-6:] != "-s.csv": + viewMatrix = np.genfromtxt(viewFile, delimiter=delimiter) + viewDset = datasetFile.create_dataset("View" + str(viewIndex), + viewMatrix.shape, + data=viewMatrix) + del viewMatrix + viewDset.attrs["name"] = viewFileName[:-4] + viewDset.attrs["sparse"] = False + else: + pass + metaDataGrp = datasetFile.create_group("Metadata") + metaDataGrp.attrs["nbView"] = len(viewFileNames) + metaDataGrp.attrs["nbClass"] = len(labels_names) + metaDataGrp.attrs["datasetLength"] = len(labels) + datasetFile.close() + datasetFile, labelsDictionary, dataset_name = get_classic_db_hdf5(views, pathF, nameDB, + NB_CLASS, askedLabelsNames, + random_state, full, + path_for_new=path_for_new) + + return datasetFile, labelsDictionary, dataset_name + + # # def get_classes(labels): # labels_set = set(list(labels)) @@ -260,32 +326,7 @@ class DatasetError(Exception): # new_d_set.attrs[key] = value -def get_classic_db_hdf5(views, path_f, name_DB, nb_class, asked_labels_names, - random_state, full=False, add_noise=False, noise_std=0.15, - path_for_new="../data/"): - """Used to load a hdf5 database""" - if full: - dataset_file = h5py.File(path_f + name_DB + ".hdf5", "r") - dataset = Dataset(hdf5_file=dataset_file) - dataset_name = name_DB - labels_dictionary = dict((label_index, label_name) - for label_index, label_name - in enumerate(dataset.get_label_names())) - else: - dataset_file = h5py.File(path_f + name_DB + ".hdf5", "r") - dataset = Dataset(hdf5_file=dataset_file) - labels_dictionary = dataset.select_views_and_labels(nb_labels=nb_class, - selected_label_names=asked_labels_names, - view_names=views, random_state=random_state, - path_for_new=path_for_new) - dataset_name = dataset.get_name() - if add_noise: - dataset.add_gaussian_noise(random_state, path_for_new, noise_std) - dataset_name = dataset.get_name() - else: - pass - return dataset, labels_dictionary, dataset_name # # def add_gaussian_noise(dataset_file, random_state, path_f, dataset_name, @@ -323,43 +364,6 @@ def get_classic_db_hdf5(views, path_f, name_DB, nb_class, asked_labels_names, # return noisy_dataset, dataset_name + "_noised" -def get_classic_db_csv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, - 
random_state, full=False, add_noise=False, noise_std=0.15, - delimiter=",", path_for_new="../data/"): - # TODO : Update this one - labels_names = np.genfromtxt(pathF + nameDB + "-labels-names.csv", - dtype='str', delimiter=delimiter) - datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w") - labels = np.genfromtxt(pathF + nameDB + "-labels.csv", delimiter=delimiter) - labelsDset = datasetFile.create_dataset("Labels", labels.shape, data=labels) - labelsDset.attrs["names"] = [labelName.encode() for labelName in - labels_names] - viewFileNames = [viewFileName for viewFileName in - os.listdir(pathF + "Views/")] - for viewIndex, viewFileName in enumerate(os.listdir(pathF + "Views/")): - viewFile = pathF + "Views/" + viewFileName - if viewFileName[-6:] != "-s.csv": - viewMatrix = np.genfromtxt(viewFile, delimiter=delimiter) - viewDset = datasetFile.create_dataset("View" + str(viewIndex), - viewMatrix.shape, - data=viewMatrix) - del viewMatrix - viewDset.attrs["name"] = viewFileName[:-4] - viewDset.attrs["sparse"] = False - else: - pass - metaDataGrp = datasetFile.create_group("Metadata") - metaDataGrp.attrs["nbView"] = len(viewFileNames) - metaDataGrp.attrs["nbClass"] = len(labels_names) - metaDataGrp.attrs["datasetLength"] = len(labels) - datasetFile.close() - datasetFile, labelsDictionary, dataset_name = get_classic_db_hdf5(views, pathF, nameDB, - NB_CLASS, askedLabelsNames, - random_state, full, - path_for_new=path_for_new) - - return datasetFile, labelsDictionary, dataset_name - # def getLabelSupports(CLASS_LABELS): # """Used to get the number of example for each label""" # labels = set(CLASS_LABELS) diff --git a/multiview_platform/tests/test_multiview_classifiers/test_additions/test_diversity_utils.py b/multiview_platform/tests/test_multiview_classifiers/test_additions/test_diversity_utils.py index 8e1f77d22fb7544aa53ceafb32a848e30eac261a..61595f63880c5cc1a01d5b5e96cf68cbf7c397c2 100644 --- a/multiview_platform/tests/test_multiview_classifiers/test_additions/test_diversity_utils.py +++ b/multiview_platform/tests/test_multiview_classifiers/test_additions/test_diversity_utils.py @@ -29,7 +29,7 @@ class FakeDivCoupleClf(du.CoupleDiversityFusionClassifier): monoview_estimators=monoview_estimators) self.rs = rs - def diversity_score(self, a, b, c): + def diversity_measure(self, a, b, c): return self.rs.randint(0,100) @@ -43,7 +43,7 @@ class FakeDivGlobalClf(du.GlobalDiversityFusionClassifier): monoview_estimators=monoview_estimators) self.rs = rs - def diversity_score(self, a, b, c): + def diversity_measure(self, a, b, c): return self.rs.randint(0,100) class Test_DiversityFusion(unittest.TestCase):
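Note on the hyper-parameter sampling rework in late_fusion_utils.py: because nb_view was dropped from the LateFusionClassifier constructor, rvs() can no longer return concrete per-view values at sampling time. It now returns a distribution object (ClassifierDistribution, ConfigDistribution or WeightDistribution) carrying its own seeded RandomState, and fit() resolves it through init_params(X.nb_view) once the number of views is known. The same logic presumably motivates the config.yml change from ["decison_tree"] to [["decision_tree"]]: each outer list element is one candidate value for the randomized search, and a late-fusion candidate value is itself a list with one classifier name per view. Below is a minimal, self-contained sketch of this deferred-draw pattern; the classifier names are illustrative stand-ins, not the platform's registry.

import numpy as np

class ClassifierDistribution:
    # Deferred draw: hold a seeded RandomState until nb_view is known.
    def __init__(self, seed=42, available_classifiers=None):
        self.random_state = np.random.RandomState(seed)
        self.available_classifiers = available_classifiers

    def draw(self, nb_view):
        # One monoview classifier name per view, drawn with replacement.
        return self.random_state.choice(self.available_classifiers,
                                        size=nb_view, replace=True)

class ClassifierCombinator:
    # The object the randomized search samples from, via its rvs() interface.
    def __init__(self, available_classifiers):
        self.available_classifiers = available_classifiers

    def rvs(self, random_state=None):
        # Only a seed is fixed at sampling time; the per-view choice
        # happens later, in fit(), through draw(nb_view).
        return ClassifierDistribution(
            seed=random_state.randint(2 ** 30),
            available_classifiers=self.available_classifiers)

combinator = ClassifierCombinator(["decision_tree", "adaboost", "random_forest"])
distribution = combinator.rvs(np.random.RandomState(7))  # drawn by the search
print(distribution.draw(nb_view=3))  # resolved in fit(), once X.nb_view is known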
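The need_probas flag threaded through get_available_monoview_classifiers() also explains the canProbas() flips to False in min_cq_graalpy.py and scm.py: fusions that average predict_proba outputs (bayesian_inference_fusion, weighted_linear_late_fusion) set self.need_probas = True before calling the parent constructor, so the combinator only samples probability-capable monoview classifiers. A rough illustration of that filter, using stand-in classes rather than the real monoview_classifiers modules:

class DecisionTree:
    def canProbas(self):
        return True  # exposes predict_proba

class SCM:
    def canProbas(self):
        return False  # rule-based model, no usable label probabilities

def get_available_classifiers(pool, need_probas=False):
    # Mirrors get_available_monoview_classifiers: optionally keep only
    # the classifiers able to return label probabilities.
    if need_probas:
        return [name for name, cls in pool.items() if cls().canProbas()]
    return list(pool)

pool = {"decision_tree": DecisionTree, "scm": SCM}
print(get_available_classifiers(pool))                    # ['decision_tree', 'scm']
print(get_available_classifiers(pool, need_probas=True))  # ['decision_tree']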