From e5c042815ab517750d438a66798abe2e268e1e5e Mon Sep 17 00:00:00 2001
From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr>
Date: Thu, 28 Oct 2021 07:57:48 -0400
Subject: [PATCH] Add balanced accuracy metric and SamBA classifiers, update configs

---
 config_files/config_cuisine.yml               | 88 ++++++++++++++-----
 config_files/config_private_algos.yml         | 26 +++---
 setup.py                                      |  1 +
 summit/multiview_platform/exec_classif.py     |  6 +-
 .../metrics/balanced_accuracy.py              | 41 +++++++++
 .../monoview_classifiers/adaboost.py          |  2 +-
 .../monoview_classifiers/adaboost_pregen.py   |  1 +
 .../monoview_classifiers/samba.py             | 70 +++++++++++++++
 .../early_fusion_samba.py                     | 40 +++++++++
 .../multiview_classifiers/mumbo.py            |  7 ++
 10 files changed, 246 insertions(+), 36 deletions(-)
 create mode 100644 summit/multiview_platform/metrics/balanced_accuracy.py
 create mode 100644 summit/multiview_platform/monoview_classifiers/samba.py
 create mode 100644 summit/multiview_platform/multiview_classifiers/early_fusion_samba.py

diff --git a/config_files/config_cuisine.yml b/config_files/config_cuisine.yml
index 5a916146..9206cf2f 100644
--- a/config_files/config_cuisine.yml
+++ b/config_files/config_cuisine.yml
@@ -1,10 +1,10 @@
 # The base configuration of the benchmark
 log: True
-name: ["ionosphere", "abalone", "australian", "balance", "bupa", "cylinder", "hepatitis", "pima", "yeast", "zoo"]
-label: "comp_1"
+name: ['tnbc_mazid']
+label: ""
 file_type: ".hdf5"
 views:
-pathf: "/home/baptiste/Documents/Datasets/UCI/both/"
+pathf: "/home/baptiste/Documents/Datasets/Mazid/"
 nice: 0
 random_state: 42
 nb_cores: 1
@@ -13,34 +13,53 @@ debug: True
 add_noise: False
 noise_std: 0.0
 res_dir: "../results/"
-track_tracebacks: False
+track_tracebacks: True
 
 # All the classification-realted configuration options
 multiclass_method: "oneVersusOne"
-split: 0.50
+split: 0.30
 nb_folds: 5
 nb_class: 2
 classes:
-type: ["monoview",]
-algos_monoview: ["cb_boost", "self_opt_cb", "adaboost", "cq_boost", "min_cq", "adaboost_pregen", "self_opt_cb_pseudo", "self_opt_cb_root"]
-algos_multiview: ["mv_cb_boost","early_fusion_dt", "early_fusion_cb", "early_fusion_rf","mumbo", "early_fusion_svm" ]
+type: ["monoview","multiview"]
+algos_monoview: ["samba", "scm_bagging", "random_forest", "adaboost", 'scm']
+algos_multiview: ["early_fusion_adaboost",  "early_fusion_decision_tree", "early_fusion_random_forest", "early_fusion_samba"]
 stats_iter: 5
 metrics:
-  accuracy_score: {}
+  balanced_accuracy: {}
   f1_score:
     average: 'micro'
-metric_princ: "accuracy_score"
-hps_type: "None"
+  accuracy_score: {}
+metric_princ: "balanced_accuracy"
+hps_type: "Random"
 hps_args:
-  n_iter: 30
-  equivalent_draws: True
+  n_iter: 20
+  equivalent_draws: False
 
 svm_rbf:
   C: 0.7
 
+scm_bagging:
+  {max_features: 0.908115713423863, max_rules: 9, max_samples: 0.9277949143533335, model_type: conjunction,
+   n_estimators: 109, p_options: 0.7823433255515356}
+
+samba:
+  n_estimators: 22
+
+adaboost:
+  {base_estimator: DecisionTreeClassifier, base_estimator__ccp_alpha: 0.0, base_estimator__class_weight: null,
+   base_estimator__criterion: gini, base_estimator__max_depth: 5, base_estimator__max_features: null,
+   base_estimator__max_leaf_nodes: null, base_estimator__min_impurity_decrease: 0.0,
+   base_estimator__min_impurity_split: null, base_estimator__min_samples_leaf: 1, base_estimator__min_samples_split: 2,
+   base_estimator__min_weight_fraction_leaf: 0.0, base_estimator__random_state: null,
+   base_estimator__splitter: best, n_estimators: 354}
+
+svm_linear:
+  C: 0.3867
+
 cb_boost:
   n_stumps: 1
-  n_max_iterations: 10
+  n_max_iterations: 20
   estimators_generator: "Stumps"
 
 cq_boost:
@@ -50,15 +69,42 @@ cq_boost:
 min_cq:
   n_stumps_per_attribute: 1
 
-adaboost:
-  n_estimators: 10
+decision_tree:
+  {criterion: entropy, max_depth: 271, splitter: random}
 
-adaboost_pregen:
-  n_estimators: 10
-  n_stumps: 1
+early_fusion_adaboost:
+  {base_estimator: DecisionTreeClassifier, base_estimator__ccp_alpha: 0.0, base_estimator__class_weight: null,
+   base_estimator__criterion: gini, base_estimator__max_depth: 5, base_estimator__max_features: null,
+   base_estimator__max_leaf_nodes: null, base_estimator__min_impurity_decrease: 0.0,
+   base_estimator__min_impurity_split: null, base_estimator__min_samples_leaf: 1, base_estimator__min_samples_split: 2,
+   base_estimator__min_weight_fraction_leaf: 0.0, base_estimator__random_state: null,
+   base_estimator__splitter: best, base_estimator_config: null, n_estimators: 273}
 
-decision_tree:
-  max_depth: 2
+early_fusion_decision_tree:
+  {criterion: entropy, max_depth: 293, splitter: random}
+
+early_fusion_random_forest:
+  {criterion: gini, max_depth: 8, n_estimators: 46}
+
+random_forest:
+  {criterion: gini, max_depth: 8, n_estimators: 32}
+
+weighted_linear_late_fusion:
+  classifier_configs:
+    - decision_tree: {criterion: entropy, max_depth: 112, splitter: random}
+    - adaboost: {base_estimator: DecisionTreeClassifier, base_estimator__ccp_alpha: 0.0,
+                 base_estimator__class_weight: null, base_estimator__criterion: gini, base_estimator__max_depth: 2,
+                 base_estimator__max_features: null, base_estimator__max_leaf_nodes: null, base_estimator__min_impurity_decrease: 0.0,
+                 base_estimator__min_impurity_split: null, base_estimator__min_samples_leaf: 1,
+                 base_estimator__min_samples_split: 2, base_estimator__min_weight_fraction_leaf: 0.0,
+                 base_estimator__random_state: null, base_estimator__splitter: best, n_estimators: 400}
+  classifiers_names: [decision_tree, adaboost]
+  nb_cores: 1
+  rs: 724
+  weights: [0.9636627605010293, 0.3834415188257777]
+
+scm:
+  {max_rules: 10, model_type: conjunction, p: 0.8310271995093625}
 
 mumbo:
   base_estimator:
diff --git a/config_files/config_private_algos.yml b/config_files/config_private_algos.yml
index bc745606..a676749a 100644
--- a/config_files/config_private_algos.yml
+++ b/config_files/config_private_algos.yml
@@ -1,14 +1,14 @@
 # The base configuration of the benchmark
 log: True
-name: ["mnist_0_9_train"]
+name: ["multiview_mnist"]
 label: "_"
 file_type: ".hdf5"
-views: ["NIMST_data", ]
-pathf: "/home/baptiste/Documents/Datasets/MNist/"
+views:
+pathf: "examples/data/"
 nice: 0
 random_state: 43
 nb_cores: 1
-full: False
+full: True
 debug: True
 add_noise: False
 noise_std: 0.0
@@ -19,16 +19,20 @@ track_tracebacks: False
 multiclass_method: "oneVersusOne"
 split: 0.96
 nb_folds: 5
-nb_class: 2
+nb_class:
 classes:
-type: ["monoview",]
-algos_monoview: ["hm_gb_cbound","cb_boost"]
-algos_multiview: ["mumbo","mvml"]
+type: ["monoview","multiview"]
+algos_monoview: ["decision_tree","adaboost"]
+algos_multiview: ["mumbo","mvml", 'lp_norm_mkl', 'mucombo', 'early_fusion_decision_tree', 'early_fusion_adaboost']
 stats_iter: 1
 metrics:
-  zero_one_loss: {}
+  accuracy_score: {}
   f1_score: {}
-metric_princ: "zero_one_loss"
+metric_princ: "accuracy_score"
 hps_type: "None"
 hps_args:
-  n_iter: 2
\ No newline at end of file
+  n_iter: 2
+mumbo:
+  base_estimator:
+    decision_tree:
+      max_depth: 3
\ No newline at end of file
diff --git a/setup.py b/setup.py
index c92d5a75..8f774ddf 100644
--- a/setup.py
+++ b/setup.py
@@ -94,6 +94,7 @@ def setup_package():
     # ce qui est notre cas
     license="GNUGPL",
 
+
     # Il y a encore une chiée de paramètres possibles, mais avec ça vous
     # couvrez 90% des besoins
     # ext_modules=cythonize(
diff --git a/summit/multiview_platform/exec_classif.py b/summit/multiview_platform/exec_classif.py
index 6c75194a..a84014d3 100644
--- a/summit/multiview_platform/exec_classif.py
+++ b/summit/multiview_platform/exec_classif.py
@@ -548,7 +548,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
                                  argument_dictionaries=None,
                                  benchmark=None, views=None, views_indices=None,
                                  flag=None, labels=None,
-                                 track_tracebacks=False):  # pragma: no cover
+                                 track_tracebacks=False, nb_cores=1):  # pragma: no cover
     results_monoview, labels_names = benchmark_init(directory,
                                                     classification_indices,
                                                     labels,
@@ -564,7 +564,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
             results_monoview += [
                 exec_monoview(directory, X, Y, args["name"], labels_names,
                               classification_indices, k_folds,
-                              1, args["file_type"], args["pathf"], random_state,
+                              nb_cores, args["file_type"], args["pathf"], random_state,
                               hyper_param_search=hyper_param_search,
                               metrics=metrics,
                               **arguments)]
@@ -679,7 +679,7 @@ def exec_benchmark(nb_cores, stats_iter,
     for arguments in benchmark_arguments_dictionaries:
         benchmark_results = exec_one_benchmark_mono_core(
             dataset_var=dataset_var,
-            track_tracebacks=track_tracebacks,
+            track_tracebacks=track_tracebacks, nb_cores=nb_cores,
             **arguments)
         analyze_iterations([benchmark_results],
                            benchmark_arguments_dictionaries, stats_iter,
diff --git a/summit/multiview_platform/metrics/balanced_accuracy.py b/summit/multiview_platform/metrics/balanced_accuracy.py
new file mode 100644
index 00000000..6d4ab5d0
--- /dev/null
+++ b/summit/multiview_platform/metrics/balanced_accuracy.py
@@ -0,0 +1,41 @@
+"""Functions :
+ score: to get the balanced accuracy score
+ get_scorer: returns a sklearn scorer for grid search
+"""
+
+from sklearn.metrics import balanced_accuracy_score as metric
+from sklearn.metrics import make_scorer
+
+# Author-Info
+__author__ = "Baptiste Bauvin"
+__status__ = "Prototype"  # Production, Development, Prototype
+
+
+def score(y_true, y_pred, multiclass=False, **kwargs):
+    """Compute the balanced accuracy of predictions.
+
+    Arguments:
+    y_true: real labels
+    y_pred: predicted labels
+    multiclass: accepted in the signature but never read here
+    kwargs: forwarded unchanged to sklearn's balanced_accuracy_score
+
+    Returns:
+    Whatever balanced_accuracy_score returns for y_true, y_pred"""
+    return metric(y_true, y_pred, **kwargs)
+
+
+def get_scorer(**kwargs):
+    """Build a sklearn scorer for balanced accuracy (higher is better).
+
+    Keyword Arguments:
+    kwargs: forwarded unchanged to sklearn's make_scorer
+
+    Returns: the scorer object produced by make_scorer"""
+    return make_scorer(metric, greater_is_better=True, **kwargs)
+
+
+def get_config(**kwargs):
+    """Describe this metric and the keyword arguments it was given."""
+    template = "Balanced accuracy score using {}, (higher is better)"
+    return template.format(kwargs)
diff --git a/summit/multiview_platform/monoview_classifiers/adaboost.py b/summit/multiview_platform/monoview_classifiers/adaboost.py
index cd8ce3db..579b9ffd 100644
--- a/summit/multiview_platform/monoview_classifiers/adaboost.py
+++ b/summit/multiview_platform/monoview_classifiers/adaboost.py
@@ -35,7 +35,7 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
                                     )
         self.param_names = ["n_estimators", "base_estimator"]
         self.classed_params = ["base_estimator"]
-        self.distribs = [CustomRandint(low=1, high=500),
+        self.distribs = [CustomRandint(low=1, high=100),
                          base_boosting_estimators]
         self.weird_strings = {"base_estimator": "class_name"}
         self.plotted_metric = metrics.zero_one_loss
diff --git a/summit/multiview_platform/monoview_classifiers/adaboost_pregen.py b/summit/multiview_platform/monoview_classifiers/adaboost_pregen.py
index f0fbd955..43589981 100644
--- a/summit/multiview_platform/monoview_classifiers/adaboost_pregen.py
+++ b/summit/multiview_platform/monoview_classifiers/adaboost_pregen.py
@@ -17,6 +17,7 @@ __status__ = "Prototype"  # Production, Development, Prototype
 
 classifier_class_name = "AdaboostPregen"
 
+
 class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier,
                      PregenClassifier):
     """
diff --git a/summit/multiview_platform/monoview_classifiers/samba.py b/summit/multiview_platform/monoview_classifiers/samba.py
new file mode 100644
index 00000000..f43defd7
--- /dev/null
+++ b/summit/multiview_platform/monoview_classifiers/samba.py
@@ -0,0 +1,70 @@
+from SamBA.samba import NeighborHoodClassifier, ExpTrainWeighting
+import numpy as np
+from sklearn.tree import DecisionTreeClassifier
+from SamBA.relevances import *
+from SamBA.distances import *
+from sklearn.preprocessing import RobustScaler
+
+from ..monoview.monoview_utils import BaseMonoviewClassifier
+from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
+
+
+# Author-Info
+__author__ = "Baptiste Bauvin"
+__status__ = "Prototype"  # Production, Development, Prototype
+
+
+# Name of the classifier class defined in this module
+classifier_class_name = "SamBAClf"
+
+class SamBAClf(NeighborHoodClassifier, BaseMonoviewClassifier):
+
+    def __init__(self, base_estimator=DecisionTreeClassifier(max_depth=1,
+                                                       splitter='best',
+                                                       criterion='gini'),
+                 n_estimators=2,
+                 estimator_params=tuple(),
+                 relevance=MarginRelevance(),
+                 distance=EuclidianDist(),
+                 train_weighting=ExpTrainWeighting(),
+                 keep_selected_features=True,
+                 normalizer=RobustScaler(),
+                 b=2,
+                 pred_train=False,
+                 forced_diversity=True,
+                 **kwargs):
+        """
+        Forward every named argument unchanged to the parent constructor;
+        ``kwargs`` is accepted but not forwarded.
+
+        NOTE(review): the object defaults (base_estimator, relevance,
+        distance, train_weighting, normalizer) are evaluated once, at
+        definition time, so default-built instances receive the same objects.
+        ``param_names`` and ``distribs`` both hold seven entries, presumably
+        paired by position for the hyper-parameter search -- TODO confirm.
+        """
+        super(SamBAClf, self).__init__(base_estimator=base_estimator,
+                 n_estimators=n_estimators,
+                 estimator_params=estimator_params,
+                 relevance=relevance,
+                 distance=distance,
+                 train_weighting=train_weighting,
+                 keep_selected_features=keep_selected_features,
+                 normalizer=normalizer,
+                 forced_diversity=forced_diversity,
+                 b=b,pred_train=pred_train)
+        self.param_names = ["n_estimators", "relevance", "distance",
+                            "train_weighting", "b", "pred_train", "normalizer"]
+        self.distribs = [CustomRandint(low=1, high=30),
+                         [ExpRelevance(), MarginRelevance()],
+                         [EuclidianDist(), PolarDist(), ExpEuclidianDist()],
+                         [ExpTrainWeighting()],
+                         CustomUniform(0.5, 3),
+                         [True, False],
+                         [RobustScaler(), None]]
+        self.classed_params = []
+        self.weird_strings = {}
+
+    def get_interpretation(self, directory, base_file_name, y_test, multi_class=False):  # y_test and multi_class are not read
+        interpret_string = self.get_feature_importance(directory, base_file_name)
+        return interpret_string
diff --git a/summit/multiview_platform/multiview_classifiers/early_fusion_samba.py b/summit/multiview_platform/multiview_classifiers/early_fusion_samba.py
new file mode 100644
index 00000000..7722f003
--- /dev/null
+++ b/summit/multiview_platform/multiview_classifiers/early_fusion_samba.py
@@ -0,0 +1,49 @@
+from .additions.early_fusion_from_monoview import BaseEarlyFusion
+from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
+from ..monoview_classifiers.samba import SamBAClf
+from SamBA.samba import *
+from SamBA.distances import *
+from SamBA.relevances import *
+# Imported explicitly: the default ``normalizer`` below needs this name and
+# must not depend on what the wildcard imports happen to re-export.
+from sklearn.preprocessing import RobustScaler
+from sklearn.tree import DecisionTreeClassifier
+
+classifier_class_name = "EarlyFusionSamba"
+
+
+class EarlyFusionSamba(BaseEarlyFusion):
+    """BaseEarlyFusion subclass configured with monoview_classifier="samba"."""
+
+    def __init__(self, random_state=None, base_estimator=DecisionTreeClassifier(max_depth=1,
+                                                       splitter='best',
+                                                       criterion='gini'),
+                 n_estimators=2,
+                 estimator_params=tuple(),
+                 relevance=MarginRelevance(),
+                 distance=EuclidianDist(),
+                 train_weighting=ExpTrainWeighting(pred_train=True),
+                 keep_selected_features=True,
+                 normalizer=RobustScaler(),
+                 pred_train=False,
+                 b=2,
+                 **kwargs):
+        """Forward all arguments, including ``kwargs``, to BaseEarlyFusion.
+
+        ``param_names`` and ``distribs`` both hold six entries, presumably
+        paired by position for the hyper-parameter search -- TODO confirm.
+        """
+        BaseEarlyFusion.__init__(self, random_state=random_state,
+                                 monoview_classifier="samba",
+                                 base_estimator=base_estimator, estimator_params=estimator_params,
+                                 relevance=relevance, distance=distance, train_weighting=train_weighting,
+                                 keep_selected_features=keep_selected_features, normalizer=normalizer,
+                                 n_estimators=n_estimators, pred_train=pred_train, b=b, **kwargs)
+        self.param_names = ["n_estimators", "relevance", "distance",
+                            "train_weighting", "b", "pred_train"]
+        self.distribs = [CustomRandint(low=1, high=30),
+                         [ExpRelevance(), MarginRelevance()],
+                         [EuclidianDist(), PolarDist(), ExpEuclidianDist()],
+                         [ExpTrainWeighting(pred_train=True)],
+                         CustomUniform(0.25, 3),
+                         [True, False]]
\ No newline at end of file
diff --git a/summit/multiview_platform/multiview_classifiers/mumbo.py b/summit/multiview_platform/multiview_classifiers/mumbo.py
index e631cbc2..203228ba 100644
--- a/summit/multiview_platform/multiview_classifiers/mumbo.py
+++ b/summit/multiview_platform/multiview_classifiers/mumbo.py
@@ -47,6 +47,13 @@ class Mumbo(BaseMultiviewClassifier, MumboClassifier):
         """
         if base_estimator is None:
             self.base_estimator = DecisionTreeClassifier()
+        elif isinstance(base_estimator, list):  # list of config dicts or of estimator instances
+            if base_estimator and isinstance(base_estimator[0], dict):  # empty list falls through to ValueError
+                self.base_estimator = [self.set_base_estim_from_dict(estim) for estim in base_estimator]
+            elif base_estimator and isinstance(base_estimator[0], BaseEstimator):
+                self.base_estimator = base_estimator
+            else:
+                raise ValueError("base_estimator should be a list of dict or a sklearn classifier list")
         elif isinstance(base_estimator, dict):
             self.base_estimator = self.set_base_estim_from_dict(base_estimator)
             MumboClassifier.set_params(self, **params)
-- 
GitLab