Skip to content
Snippets Groups Projects
Commit d3930ac2 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Added Mumbo

parent 14ea72d8
No related branches found
No related tags found
No related merge requests found
Pipeline #3532 failed
# The base configuration of the benchmark
Base :
log: true
name: ["Plausible"]
label: "_"
type: ".hdf5"
views:
pathf: "../data/"
nice: 0
random_state: 42
nb_cores: 1
full: False
debug: True
add_noise: False
noise_std: 0.0
res_dir: "../results/"
# All the classification-related configuration options
Classification:
multiclass_method: "oneVersusOne"
split: 0.8
nb_folds: 2
nb_class: 2
classes:
type: ["multiview"]
algos_monoview: ["all"]
algos_multiview: ["mumbo"]
stats_iter: 2
metrics: ["accuracy_score", "f1_score"]
metric_princ: "f1_score"
hps_type: "randomized_search"
hps_iter: 2
#####################################
# The Monoview Classifier arguments #
#####################################
random_forest:
n_estimators: [25]
max_depth: [3]
criterion: ["entropy"]
svm_linear:
C: [1]
svm_rbf:
C: [1]
svm_poly:
C: [1]
degree: [2]
adaboost:
n_estimators: [50]
base_estimator: ["DecisionTreeClassifier"]
adaboost_pregen:
n_estimators: [50]
base_estimator: ["DecisionTreeClassifier"]
n_stumps: [1]
adaboost_graalpy:
n_iterations: [50]
n_stumps: [1]
decision_tree:
max_depth: [10]
criterion: ["gini"]
splitter: ["best"]
decision_tree_pregen:
max_depth: [10]
criterion: ["gini"]
splitter: ["best"]
n_stumps: [1]
sgd:
loss: ["hinge"]
penalty: [l2]
alpha: [0.0001]
knn:
n_neighbors: [5]
weights: ["uniform"]
algorithm: ["auto"]
scm:
model_type: ["conjunction"]
max_rules: [10]
p: [0.1]
scm_pregen:
model_type: ["conjunction"]
max_rules: [10]
p: [0.1]
n_stumps: [1]
cq_boost:
mu: [0.01]
epsilon: [1e-06]
n_max_iterations: [5]
n_stumps: [1]
cg_desc:
n_max_iterations: [10]
n_stumps: [1]
cb_boost:
n_max_iterations: [10]
n_stumps: [1]
lasso:
alpha: [1]
max_iter: [2]
gradient_boosting:
n_estimators: [2]
######################################
# The Multiview Classifier arguments #
######################################
weighted_linear_early_fusion:
view_weights: [None]
monoview_classifier_name: ["decision_tree"]
monoview_classifier_config:
decision_tree:
max_depth: [1]
criterion: ["gini"]
splitter: ["best"]
entropy_fusion:
classifier_names: [["decision_tree"]]
classifier_configs:
decision_tree:
max_depth: [1]
criterion: ["gini"]
splitter: ["best"]
disagree_fusion:
classifier_names: [["decision_tree"]]
classifier_configs:
decision_tree:
max_depth: [1]
criterion: ["gini"]
splitter: ["best"]
double_fault_fusion:
classifier_names: [["decision_tree"]]
classifier_configs:
decision_tree:
max_depth: [1]
criterion: ["gini"]
splitter: ["best"]
difficulty_fusion:
classifier_names: [["decision_tree"]]
classifier_configs:
decision_tree:
max_depth: [1]
criterion: ["gini"]
splitter: ["best"]
scm_late_fusion:
classifier_names: [["decision_tree"]]
p: 0.1
max_rules: 10
model_type: 'conjunction'
classifier_configs:
decision_tree:
max_depth: [1]
criterion: ["gini"]
splitter: ["best"]
majority_voting_fusion:
classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]]
classifier_configs:
decision_tree:
max_depth: [1]
criterion: ["gini"]
splitter: ["best"]
bayesian_inference_fusion:
classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]]
classifier_configs:
decision_tree:
max_depth: [1]
criterion: ["gini"]
splitter: ["best"]
weighted_linear_late_fusion:
classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]]
classifier_configs:
decision_tree:
max_depth: [1]
criterion: ["gini"]
splitter: ["best"]
mumbo:
base_estimator: [None]
n_estimators: [10]
best_view_mode: ["edge"]
\ No newline at end of file
...@@ -72,10 +72,13 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos, args): ...@@ -72,10 +72,13 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos, args):
benchmark["monoview"] = monoview_algos benchmark["monoview"] = monoview_algos
if "multiview" in cl_type: if "multiview" in cl_type:
if multiview_algos==["all"]:
benchmark["multiview"] = [name for _, name, isPackage in benchmark["multiview"] = [name for _, name, isPackage in
pkgutil.iter_modules([ pkgutil.iter_modules([
"./mono_multi_view_classifiers/multiview_classifiers"]) "./mono_multi_view_classifiers/multiview_classifiers"])
if not isPackage] if not isPackage]
else:
benchmark["multiview"] = multiview_algos
return benchmark return benchmark
......
...@@ -272,7 +272,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices, k_folds ...@@ -272,7 +272,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices, k_folds
searching_tool=hyper_param_search, n_iter=n_iter, searching_tool=hyper_param_search, n_iter=n_iter,
classifier_config=classifier_config) classifier_config=classifier_config)
classifier = getattr(classifier_module, classifier_name)(random_state, classifier = getattr(classifier_module, classifier_name)(random_state=random_state,
**classifier_config) **classifier_config)
logging.debug("Done:\t Optimizing hyperparameters") logging.debug("Done:\t Optimizing hyperparameters")
......
from sklearn.tree import DecisionTreeClassifier
from multimodalboost.mumbo import MumboClassifier
from ..multiview.multiview_utils import BaseMultiviewClassifier, \
get_examples_views_indices
from ..utils.hyper_parameter_search import CustomRandint
classifier_class_name = "Mumbo"
class Mumbo(BaseMultiviewClassifier, MumboClassifier):
    """Adapter exposing ``multimodalboost``'s ``MumboClassifier`` as a
    multiview classifier of this platform.

    The dataset object received by ``fit`` and ``predict`` is flattened into
    a single array through its ``to_numpy_array`` method before being handed
    to the ``MumboClassifier`` implementation.

    Parameters
    ----------
    base_estimator : estimator or None
        Forwarded unchanged to the ``MumboClassifier`` constructor.
    n_estimators : int
        Forwarded unchanged to the ``MumboClassifier`` constructor.
    random_state : any
        Given both to ``BaseMultiviewClassifier`` and to ``MumboClassifier``.
    best_view_mode : str
        Forwarded unchanged to the ``MumboClassifier`` constructor; the
        hyper-parameter search samples it from ``"edge"`` and ``"error"``.
    """

    def __init__(self, base_estimator=None,
                 n_estimators=50,
                 random_state=None,
                 best_view_mode="edge"):
        # Platform-side initialisation first.
        super().__init__(random_state)
        # Then resume the MRO right after BaseMultiviewClassifier so the
        # boosting-side constructor receives its keyword arguments
        # (presumably MumboClassifier.__init__ -- confirm against the MRO).
        super(BaseMultiviewClassifier, self).__init__(
            base_estimator=base_estimator,
            n_estimators=n_estimators,
            random_state=random_state,
            best_view_mode=best_view_mode)
        # Names and sampling distributions used by the hyper-parameter
        # search; both lists are index-aligned.
        self.param_names = ["base_estimator", "n_estimators", "random_state",
                            "best_view_mode"]
        stump = DecisionTreeClassifier(max_depth=1)
        n_estimators_distrib = CustomRandint(5, 200)
        self.distribs = [[stump], n_estimators_distrib, [random_state],
                         ["edge", "error"]]

    def fit(self, X, y, train_indices=None, view_indices=None):
        """Fit the classifier on the selected examples and views of ``X``.

        Parameters
        ----------
        X : dataset object providing ``to_numpy_array``
        y : indexable labels, subset with ``train_indices``
        train_indices : indices of the training examples, or None
        view_indices : indices of the views to use, or None

        Returns
        -------
        Whatever the parent ``fit`` returns.
        """
        # Let the platform helper resolve the None defaults.
        train_indices, view_indices = get_examples_views_indices(
            X, train_indices, view_indices)
        stacked, limits = X.to_numpy_array(example_indices=train_indices,
                                           view_indices=view_indices)
        return super().fit(stacked, y[train_indices], limits)

    def predict(self, X, example_indices=None, view_indices=None):
        """Predict labels for the selected examples and views of ``X``.

        Parameters
        ----------
        X : dataset object providing ``to_numpy_array``
        example_indices : indices of the examples to predict, or None
        view_indices : indices of the views to use, or None

        Returns
        -------
        Whatever the parent ``predict`` returns.
        """
        example_indices, view_indices = get_examples_views_indices(
            X, example_indices, view_indices)
        # The view limits are not needed at prediction time.
        stacked, _ = X.to_numpy_array(example_indices=example_indices,
                                      view_indices=view_indices)
        return super().predict(stacked)
...@@ -295,6 +295,39 @@ class Dataset(): ...@@ -295,6 +295,39 @@ class Dataset():
# The following methods are hdf5 free # The following methods are hdf5 free
def to_numpy_array(self, example_indices=None, view_indices=None):
    """
    Concatenate the requested views into one big numpy array while saving
    the limits of each view in a list, to be able to retrieve them later.

    Each view is fetched exactly once through ``self.get_v`` and reused for
    both the limit computation and the concatenation.

    Parameters
    ----------
    example_indices : array like,
        The indices of the examples to extract from the dataset; passed
        unchanged to ``self.get_v``.

    view_indices : array like,
        The indices of the views to concatenate in the numpy array.
        Must be an iterable: the ``None`` default is not resolved here.

    Returns
    -------
    concat_views : numpy array,
        The selected views concatenated along axis 1.

    view_limits : list of int
        Cumulative column boundaries, starting at 0: the i-th requested
        view occupies columns ``view_limits[i]:view_limits[i + 1]``.

    Raises
    ------
    ValueError
        Raised by ``np.concatenate`` when ``view_indices`` is empty.
    """
    # Load every view a single time; fetching may be costly.
    view_arrays = [self.get_v(view_index, example_indices=example_indices)
                   for view_index in view_indices]
    view_limits = [0]
    for view_data in view_arrays:
        # Each limit is the previous one shifted by this view's width.
        view_limits.append(view_limits[-1] + view_data.shape[1])
    concat_views = np.concatenate(view_arrays, axis=1)
    return concat_views, view_limits
def select_views_and_labels(self, nb_labels=None, def select_views_and_labels(self, nb_labels=None,
selected_label_names=None, random_state=None, selected_label_names=None, random_state=None,
view_names = None, path_for_new="../data/"): view_names = None, path_for_new="../data/"):
......
...@@ -93,7 +93,7 @@ def get_test_folds_preds(X, y, cv, estimator, framework, available_indices=None) ...@@ -93,7 +93,7 @@ def get_test_folds_preds(X, y, cv, estimator, framework, available_indices=None)
def randomized_search(X, y, framework, random_state, output_file_name, classifier_module, def randomized_search(X, y, framework, random_state, output_file_name, classifier_module,
classifier_name, folds=4, nb_cores=1, metric=["accuracy_score", None], classifier_name, folds=4, nb_cores=1, metric=["accuracy_score", None],
n_iter=30, classifier_kwargs =None, learning_indices=None, view_indices=None): n_iter=30, classifier_kwargs =None, learning_indices=None, view_indices=None):
estimator = getattr(classifier_module, classifier_name)(random_state, estimator = getattr(classifier_module, classifier_name)(random_state=random_state,
**classifier_kwargs) **classifier_kwargs)
params_dict = estimator.genDistribs() params_dict = estimator.genDistribs()
if params_dict: if params_dict:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment