Skip to content
Snippets Groups Projects
Commit d650f627 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Cuisine algos modifs

parent e64548bc
Branches cuisine
No related tags found
No related merge requests found
Pipeline #6403 failed
Showing
with 1334 additions and 142 deletions
# The base configuration of the benchmark
log: True
name: ["test_boules"]
label: "_1_3"
name: ["ionosphere", "abalone", "australian", "balance", "bupa", "cylinder", "hepatitis", "pima", "yeast", "zoo"]
label: "comp_1"
file_type: ".hdf5"
views:
pathf: "/home/baptiste/Documents/Clouded/short_projects/latent_space_study/"
pathf: "/home/baptiste/Documents/Datasets/UCI/both/"
nice: 0
random_state: 42
nb_cores: 1
full: False
full: True
debug: True
add_noise: False
noise_std: 0.0
......@@ -17,74 +17,125 @@ track_tracebacks: False
# All the classification-related configuration options
multiclass_method: "oneVersusOne"
split: 0.10
split: 0.50
nb_folds: 5
nb_class: 4
nb_class: 2
classes:
type: ["multiview","monoview"]
algos_monoview: ["cb_boost", "decision_tree", 'random_forest']
algos_multiview: ["mv_cb_boost", "weighted_linear_late_fusion","weighted_linear_early_fusion","mumbo" ]
type: ["monoview",]
algos_monoview: ["cb_boost", "self_opt_cb", "adaboost", "cq_boost", "min_cq", "adaboost_pregen", "self_opt_cb_pseudo", "self_opt_cb_root"]
algos_multiview: ["mv_cb_boost","early_fusion_dt", "early_fusion_cb", "early_fusion_rf","mumbo", "early_fusion_svm" ]
stats_iter: 5
metrics:
accuracy_score: {}
f1_score:
average: 'micro'
metric_princ: "accuracy_score"
hps_type: "Random"
hps_type: "None"
hps_args:
n_iter: 10
n_iter: 30
equivalent_draws: True
svm_rbf:
C: 0.7
cb_boost:
n_stumps: 30
n_max_iterations: 20
estimators_generator: "Trees"
max_depth: 1
n_stumps: 1
n_max_iterations: 10
estimators_generator: "Stumps"
cq_boost:
n_max_iterations: 10
n_stumps: 1
min_cq:
n_stumps_per_attribute: 1
adaboost:
n_estimators: 10
adaboost_pregen:
n_estimators: 10
n_stumps: 1
decision_tree:
max_depth: 2
mumbo:
base_estimator: decision_tree
base_estimator__max_depth: 1
n_estimators: 80
base_estimator:
- svm_rbf:
C: 0.001
- svm_rbf:
C: 0.001
- decision_tree:
max_depth: 1
- decision_tree:
max_depth: 1
n_estimators: 100
mv_cb_boost:
n_max_iterations: 80
n_stumps: 30
estimators_generator: "Trees"
max_depth: 1
n_estimators: 100
base_estimator: ["Stumps", "Stumps", "Stumps", "Stumps"]
base_estimator__n_stumps: [50, 50, 50, 50]
base_estimator__check_diff: False
base_estimator__C: 0.001
base_estimator__kernel: "rbf"
base_estimator__max_depth: 2
base_estimator__distribution_type: "uniform"
base_estimator__low: 0
base_estimator__high: 10
base_estimator__attributes_ratio: 0.5
base_estimator__examples_ratio: 0.55
pb_mv_boost:
num_iterations: 20
decision_tree_depth: 1
weighted_linear_early_fusion:
monoview_classifier_name: "cb_boost"
early_fusion_cb:
monoview_classifier_config:
cb_boost:
n_stumps: 30
n_max_iterations: 20
estimators_generator: "Trees"
n_estimators: 100
base_estimator__max_depth: 1
early_fusion_dt:
monoview_classifier_config:
decision_tree:
max_depth: 2
early_fusion_rf:
monoview_classifier_config:
random_forest:
n_estimators: 100
max_depth: 1
weighted_linear_late_fusion:
classifiers_names: ["cb_boost", "cb_boost", "cb_boost", "cb_boost"]
classifier_configs:
- cb_boost:
n_stumps: 30
n_max_iterations: 20
estimators_generator: "Trees"
max_depth: 1
- cb_boost:
n_stumps: 30
n_max_iterations: 20
estimators_generator: "Trees"
max_depth: 1
- cb_boost:
n_stumps: 30
n_max_iterations: 20
estimators_generator: "Trees"
max_depth: 1
- cb_boost:
n_stumps: 30
n_max_iterations: 20
estimators_generator: "Trees"
max_depth: 1
early_fusion_svm:
monoview_classifier_config:
svm_rbf:
C: 0.7
#pb_mv_boost:
# num_iterations: 20
# decision_tree_depth: 1
#weighted_linear_early_fusion:
# monoview_classifier_name: "cb_boost"
# monoview_classifier_config:
# cb_boost:
# n_stumps: 30
# n_max_iterations: 20
# estimators_generator: "Trees"
# max_depth: 1
#weighted_linear_late_fusion:
# classifiers_names: ["cb_boost", "cb_boost", "cb_boost", "cb_boost"]
# classifier_configs:
# - cb_boost:
# n_stumps: 30
# n_max_iterations: 20
# estimators_generator: "Trees"
# max_depth: 1
# - cb_boost:
# n_stumps: 30
# n_max_iterations: 20
# estimators_generator: "Trees"
# max_depth: 1
# - cb_boost:
# n_stumps: 30
# n_max_iterations: 20
# estimators_generator: "Trees"
# max_depth: 1
# - cb_boost:
# n_stumps: 30
# n_max_iterations: 20
# estimators_generator: "Trees"
# max_depth: 1
#
......@@ -129,6 +129,7 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier,
np.sqrt(1 - 4 * np.square(0.5 - self.estimator_errors_[:i + 1])))
for i in
range(self.estimator_errors_.shape[0])])
self.feature_importances_ = np.ones(X.shape[1])
return self
# def canProbas(self):
......@@ -172,32 +173,35 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier,
# self.n_stumps_per_attribute = params["n_tumps"]
# return self
def getInterpret(self, directory, y_test):
    """Return the textual interpretation of the classifier.

    Every reporting step is currently commented out, so ``directory`` and
    ``y_test`` are not used, nothing is written to disk and the returned
    report is always the empty string.
    """
    # Disabled reporting steps, kept for reference:
    # interpretString += self.getFeatureImportance(directory)
    # interpretString += "\n\n Estimator error | Estimator weight\n"
    # interpretString += "\n".join(
    # [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for
    # error, weight in
    # zip(self.estimator_errors_, self.estimator_weights_)])
    # step_test_metrics = np.array(
    # [self.plotted_metric.score(y_test, step_pred) for step_pred in
    # self.step_predictions])
    # get_accuracy_graph(step_test_metrics, "AdaboostPregen",
    # directory + "test_metrics.png",
    # self.plotted_metric_name, set="test")
    # # get_accuracy_graph(self.metrics, "AdaboostPregen",
    # # directory + "metrics.png", self.plotted_metric_name,
    # # bounds=list(self.bounds),
    # # bound_name="boosting bound")
    # np.savetxt(directory + "test_metrics.csv", step_test_metrics,
    # delimiter=',')
    # np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',')
    # np.savetxt(directory + "times.csv",
    # np.array([self.train_time, self.pred_time]), delimiter=',')
    # np.savetxt(directory + "times_iter.csv",
    # np.array([self.train_time, len(self.estimator_weights_)]), delimiter=',')
    return ""
# def getInterpret(self, directory, y_test):
# # interpretString = ""
# # interpretString += self.getFeatureImportance(directory)
# # interpretString += "\n\n Estimator error | Estimator weight\n"
# # interpretString += "\n".join(
# # [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for
# # error, weight in
# # zip(self.estimator_errors_, self.estimator_weights_)])
# # step_test_metrics = np.array(
# # [self.plotted_metric.score(y_test, step_pred) for step_pred in
# # self.step_predictions])
# # get_accuracy_graph(step_test_metrics, "AdaboostPregen",
# # directory + "test_metrics.png",
# # self.plotted_metric_name, set="test")
# # # get_accuracy_graph(self.metrics, "AdaboostPregen",
# # # directory + "metrics.png", self.plotted_metric_name,
# # # bounds=list(self.bounds),
# # # bound_name="boosting bound")
# # np.savetxt(directory + "test_metrics.csv", step_test_metrics,
# # delimiter=',')
# # np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',')
# # np.savetxt(directory + "times.csv",
# # np.array([self.train_time, self.pred_time]), delimiter=',')
# # np.savetxt(directory + "times_iter.csv",
# # np.array([self.train_time, len(self.estimator_weights_)]), delimiter=',')
# return interpretString
def feature_importances_(self, value):
    """Store ``value`` on the instance as ``_feature_importances_``.

    NOTE(review): this reads like a property setter, yet no decorator is
    visible on it here -- confirm how it is wired into the class.
    """
    setattr(self, "_feature_importances_", value)
# def formatCmdArgs(args):
# """Used to format kwargs for the parsed args"""
......@@ -207,10 +211,10 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier,
# return kwargsDict
def paramsToSet(nIter, random_state):
    """Used for weighted linear early fusion to generate random search sets.

    Returns a list of ``nIter`` dicts; each one holds an ``"n_estimators"``
    value drawn with ``random_state.randint(1, 500)`` and a
    ``"base_estimator"`` entry set to ``None``. One draw is made per dict,
    in list order.
    """
    return [{"n_estimators": random_state.randint(1, 500),
             "base_estimator": None}
            for _ in range(nIter)]
# def paramsToSet(nIter, random_state):
# """Used for weighted linear early fusion to generate random search sets"""
# paramsSet = []
# for _ in range(nIter):
# paramsSet.append({"n_estimators": random_state.randint(1, 500),
# "base_estimator": None})
# return paramsSet
......@@ -15,6 +15,7 @@ from ...monoview.monoview_utils import change_label_to_minus
from ... import metrics
# Used for CBBoost
class CBBoostClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
......
This diff is collapsed.
......@@ -45,7 +45,7 @@ class CBBoost(CBBoostClassifier, BaseMonoviewClassifier):
mincq_tracking=False
)
self.param_names = ["n_max_iterations", "n_stumps", "random_state"]
self.distribs = [CustomRandint(low=2, high=500), [n_stumps],
self.distribs = [CustomRandint(low=2, high=500), [1,2,10],
[random_state]]
self.classed_params = []
self.weird_strings = {}
......
......@@ -39,23 +39,23 @@ class CQBoost(ColumnGenerationClassifier, BaseMonoviewClassifier):
# """Used to know if the classifier can return label probabilities"""
# return False
def get_interpretation(self, directory, y_test, multi_class=False):
    """Dump the boosting traces as CSV files and return the base report.

    Writes ``train_metrics.csv``, ``c_bounds.csv``, ``y_test_step.csv`` and
    ``step_test_metrics.csv`` by appending each file name to ``directory``
    (plain string concatenation, so ``directory`` must already end with a
    path separator). ``multi_class`` is accepted but not used here.

    Returns whatever ``getInterpretBase`` returns for this classifier.
    """

    def dump(file_name, values):
        # Every trace uses the same comma-separated layout.
        np.savetxt(directory + file_name, values, delimiter=',')

    dump("train_metrics.csv", self.train_metrics)
    dump("c_bounds.csv", self.c_bounds)
    dump("y_test_step.csv", self.step_decisions)

    # The last column of ``step_decisions`` is left out of the scoring.
    n_scored_steps = self.step_decisions.shape[1] - 1
    step_metrics = np.array(
        [self.plotted_metric.score(y_test, self.step_decisions[:, step])
         for step in range(n_scored_steps)])
    dump("step_test_metrics.csv", step_metrics)

    return getInterpretBase(self, directory, "CQBoost", self.weights_,
                            y_test)
# def get_interpretation(self, directory, y_test, multi_class=False):
# # np.savetxt(directory + "train_metrics.csv", self.train_metrics,
# # delimiter=',')
# # np.savetxt(directory + "c_bounds.csv", self.c_bounds,
# # delimiter=',')
# # np.savetxt(directory + "y_test_step.csv", self.step_decisions,
# # delimiter=',')
# # step_metrics = []
# # for step_index in range(self.step_decisions.shape[1] - 1):
# # step_metrics.append(self.plotted_metric.score(y_test,
# # self.step_decisions[:,
# # step_index]))
# # step_metrics = np.array(step_metrics)
# # np.savetxt(directory + "step_test_metrics.csv", step_metrics,
# # delimiter=',')
# return getInterpretBase(self, directory, "CQBoost", self.weights_,
# y_test)
# def formatCmdArgs(args):
......
......@@ -616,27 +616,27 @@ class MinCQ(MinCqLearner, BaseMonoviewClassifier):
# """Used to know if the classifier can return label probabilities"""
# return True
def set_params(self, **params):
    """Set ``mu``, ``random_state`` and ``n_stumps_per_attribute``.

    All three keys are required (a missing one raises ``KeyError``); any
    other keyword is ignored. Returns ``self``.
    """
    for key in ("mu", "random_state", "n_stumps_per_attribute"):
        setattr(self, key, params[key])
    return self
def get_params(self, deep=True):
    """Return the three tunable parameters as a dict.

    ``deep`` is accepted but has no effect on the result.
    """
    exposed = ("random_state", "mu", "n_stumps_per_attribute")
    return {key: getattr(self, key) for key in exposed}
def getInterpret(self, directory, y_test):
    """Report the train and test C-bound values and save the training time.

    The test labels are copied and every ``0`` is replaced by ``-1`` before
    being handed to ``self.majority_vote.cbound_value``; ``y_test`` itself
    is left untouched. ``times.csv`` is written by appending its name to
    ``directory`` (plain string concatenation).
    """
    report_parts = ["Train C_bound value : " + str(self.cbound_train)]

    signed_labels = np.copy(y_test)
    zero_positions = np.where(signed_labels == 0)
    signed_labels[zero_positions] = -1
    test_cbound = self.majority_vote.cbound_value(self.x_test, signed_labels)
    report_parts.append("\n Test c_bound value : " + str(test_cbound))

    # The second value is a constant 0 placeholder next to the train time.
    np.savetxt(directory+"times.csv", np.array([self.train_time, 0]))
    return "".join(report_parts)
def get_name_for_fusion(self):
    """Return the fixed short tag ``"MCQ"`` for this classifier."""
    fusion_tag = "MCQ"
    return fusion_tag
# def set_params(self, **params):
# self.mu = params["mu"]
# self.random_state = params["random_state"]
# self.n_stumps_per_attribute = params["n_stumps_per_attribute"]
# return self
# def get_params(self, deep=True):
# return {"random_state": self.random_state, "mu": self.mu,
# "n_stumps_per_attribute": self.n_stumps_per_attribute}
# #
# def getInterpret(self, directory, y_test):
# interpret_string = "Train C_bound value : " + str(self.cbound_train)
# y_rework = np.copy(y_test)
# y_rework[np.where(y_rework == 0)] = -1
# interpret_string += "\n Test c_bound value : " + str(
# self.majority_vote.cbound_value(self.x_test, y_rework))
# np.savetxt(directory+"times.csv", np.array([self.train_time, 0]))
# return interpret_string
# def get_name_for_fusion(self):
# return "MCQ"
#
# def formatCmdArgs(args):
......@@ -646,9 +646,9 @@ class MinCQ(MinCqLearner, BaseMonoviewClassifier):
# return kwargsDict
def paramsToSet(nIter, randomState):
    """Used for weighted linear early fusion to generate random search sets.

    Returns ``nIter`` distinct empty dicts; ``randomState`` is not used.
    """
    return [{} for _ in range(nIter)]
# def paramsToSet(nIter, randomState):
# """Used for weighted linear early fusion to generate random search sets"""
# paramsSet = []
# for _ in range(nIter):
# paramsSet.append({})
# return paramsSet
from .additions.SelOptCB import SelfOptCBBoostClassifier
classifier_class_name = "SelfOptCBBoostBaseStump"
class SelfOptCBBoostBaseStump(SelfOptCBBoostClassifier):
    """SelfOptCBBoostClassifier with ``base_estimator`` defaulting to
    ``"BaseStump"``.

    The constructor adds no logic of its own: every argument is forwarded
    unchanged to ``SelfOptCBBoostClassifier.__init__``.
    """

    def __init__(self, n_max_iterations=10, random_state=42,
                 twice_the_same=True, random_start=False,
                 save_train_data=True, test_graph=True,
                 base_estimator="BaseStump"):
        forwarded = {
            "n_max_iterations": n_max_iterations,
            "random_state": random_state,
            "twice_the_same": twice_the_same,
            "random_start": random_start,
            "save_train_data": save_train_data,
            "test_graph": test_graph,
            "base_estimator": base_estimator,
        }
        SelfOptCBBoostClassifier.__init__(self, **forwarded)
from .additions.SelOptCB import SelfOptCBBoostClassifier
classifier_class_name = "SelfOptCBBoostBaseStump"
class SelfOptCBBoostBaseStump(SelfOptCBBoostClassifier):
    """SelfOptCBBoostClassifier with ``base_estimator`` defaulting to
    ``"PseudoLinearStump"``.

    The constructor adds no logic of its own: every argument is forwarded
    unchanged to ``SelfOptCBBoostClassifier.__init__``.

    NOTE(review): the class name says "BaseStump" although the default base
    estimator is "PseudoLinearStump" -- confirm the name is intentional.
    """

    def __init__(self, n_max_iterations=10, random_state=42,
                 twice_the_same=True, random_start=False,
                 save_train_data=True, test_graph=True,
                 base_estimator="PseudoLinearStump"):
        forwarded = {
            "n_max_iterations": n_max_iterations,
            "random_state": random_state,
            "twice_the_same": twice_the_same,
            "random_start": random_start,
            "save_train_data": save_train_data,
            "test_graph": test_graph,
            "base_estimator": base_estimator,
        }
        SelfOptCBBoostClassifier.__init__(self, **forwarded)
from .additions.SelOptCB import SelfOptCBBoostClassifier
classifier_class_name = "SelfOptCBBoostBaseStump"
class SelfOptCBBoostBaseStump(SelfOptCBBoostClassifier):
    """SelfOptCBBoostClassifier with ``base_estimator`` defaulting to
    ``"LinearStump"``.

    The constructor adds no logic of its own: every argument is forwarded
    unchanged to ``SelfOptCBBoostClassifier.__init__``.

    NOTE(review): the class name says "BaseStump" although the default base
    estimator is "LinearStump" -- confirm the name is intentional.
    """

    def __init__(self, n_max_iterations=10, random_state=42,
                 twice_the_same=True, random_start=False,
                 save_train_data=True, test_graph=True,
                 base_estimator="LinearStump"):
        forwarded = {
            "n_max_iterations": n_max_iterations,
            "random_state": random_state,
            "twice_the_same": twice_the_same,
            "random_start": random_start,
            "save_train_data": save_train_data,
            "test_graph": test_graph,
            "base_estimator": base_estimator,
        }
        SelfOptCBBoostClassifier.__init__(self, **forwarded)
......@@ -35,6 +35,7 @@ class BaseMultiviewClassifier(BaseClassifier):
self.used_views = None
def set_base_estim_from_dict(self, base_estim_dict, **kwargs):
print(base_estim_dict)
if base_estim_dict is None:
base_estimator = DecisionTreeClassifier()
elif isinstance(base_estim_dict, str) and kwargs is not None:
......
......@@ -32,7 +32,8 @@ class MultiviewCBoundBoostingAdapt(BaseMultiviewClassifier, MultiviewCBoundBoost
weight_update=weight_update, use_previous_voters=use_previous_voters,
full_combination=full_combination,
min_cq_pred=min_cq_pred, min_cq_mu=min_cq_mu,
sig_mult=sig_mult, sig_offset=sig_offset,
sig_mult=sig_mult, sig_offset=sig_offset, only_zero_one_weights=False,
update_only_chosen=False,
**kwargs)
BaseMultiviewClassifier.__init__(self, random_state)
self.param_names = ["n_estimators","random_state"]
......
import numpy as np
from .additions.fusion_utils import BaseFusionClassifier
from ..multiview.multiview_utils import get_available_monoview_classifiers, \
BaseMultiviewClassifier, ConfigGenerator
from ..utils.dataset import get_samples_views_indices
from ..utils.multiclass import get_mc_estim, MultiClassWrapper
# from ..utils.dataset import get_v
classifier_class_name = "EarlyFusionAdaboost"
class EarlyFusionAdaboost(BaseMultiviewClassifier, BaseFusionClassifier):
    """
    Builds a monoview dataset by concatenating the views and learns a
    monoview classifier (``"adaboost"`` by default) on the concatenation.

    NOTE(review): ``view_weights`` are normalised to sum to one but are not
    applied to the concatenated data -- confirm whether the views were meant
    to be scaled by their weight.
    """

    def __init__(self, random_state=None, view_weights=None,
                 monoview_classifier_name="adaboost",
                 monoview_classifier_config=None):
        """
        Parameters
        ----------
        random_state : forwarded to ``BaseMultiviewClassifier.__init__``.
        view_weights : sequence of numbers or None
            One weight per view; ``None`` means equal weights.
        monoview_classifier_name : str
            Name handed to ``init_monoview_estimator``.
        monoview_classifier_config : dict or None
            Configuration handed to ``init_monoview_estimator``; ``None``
            stands for an empty configuration.
        """
        BaseMultiviewClassifier.__init__(self, random_state=random_state)
        # Build the default per instance: a shared ``{}`` default would be
        # stored on every instance and leak mutations between them.
        if monoview_classifier_config is None:
            monoview_classifier_config = {}
        self.view_weights = view_weights
        self.monoview_classifier_name = monoview_classifier_name
        # Same short name as the one set_params() assigns.
        self.short_name = "early_fusion_adaboost"
        # The configuration is stored exactly as received (the former
        # conditional unwrapping was overwritten on the next line, so it
        # never had any effect).
        self.monoview_classifier_config = monoview_classifier_config
        self.monoview_classifier = self.init_monoview_estimator(
            monoview_classifier_name, monoview_classifier_config)
        # param_names and distribs are parallel lists: one distribution per
        # searchable parameter.
        self.param_names = ["monoview_classifier_config"]
        self.distribs = [
            ConfigGenerator(get_available_monoview_classifiers())]
        self.classed_params = []
        self.weird_strings = {}

    def set_params(self, monoview_classifier_name="adaboost",
                   monoview_classifier_config=None, **params):
        """Rebuild the monoview estimator from a name and a configuration.

        Any additional keyword in ``params`` is ignored. Returns ``self``.
        """
        if monoview_classifier_config is None:
            monoview_classifier_config = {}
        self.monoview_classifier_name = monoview_classifier_name
        self.monoview_classifier = self.init_monoview_estimator(
            monoview_classifier_name,
            monoview_classifier_config)
        self.monoview_classifier_config = self.monoview_classifier.get_params()
        self.short_name = "early_fusion_adaboost"
        return self

    def get_params(self, deep=True):
        """Return the constructor parameters; ``deep`` has no effect."""
        return {"random_state": self.random_state,
                "view_weights": self.view_weights,
                "monoview_classifier_name": self.monoview_classifier_name,
                "monoview_classifier_config": self.monoview_classifier_config}

    def fit(self, X, y, train_indices=None, view_indices=None):
        """Fit the monoview classifier on the concatenated training views.

        When more than two distinct labels are present in the training
        samples and the estimator is not already a ``MultiClassWrapper``,
        it is first replaced by the result of ``get_mc_estim``.
        Returns ``self``.
        """
        train_indices, X = self.transform_data_to_monoview(X, train_indices,
                                                           view_indices)
        self.used_views = view_indices
        if np.unique(y[train_indices]).shape[0] > 2 and \
                not (isinstance(self.monoview_classifier, MultiClassWrapper)):
            self.monoview_classifier = get_mc_estim(self.monoview_classifier,
                                                    self.random_state,
                                                    multiview=False,
                                                    y=y[train_indices])
        self.monoview_classifier.fit(X, y[train_indices])
        self.monoview_classifier_config = self.monoview_classifier.get_params()
        return self

    def predict(self, X, sample_indices=None, view_indices=None):
        """Predict the labels of the requested samples."""
        _, X = self.transform_data_to_monoview(X, sample_indices, view_indices)
        self._check_views(self.view_indices)
        predicted_labels = self.monoview_classifier.predict(X)
        return predicted_labels

    def transform_data_to_monoview(self, dataset, sample_indices,
                                   view_indices):
        """Here, we extract the data from the HDF5 dataset file and store all
        the concatenated views in one variable.

        Returns the resolved sample indices and the concatenated data.
        """
        sample_indices, self.view_indices = get_samples_views_indices(
            dataset, sample_indices, view_indices)
        if self.view_weights is None:
            self.view_weights = np.ones(len(self.view_indices), dtype=float)
        else:
            # Force a float array: the in-place division below raises on an
            # integer array (e.g. view_weights=[1, 2]).
            self.view_weights = np.array(self.view_weights, dtype=float)
        self.view_weights /= float(np.sum(self.view_weights))
        X = self.hdf5_to_monoview(dataset, sample_indices)
        return sample_indices, X

    def hdf5_to_monoview(self, dataset, samples):
        """Here, we concatenate the views for the asked samples."""
        # Zipping with the weights is kept so that the number of concatenated
        # views is still bounded by the number of weights; the weight values
        # themselves are not used.
        return np.concatenate(
            [dataset.get_v(view_idx, samples)
             for _, view_idx in zip(self.view_weights, self.view_indices)],
            axis=1)
# def set_monoview_classifier_config(self, monoview_classifier_name, monoview_classifier_config):
# if monoview_classifier_name in monoview_classifier_config:
# self.monoview_classifier.set_params(**monoview_classifier_config[monoview_classifier_name])
# else:
# self.monoview_classifier.set_params(**monoview_classifier_config)
import numpy as np
from .additions.fusion_utils import BaseFusionClassifier
from ..multiview.multiview_utils import get_available_monoview_classifiers, \
BaseMultiviewClassifier, ConfigGenerator
from ..utils.dataset import get_samples_views_indices
from ..utils.multiclass import get_mc_estim, MultiClassWrapper
# from ..utils.dataset import get_v
classifier_class_name = "EarlyFusionCB"
class EarlyFusionCB(BaseMultiviewClassifier, BaseFusionClassifier):
    """
    Builds a monoview dataset by concatenating the views and learns a
    monoview classifier (``"cb_boost"`` by default) on the concatenation.

    NOTE(review): ``view_weights`` are normalised to sum to one but are not
    applied to the concatenated data -- confirm whether the views were meant
    to be scaled by their weight.
    """

    def __init__(self, random_state=None, view_weights=None,
                 monoview_classifier_name="cb_boost",
                 monoview_classifier_config=None):
        """
        Parameters
        ----------
        random_state : forwarded to ``BaseMultiviewClassifier.__init__``.
        view_weights : sequence of numbers or None
            One weight per view; ``None`` means equal weights.
        monoview_classifier_name : str
            Name handed to ``init_monoview_estimator``.
        monoview_classifier_config : dict or None
            Configuration handed to ``init_monoview_estimator``; ``None``
            stands for an empty configuration.
        """
        BaseMultiviewClassifier.__init__(self, random_state=random_state)
        # Build the default per instance: a shared ``{}`` default would be
        # stored on every instance and leak mutations between them.
        if monoview_classifier_config is None:
            monoview_classifier_config = {}
        self.view_weights = view_weights
        self.monoview_classifier_name = monoview_classifier_name
        self.short_name = "early_fusion_cb"
        # The configuration is stored exactly as received (the former
        # conditional unwrapping was overwritten on the next line, so it
        # never had any effect).
        self.monoview_classifier_config = monoview_classifier_config
        self.monoview_classifier = self.init_monoview_estimator(
            monoview_classifier_name, monoview_classifier_config)
        # param_names and distribs are parallel lists: one distribution per
        # searchable parameter.
        self.param_names = ["monoview_classifier_config"]
        self.distribs = [
            ConfigGenerator(get_available_monoview_classifiers())]
        self.classed_params = []
        self.weird_strings = {}

    def set_params(self, monoview_classifier_name="cb_boost",
                   monoview_classifier_config=None, **params):
        """Rebuild the monoview estimator from a name and a configuration.

        Any additional keyword in ``params`` is ignored. Returns ``self``.
        """
        if monoview_classifier_config is None:
            monoview_classifier_config = {}
        self.monoview_classifier_name = monoview_classifier_name
        self.monoview_classifier = self.init_monoview_estimator(
            monoview_classifier_name,
            monoview_classifier_config)
        self.monoview_classifier_config = self.monoview_classifier.get_params()
        self.short_name = "early_fusion_cb"
        return self

    def get_params(self, deep=True):
        """Return the constructor parameters; ``deep`` has no effect."""
        return {"random_state": self.random_state,
                "view_weights": self.view_weights,
                "monoview_classifier_name": self.monoview_classifier_name,
                "monoview_classifier_config": self.monoview_classifier_config}

    def fit(self, X, y, train_indices=None, view_indices=None):
        """Fit the monoview classifier on the concatenated training views.

        When more than two distinct labels are present in the training
        samples and the estimator is not already a ``MultiClassWrapper``,
        it is first replaced by the result of ``get_mc_estim``.
        Returns ``self``.
        """
        train_indices, X = self.transform_data_to_monoview(X, train_indices,
                                                           view_indices)
        self.used_views = view_indices
        if np.unique(y[train_indices]).shape[0] > 2 and \
                not (isinstance(self.monoview_classifier, MultiClassWrapper)):
            self.monoview_classifier = get_mc_estim(self.monoview_classifier,
                                                    self.random_state,
                                                    multiview=False,
                                                    y=y[train_indices])
        self.monoview_classifier.fit(X, y[train_indices])
        self.monoview_classifier_config = self.monoview_classifier.get_params()
        return self

    def predict(self, X, sample_indices=None, view_indices=None):
        """Predict the labels of the requested samples."""
        _, X = self.transform_data_to_monoview(X, sample_indices, view_indices)
        self._check_views(self.view_indices)
        predicted_labels = self.monoview_classifier.predict(X)
        return predicted_labels

    def transform_data_to_monoview(self, dataset, sample_indices,
                                   view_indices):
        """Here, we extract the data from the HDF5 dataset file and store all
        the concatenated views in one variable.

        Returns the resolved sample indices and the concatenated data.
        """
        sample_indices, self.view_indices = get_samples_views_indices(
            dataset, sample_indices, view_indices)
        if self.view_weights is None:
            self.view_weights = np.ones(len(self.view_indices), dtype=float)
        else:
            # Force a float array: the in-place division below raises on an
            # integer array (e.g. view_weights=[1, 2]).
            self.view_weights = np.array(self.view_weights, dtype=float)
        self.view_weights /= float(np.sum(self.view_weights))
        X = self.hdf5_to_monoview(dataset, sample_indices)
        return sample_indices, X

    def hdf5_to_monoview(self, dataset, samples):
        """Here, we concatenate the views for the asked samples."""
        # Zipping with the weights is kept so that the number of concatenated
        # views is still bounded by the number of weights; the weight values
        # themselves are not used.
        return np.concatenate(
            [dataset.get_v(view_idx, samples)
             for _, view_idx in zip(self.view_weights, self.view_indices)],
            axis=1)
# def set_monoview_classifier_config(self, monoview_classifier_name, monoview_classifier_config):
# if monoview_classifier_name in monoview_classifier_config:
# self.monoview_classifier.set_params(**monoview_classifier_config[monoview_classifier_name])
# else:
# self.monoview_classifier.set_params(**monoview_classifier_config)
import numpy as np
from .additions.fusion_utils import BaseFusionClassifier
from ..multiview.multiview_utils import get_available_monoview_classifiers, \
BaseMultiviewClassifier, ConfigGenerator
from ..utils.dataset import get_samples_views_indices
from ..utils.multiclass import get_mc_estim, MultiClassWrapper
# from ..utils.dataset import get_v
classifier_class_name = "EarlyFusionDT"
class EarlyFusionDT(BaseMultiviewClassifier, BaseFusionClassifier):
    """
    Builds a monoview dataset by concatenating the views and learns a
    monoview classifier (``"decision_tree"`` by default) on the
    concatenation.

    NOTE(review): ``view_weights`` are normalised to sum to one but are not
    applied to the concatenated data -- confirm whether the views were meant
    to be scaled by their weight.
    """

    def __init__(self, random_state=None, view_weights=None,
                 monoview_classifier_name="decision_tree",
                 monoview_classifier_config=None):
        """
        Parameters
        ----------
        random_state : forwarded to ``BaseMultiviewClassifier.__init__``.
        view_weights : sequence of numbers or None
            One weight per view; ``None`` means equal weights.
        monoview_classifier_name : str
            Name handed to ``init_monoview_estimator``.
        monoview_classifier_config : dict or None
            Configuration handed to ``init_monoview_estimator``; ``None``
            stands for the default ``{"max_depth": 100}``.
        """
        BaseMultiviewClassifier.__init__(self, random_state=random_state)
        # Build the default per instance: a shared dict default would be
        # stored on every instance and leak mutations between them.
        if monoview_classifier_config is None:
            monoview_classifier_config = {"max_depth": 100}
        self.view_weights = view_weights
        self.monoview_classifier_name = monoview_classifier_name
        self.short_name = "early_fusion_dt"
        # The configuration is stored exactly as received (the former
        # conditional unwrapping was overwritten on the next line, so it
        # never had any effect).
        self.monoview_classifier_config = monoview_classifier_config
        self.monoview_classifier = self.init_monoview_estimator(
            monoview_classifier_name, monoview_classifier_config)
        # param_names and distribs are parallel lists: one distribution per
        # searchable parameter.
        self.param_names = ["monoview_classifier_config"]
        self.distribs = [
            ConfigGenerator(get_available_monoview_classifiers())]
        self.classed_params = []
        self.weird_strings = {}

    def set_params(self, monoview_classifier_name="decision_tree",
                   monoview_classifier_config=None, **params):
        """Rebuild the monoview estimator from a name and a configuration.

        Any additional keyword in ``params`` is ignored. Returns ``self``.
        """
        if monoview_classifier_config is None:
            monoview_classifier_config = {}
        self.monoview_classifier_name = monoview_classifier_name
        self.monoview_classifier = self.init_monoview_estimator(
            monoview_classifier_name,
            monoview_classifier_config)
        self.monoview_classifier_config = self.monoview_classifier.get_params()
        self.short_name = "early_fusion_dt"
        return self

    def get_params(self, deep=True):
        """Return the constructor parameters; ``deep`` has no effect."""
        return {"random_state": self.random_state,
                "view_weights": self.view_weights,
                "monoview_classifier_name": self.monoview_classifier_name,
                "monoview_classifier_config": self.monoview_classifier_config}

    def fit(self, X, y, train_indices=None, view_indices=None):
        """Fit the monoview classifier on the concatenated training views.

        When more than two distinct labels are present in the training
        samples and the estimator is not already a ``MultiClassWrapper``,
        it is first replaced by the result of ``get_mc_estim``.
        Returns ``self``.
        """
        train_indices, X = self.transform_data_to_monoview(X, train_indices,
                                                           view_indices)
        self.used_views = view_indices
        if np.unique(y[train_indices]).shape[0] > 2 and \
                not (isinstance(self.monoview_classifier, MultiClassWrapper)):
            self.monoview_classifier = get_mc_estim(self.monoview_classifier,
                                                    self.random_state,
                                                    multiview=False,
                                                    y=y[train_indices])
        self.monoview_classifier.fit(X, y[train_indices])
        self.monoview_classifier_config = self.monoview_classifier.get_params()
        return self

    def predict(self, X, sample_indices=None, view_indices=None):
        """Predict the labels of the requested samples."""
        _, X = self.transform_data_to_monoview(X, sample_indices, view_indices)
        self._check_views(self.view_indices)
        predicted_labels = self.monoview_classifier.predict(X)
        return predicted_labels

    def transform_data_to_monoview(self, dataset, sample_indices,
                                   view_indices):
        """Here, we extract the data from the HDF5 dataset file and store all
        the concatenated views in one variable.

        Returns the resolved sample indices and the concatenated data.
        """
        sample_indices, self.view_indices = get_samples_views_indices(
            dataset, sample_indices, view_indices)
        if self.view_weights is None:
            self.view_weights = np.ones(len(self.view_indices), dtype=float)
        else:
            # Force a float array: the in-place division below raises on an
            # integer array (e.g. view_weights=[1, 2]).
            self.view_weights = np.array(self.view_weights, dtype=float)
        self.view_weights /= float(np.sum(self.view_weights))
        X = self.hdf5_to_monoview(dataset, sample_indices)
        return sample_indices, X

    def hdf5_to_monoview(self, dataset, samples):
        """Here, we concatenate the views for the asked samples."""
        # Zipping with the weights is kept so that the number of concatenated
        # views is still bounded by the number of weights; the weight values
        # themselves are not used.
        return np.concatenate(
            [dataset.get_v(view_idx, samples)
             for _, view_idx in zip(self.view_weights, self.view_indices)],
            axis=1)
# def set_monoview_classifier_config(self, monoview_classifier_name, monoview_classifier_config):
# if monoview_classifier_name in monoview_classifier_config:
# self.monoview_classifier.set_params(**monoview_classifier_config[monoview_classifier_name])
# else:
# self.monoview_classifier.set_params(**monoview_classifier_config)
import numpy as np
from .additions.fusion_utils import BaseFusionClassifier
from ..multiview.multiview_utils import get_available_monoview_classifiers, \
BaseMultiviewClassifier, ConfigGenerator
from ..utils.dataset import get_samples_views_indices
from ..utils.multiclass import get_mc_estim, MultiClassWrapper
# from ..utils.dataset import get_v
classifier_class_name = "EarlyFusionRF"
class EarlyFusionRF(BaseMultiviewClassifier, BaseFusionClassifier):
    """
    Builds a monoview dataset by concatenating the views and learns a
    monoview classifier (``"random_forest"`` by default) on the
    concatenation.

    NOTE(review): ``view_weights`` are normalised to sum to one but are not
    applied to the concatenated data -- confirm whether the views were meant
    to be scaled by their weight.
    """

    def __init__(self, random_state=None, view_weights=None,
                 monoview_classifier_name="random_forest",
                 monoview_classifier_config=None):
        """
        Parameters
        ----------
        random_state : forwarded to ``BaseMultiviewClassifier.__init__``.
        view_weights : sequence of numbers or None
            One weight per view; ``None`` means equal weights.
        monoview_classifier_name : str
            Name handed to ``init_monoview_estimator``.
        monoview_classifier_config : dict or None
            Configuration handed to ``init_monoview_estimator``; ``None``
            stands for an empty configuration.
        """
        BaseMultiviewClassifier.__init__(self, random_state=random_state)
        # Build the default per instance: a shared ``{}`` default would be
        # stored on every instance and leak mutations between them.
        if monoview_classifier_config is None:
            monoview_classifier_config = {}
        self.view_weights = view_weights
        self.monoview_classifier_name = monoview_classifier_name
        self.short_name = "early_fusion_rf"
        # The configuration is stored exactly as received (the former
        # conditional unwrapping was overwritten on the next line, so it
        # never had any effect).
        self.monoview_classifier_config = monoview_classifier_config
        self.monoview_classifier = self.init_monoview_estimator(
            monoview_classifier_name, monoview_classifier_config)
        # param_names and distribs are parallel lists: one distribution per
        # searchable parameter.
        self.param_names = ["monoview_classifier_config"]
        self.distribs = [
            ConfigGenerator(get_available_monoview_classifiers())]
        self.classed_params = []
        self.weird_strings = {}

    def set_params(self, monoview_classifier_name="random_forest",
                   monoview_classifier_config=None, **params):
        """Rebuild the monoview estimator from a name and a configuration.

        Any additional keyword in ``params`` is ignored. Returns ``self``.
        """
        if monoview_classifier_config is None:
            monoview_classifier_config = {}
        self.monoview_classifier_name = monoview_classifier_name
        self.monoview_classifier = self.init_monoview_estimator(
            monoview_classifier_name,
            monoview_classifier_config)
        self.monoview_classifier_config = self.monoview_classifier.get_params()
        self.short_name = "early_fusion_rf"
        return self

    def get_params(self, deep=True):
        """Return the constructor parameters; ``deep`` has no effect."""
        return {"random_state": self.random_state,
                "view_weights": self.view_weights,
                "monoview_classifier_name": self.monoview_classifier_name,
                "monoview_classifier_config": self.monoview_classifier_config}

    def fit(self, X, y, train_indices=None, view_indices=None):
        """Fit the monoview classifier on the concatenated training views.

        When more than two distinct labels are present in the training
        samples and the estimator is not already a ``MultiClassWrapper``,
        it is first replaced by the result of ``get_mc_estim``.
        Returns ``self``.
        """
        train_indices, X = self.transform_data_to_monoview(X, train_indices,
                                                           view_indices)
        self.used_views = view_indices
        if np.unique(y[train_indices]).shape[0] > 2 and \
                not (isinstance(self.monoview_classifier, MultiClassWrapper)):
            self.monoview_classifier = get_mc_estim(self.monoview_classifier,
                                                    self.random_state,
                                                    multiview=False,
                                                    y=y[train_indices])
        self.monoview_classifier.fit(X, y[train_indices])
        self.monoview_classifier_config = self.monoview_classifier.get_params()
        return self

    def predict(self, X, sample_indices=None, view_indices=None):
        """Predict the labels of the requested samples."""
        _, X = self.transform_data_to_monoview(X, sample_indices, view_indices)
        self._check_views(self.view_indices)
        predicted_labels = self.monoview_classifier.predict(X)
        return predicted_labels

    def transform_data_to_monoview(self, dataset, sample_indices,
                                   view_indices):
        """Here, we extract the data from the HDF5 dataset file and store all
        the concatenated views in one variable.

        Returns the resolved sample indices and the concatenated data.
        """
        sample_indices, self.view_indices = get_samples_views_indices(
            dataset, sample_indices, view_indices)
        if self.view_weights is None:
            self.view_weights = np.ones(len(self.view_indices), dtype=float)
        else:
            # Force a float array: the in-place division below raises on an
            # integer array (e.g. view_weights=[1, 2]).
            self.view_weights = np.array(self.view_weights, dtype=float)
        self.view_weights /= float(np.sum(self.view_weights))
        X = self.hdf5_to_monoview(dataset, sample_indices)
        return sample_indices, X

    def hdf5_to_monoview(self, dataset, samples):
        """Here, we concatenate the views for the asked samples."""
        # Zipping with the weights is kept so that the number of concatenated
        # views is still bounded by the number of weights; the weight values
        # themselves are not used.
        return np.concatenate(
            [dataset.get_v(view_idx, samples)
             for _, view_idx in zip(self.view_weights, self.view_indices)],
            axis=1)
# def set_monoview_classifier_config(self, monoview_classifier_name, monoview_classifier_config):
# if monoview_classifier_name in monoview_classifier_config:
# self.monoview_classifier.set_params(**monoview_classifier_config[monoview_classifier_name])
# else:
# self.monoview_classifier.set_params(**monoview_classifier_config)
import numpy as np
from .additions.fusion_utils import BaseFusionClassifier
from ..multiview.multiview_utils import get_available_monoview_classifiers, \
BaseMultiviewClassifier, ConfigGenerator
from ..utils.dataset import get_samples_views_indices
from ..utils.multiclass import get_mc_estim, MultiClassWrapper
# from ..utils.dataset import get_v
classifier_class_name = "EarlyFusionSVM"
class EarlyFusionSVM(BaseMultiviewClassifier, BaseFusionClassifier):
    """
    Builds a monoview dataset by concatenating the views and learns a
    monoview classifier (``svm_rbf`` by default) on the concatenation.

    ``view_weights`` are normalised to sum to one when the data is
    transformed, but they are not applied to the concatenated features.
    """

    def __init__(self, random_state=None, view_weights=None,
                 monoview_classifier_name="svm_rbf",
                 monoview_classifier_config={}):
        """Store the parameters and build the monoview estimator.

        ``monoview_classifier_config`` is stored exactly as received and
        handed, unchanged, to ``init_monoview_estimator``.
        """
        BaseMultiviewClassifier.__init__(self, random_state=random_state)
        self.view_weights = view_weights
        self.monoview_classifier_name = monoview_classifier_name
        self.short_name = "early_fusion_svm"
        # The config is kept as given. A former branch that unwrapped
        # ``config[monoview_classifier_name]`` was always overwritten by
        # this assignment, so it has been removed.
        # NOTE(review): keeping the argument object untouched presumably
        # also matters for sklearn-style cloning — confirm before
        # unwrapping the nested config here.
        self.monoview_classifier_config = monoview_classifier_config
        self.monoview_classifier = self.init_monoview_estimator(
            monoview_classifier_name, monoview_classifier_config)
        self.param_names = ["monoview_classifier_config"]
        # NOTE(review): two distributions are listed for a single parameter
        # name — confirm how the hyper-parameter search pairs them.
        self.distribs = [get_available_monoview_classifiers(),
                         ConfigGenerator(get_available_monoview_classifiers())]
        self.classed_params = []
        self.weird_strings = {}

    def set_params(self, monoview_classifier_name="svm_rbf",
                   monoview_classifier_config={}, **params):
        """Rebuild the monoview estimator from a name and a config.

        Any other keyword argument is accepted but ignored. The stored
        config becomes the ``get_params()`` of the rebuilt estimator.
        Returns ``self``.
        """
        self.monoview_classifier_name = monoview_classifier_name
        self.monoview_classifier = self.init_monoview_estimator(
            monoview_classifier_name,
            monoview_classifier_config)
        self.monoview_classifier_config = self.monoview_classifier.get_params()
        self.short_name = "early_fusion_svm"
        return self

    def get_params(self, deep=True):
        """Return the constructor parameters as a dict (``deep`` is unused)."""
        return {"random_state": self.random_state,
                "view_weights": self.view_weights,
                "monoview_classifier_name": self.monoview_classifier_name,
                "monoview_classifier_config": self.monoview_classifier_config}

    def fit(self, X, y, train_indices=None, view_indices=None):
        """Fit the monoview classifier on the concatenated training views.

        When the training labels hold more than two distinct values and the
        classifier is not already a ``MultiClassWrapper``, it is replaced by
        the result of ``get_mc_estim`` before fitting. Returns ``self``.
        """
        train_indices, X = self.transform_data_to_monoview(X, train_indices,
                                                           view_indices)
        # The raw argument (possibly None) is stored, not the resolved list.
        self.used_views = view_indices
        y_train = y[train_indices]
        needs_multiclass = np.unique(y_train).shape[0] > 2
        if needs_multiclass and \
                not isinstance(self.monoview_classifier, MultiClassWrapper):
            self.monoview_classifier = get_mc_estim(self.monoview_classifier,
                                                    self.random_state,
                                                    multiview=False,
                                                    y=y_train)
        self.monoview_classifier.fit(X, y_train)
        self.monoview_classifier_config = self.monoview_classifier.get_params()
        return self

    def predict(self, X, sample_indices=None, view_indices=None):
        """Predict labels for the requested samples of a multiview dataset."""
        _, X = self.transform_data_to_monoview(X, sample_indices, view_indices)
        # Checked after the transform, which sets self.view_indices.
        self._check_views(self.view_indices)
        predicted_labels = self.monoview_classifier.predict(X)
        return predicted_labels

    def transform_data_to_monoview(self, dataset, sample_indices,
                                   view_indices):
        """Here, we extract the data from the HDF5 dataset file and store all
        the concatenated views in one variable.

        Also stores the resolved view indices in ``self.view_indices`` and
        replaces ``self.view_weights`` by a float array summing to one
        (uniform when it was ``None``). Returns ``(sample_indices, X)``.
        """
        sample_indices, self.view_indices = get_samples_views_indices(
            dataset, sample_indices, view_indices)
        if self.view_weights is None:
            weights = np.ones(len(self.view_indices), dtype=float)
        else:
            # Force a float array: with integer weights (e.g. [1, 2]) an
            # in-place true division cannot be cast back to the integer
            # dtype and NumPy raises instead of normalising.
            weights = np.array(self.view_weights, dtype=float)
        self.view_weights = weights / float(np.sum(weights))
        X = self.hdf5_to_monoview(dataset, sample_indices)
        return sample_indices, X

    def hdf5_to_monoview(self, dataset, samples):
        """Here, we concatenate the views for the asked samples.

        The weights only bound the iteration (zip stops at the shorter of
        weights / view indices); they are not applied to the data.
        """
        monoview_data = np.concatenate(
            [dataset.get_v(view_idx, samples)
             for _weight, view_idx
             in zip(self.view_weights, self.view_indices)], axis=1)
        return monoview_data
# def set_monoview_classifier_config(self, monoview_classifier_name, monoview_classifier_config):
# if monoview_classifier_name in monoview_classifier_config:
# self.monoview_classifier.set_params(**monoview_classifier_config[monoview_classifier_name])
# else:
# self.monoview_classifier.set_params(**monoview_classifier_config)
from sklearn.tree import DecisionTreeClassifier
from multimodal.boosting.cumbo import MuCumboClassifier
from multimodal.boosting.combo import MuComboClassifier
from ..multiview.multiview_utils import BaseMultiviewClassifier
from ..utils.hyper_parameter_search import CustomRandint
from ..utils.dataset import get_samples_views_indices
......@@ -10,14 +10,14 @@ from ..utils.base import base_boosting_estimators
classifier_class_name = "MuCumbo"
class MuCumbo(BaseMultiviewClassifier, MuCumboClassifier):
class MuCumbo(BaseMultiviewClassifier, MuComboClassifier):
def __init__(self, base_estimator=None,
n_estimators=50,
random_state=None,**kwargs):
BaseMultiviewClassifier.__init__(self, random_state)
base_estimator = self.set_base_estim_from_dict(base_estimator, **kwargs)
MuCumboClassifier.__init__(self, base_estimator=base_estimator,
MuComboClassifier.__init__(self, base_estimator=base_estimator,
n_estimators=n_estimators,
random_state=random_state,)
self.param_names = ["base_estimator", "n_estimators", "random_state",]
......@@ -31,7 +31,7 @@ class MuCumbo(BaseMultiviewClassifier, MuCumboClassifier):
self.used_views = view_indices
numpy_X, view_limits = X.to_numpy_array(sample_indices=train_indices,
view_indices=view_indices)
return MuCumboClassifier.fit(self, numpy_X, y[train_indices],
return MuComboClassifier.fit(self, numpy_X, y[train_indices],
view_limits)
def predict(self, X, sample_indices=None, view_indices=None):
......@@ -41,7 +41,7 @@ class MuCumbo(BaseMultiviewClassifier, MuCumboClassifier):
self._check_views(view_indices)
numpy_X, view_limits = X.to_numpy_array(sample_indices=sample_indices,
view_indices=view_indices)
return MuCumboClassifier.predict(self, numpy_X)
return MuComboClassifier.predict(self, numpy_X)
def get_interpretation(self, directory, base_file_name, labels,
multiclass=False):
......
from sklearn.tree import DecisionTreeClassifier
from sklearn.base import BaseEstimator
import numpy as np
import os
......@@ -20,7 +21,15 @@ class Mumbo(BaseMultiviewClassifier, MumboClassifier):
random_state=None,
best_view_mode="edge", **kwargs):
BaseMultiviewClassifier.__init__(self, random_state)
base_estimator = self.set_base_estim_from_dict(base_estimator, **kwargs)
if type(base_estimator) is list:
if type(base_estimator[0]) is dict:
base_estimator = [self.set_base_estim_from_dict(estim, **kwargs) for estim in base_estimator]
elif isinstance(base_estimator[0], BaseEstimator):
base_estimator = base_estimator
else:
raise ValueError("base_estimator should ba a list of dict or a sklearn classifier list")
else:
base_estimator = self.set_base_estim_from_dict(base_estimator, **kwargs)
MumboClassifier.__init__(self, base_estimator=base_estimator,
n_estimators=n_estimators,
random_state=random_state,
......@@ -103,3 +112,7 @@ class Mumbo(BaseMultiviewClassifier, MumboClassifier):
interpret_string +="\n The boosting process selected views : \n" + ", ".join(map(str, self.best_views_))
interpret_string+="\n\n With estimator weights : \n"+ "\n".join(map(str,self.estimator_weights_/np.sum(self.estimator_weights_)))
return interpret_string
def accepts_multi_class(self, random_state, n_samples=10, dim=2, n_classes=3,
                        n_views=2):
    """Always answer ``True``; none of the arguments is inspected.

    NOTE(review): presumably tells the platform that this classifier can be
    used on multiclass problems without a wrapper — confirm with the caller.
    """
    return True
......@@ -8,9 +8,9 @@ class MVCBBoost(MultiviewCBoundBoostingAdapt):
self_complemented=True,
twice_the_same=False,
random_start=False,
n_stumps=10,
n_stumps=100,
c_bound_sol=True,
base_estimator="Trees",
base_estimator="Stumps",
max_depth=1,
mincq_tracking=False,
weight_add=3,
......@@ -21,7 +21,7 @@ class MVCBBoost(MultiviewCBoundBoostingAdapt):
min_cq_mu=10e-3,
sig_mult=15,
sig_offset=5,
use_previous_voters=False, **kwargs):
use_previous_voters=True, **kwargs):
MultiviewCBoundBoostingAdapt.__init__(self, n_estimators=n_estimators, random_state=random_state,
self_complemented=self_complemented, twice_the_same=twice_the_same,
random_start=random_start, n_stumps=n_stumps, c_bound_sol=c_bound_sol, max_depth=max_depth,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment