Commit e5c04281 authored by Baptiste Bauvin

added some files

parent a32a7923
# The base configuration of the benchmark
log: True
name: ["ionosphere", "abalone", "australian", "balance", "bupa", "cylinder", "hepatitis", "pima", "yeast", "zoo"]
label: "comp_1"
name: ['tnbc_mazid']
label: ""
file_type: ".hdf5"
views:
pathf: "/home/baptiste/Documents/Datasets/UCI/both/"
pathf: "/home/baptiste/Documents/Datasets/Mazid/"
nice: 0
random_state: 42
nb_cores: 1
@@ -13,34 +13,53 @@ debug: True
add_noise: False
noise_std: 0.0
res_dir: "../results/"
- track_tracebacks: False
+ track_tracebacks: True
# All the classification-related configuration options
multiclass_method: "oneVersusOne"
- split: 0.50
+ split: 0.30
nb_folds: 5
nb_class: 2
classes:
type: ["monoview",]
algos_monoview: ["cb_boost", "self_opt_cb", "adaboost", "cq_boost", "min_cq", "adaboost_pregen", "self_opt_cb_pseudo", "self_opt_cb_root"]
algos_multiview: ["mv_cb_boost","early_fusion_dt", "early_fusion_cb", "early_fusion_rf","mumbo", "early_fusion_svm" ]
type: ["monoview","multiview"]
algos_monoview: ["samba", "scm_bagging", "random_forest", "adaboost", 'scm']
algos_multiview: ["early_fusion_adaboost", "early_fusion_decision_tree", "early_fusion_random_forest", "early_fusion_samba"]
stats_iter: 5
metrics:
- accuracy_score: {}
+ balanced_accuracy: {}
+ f1_score:
+   average: 'micro'
- metric_princ: "accuracy_score"
- hps_type: "None"
+ accuracy_score: {}
+ metric_princ: "balanced_accuracy"
+ hps_type: "Random"
hps_args:
- n_iter: 30
- equivalent_draws: True
+ n_iter: 20
+ equivalent_draws: False
svm_rbf:
C: 0.7
scm_bagging:
{max_features: 0.908115713423863, max_rules: 9, max_samples: 0.9277949143533335, model_type: conjunction,
n_estimators: 109, p_options: 0.7823433255515356}
samba:
n_estimators: 22
adaboost:
{base_estimator: DecisionTreeClassifier, base_estimator__ccp_alpha: 0.0, base_estimator__class_weight: null,
base_estimator__criterion: gini, base_estimator__max_depth: 5, base_estimator__max_features: null,
base_estimator__max_leaf_nodes: null, base_estimator__min_impurity_decrease: 0.0,
base_estimator__min_impurity_split: null, base_estimator__min_samples_leaf: 1, base_estimator__min_samples_split: 2,
base_estimator__min_weight_fraction_leaf: 0.0, base_estimator__random_state: null,
base_estimator__splitter: best, n_estimators: 354}
svm_linear:
C: 0.3867
cb_boost:
n_stumps: 1
- n_max_iterations: 10
+ n_max_iterations: 20
estimators_generator: "Stumps"
cq_boost:
@@ -50,15 +69,42 @@ cq_boost:
min_cq:
n_stumps_per_attribute: 1
adaboost:
n_estimators: 10
decision_tree:
{criterion: entropy, max_depth: 271, splitter: random}
adaboost_pregen:
n_estimators: 10
n_stumps: 1
early_fusion_adaboost:
{base_estimator: DecisionTreeClassifier, base_estimator__ccp_alpha: 0.0, base_estimator__class_weight: null,
base_estimator__criterion: gini, base_estimator__max_depth: 5, base_estimator__max_features: null,
base_estimator__max_leaf_nodes: null, base_estimator__min_impurity_decrease: 0.0,
base_estimator__min_impurity_split: null, base_estimator__min_samples_leaf: 1, base_estimator__min_samples_split: 2,
base_estimator__min_weight_fraction_leaf: 0.0, base_estimator__random_state: null,
base_estimator__splitter: best, base_estimator_config: null, n_estimators: 273}
decision_tree:
max_depth: 2
early_fusion_decision_tree:
{criterion: entropy, max_depth: 293, splitter: random}
early_fusion_random_forest:
{criterion: gini, max_depth: 8, n_estimators: 46}
random_forest:
{criterion: gini, max_depth: 8, n_estimators: 32}
weighted_linear_late_fusion:
classifier_configs:
- decision_tree: {criterion: entropy, max_depth: 112, splitter: random}
- adaboost: {base_estimator: DecisionTreeClassifier, base_estimator__ccp_alpha: 0.0,
base_estimator__class_weight: null, base_estimator__criterion: gini, base_estimator__max_depth: 2,
base_estimator__max_features: null, base_estimator__max_leaf_nodes: null, base_estimator__min_impurity_decrease: 0.0,
base_estimator__min_impurity_split: null, base_estimator__min_samples_leaf: 1,
base_estimator__min_samples_split: 2, base_estimator__min_weight_fraction_leaf: 0.0,
base_estimator__random_state: null, base_estimator__splitter: best, n_estimators: 400}
classifiers_names: [decision_tree, adaboost]
nb_cores: 1
rs: 724
weights: [0.9636627605010293, 0.3834415188257777]
scm:
{max_rules: 10, model_type: conjunction, p: 0.8310271995093625}
mumbo:
base_estimator:
......
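For context, a hedged sketch of how a benchmark config file like the one above is launched. This assumes the execute entry point documented in the platform's README; the config path is hypothetical:

from summit.execute import execute

# Run the benchmark described by a YAML config file such as the one above;
# results are written under the configured res_dir.
execute(config_path="path/to/benchmark_config.yml")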
# The base configuration of the benchmark
log: True
name: ["mnist_0_9_train"]
name: ["multiview_mnist"]
label: "_"
file_type: ".hdf5"
views: ["NIMST_data", ]
pathf: "/home/baptiste/Documents/Datasets/MNist/"
views:
pathf: "examples/data/"
nice: 0
random_state: 43
nb_cores: 1
- full: False
+ full: True
debug: True
add_noise: False
noise_std: 0.0
@@ -19,16 +19,20 @@ track_tracebacks: False
multiclass_method: "oneVersusOne"
split: 0.96
nb_folds: 5
- nb_class: 2
+ nb_class:
classes:
type: ["monoview",]
algos_monoview: ["hm_gb_cbound","cb_boost"]
algos_multiview: ["mumbo","mvml"]
type: ["monoview","multiview"]
algos_monoview: ["decision_tree","adaboost"]
algos_multiview: ["mumbo","mvml", 'lp_norm_mkl', 'mucombo', 'early_fusion_decision_tree', 'early_fusion_adaboost']
stats_iter: 1
metrics:
- zero_one_loss: {}
accuracy_score: {}
+ f1_score: {}
- metric_princ: "zero_one_loss"
+ metric_princ: "accuracy_score"
hps_type: "None"
hps_args:
n_iter: 2
mumbo:
base_estimator:
decision_tree:
max_depth: 3
\ No newline at end of file
@@ -94,6 +94,7 @@ def setup_package():
# which is our case
license="GNUGPL",
# There is still a load of other possible parameters, but with these you
# cover 90% of the needs
# ext_modules=cythonize(
......
@@ -548,7 +548,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
argument_dictionaries=None,
benchmark=None, views=None, views_indices=None,
flag=None, labels=None,
- track_tracebacks=False): # pragma: no cover
+ track_tracebacks=False, nb_cores=1): # pragma: no cover
results_monoview, labels_names = benchmark_init(directory,
classification_indices,
labels,
@@ -564,7 +564,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
results_monoview += [
exec_monoview(directory, X, Y, args["name"], labels_names,
classification_indices, k_folds,
1, args["file_type"], args["pathf"], random_state,
nb_cores, args["file_type"], args["pathf"], random_state,
hyper_param_search=hyper_param_search,
metrics=metrics,
**arguments)]
@@ -679,7 +679,7 @@ def exec_benchmark(nb_cores, stats_iter,
for arguments in benchmark_arguments_dictionaries:
benchmark_results = exec_one_benchmark_mono_core(
dataset_var=dataset_var,
- track_tracebacks=track_tracebacks,
+ track_tracebacks=track_tracebacks, nb_cores=nb_cores,
**arguments)
analyze_iterations([benchmark_results],
benchmark_arguments_dictionaries, stats_iter,
......
"""Functions :
score: to get the accuracy score
get_scorer: returns a sklearn scorer for grid search
"""
from sklearn.metrics import balanced_accuracy_score as metric
from sklearn.metrics import make_scorer
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def score(y_true, y_pred, multiclass=False, **kwargs):
    """Arguments:
    y_true: real labels
    y_pred: predicted labels

    Keyword Arguments:
    forwarded to sklearn's balanced_accuracy_score (e.g. sample_weight)

    Returns:
    Balanced accuracy score for y_true, y_pred"""
    score = metric(y_true, y_pred, **kwargs)
    return score
def get_scorer(**kwargs):
    """Keyword Arguments:
    forwarded to sklearn's balanced_accuracy_score

    Returns:
    A sklearn scorer for balanced accuracy"""
    return make_scorer(metric, greater_is_better=True,
                       **kwargs)
def get_config(**kwargs):
config_string = "Balanced accuracy score using {}, (higher is better)".format(
kwargs)
return config_string
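A quick usage sketch of this module, with hypothetical labels; since balanced accuracy is the mean of per-class recalls, the expected value can be checked by hand:

import numpy as np

y_true = np.array([0, 0, 0, 1, 1])
y_pred = np.array([0, 0, 1, 1, 1])

# Recall is 2/3 on class 0 and 2/2 on class 1, so the balanced
# accuracy is (2/3 + 1) / 2 = 0.8333...
print(score(y_true, y_pred))

# get_scorer() wraps the same metric for use in hyper-parameter search.
scorer = get_scorer()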
@@ -35,7 +35,7 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
)
self.param_names = ["n_estimators", "base_estimator"]
self.classed_params = ["base_estimator"]
- self.distribs = [CustomRandint(low=1, high=500),
+ self.distribs = [CustomRandint(low=1, high=100),
base_boosting_estimators]
self.weird_strings = {"base_estimator": "class_name"}
self.plotted_metric = metrics.zero_one_loss
......
@@ -17,6 +17,7 @@ __status__ = "Prototype" # Production, Development, Prototype
classifier_class_name = "AdaboostPregen"
class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier,
PregenClassifier):
"""
......
from SamBA.samba import NeighborHoodClassifier, ExpTrainWeighting
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from SamBA.relevances import *
from SamBA.distances import *
from sklearn.preprocessing import RobustScaler
from ..monoview.monoview_utils import BaseMonoviewClassifier
from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
classifier_class_name = "SamBAClf"
class SamBAClf(NeighborHoodClassifier, BaseMonoviewClassifier):
def __init__(self, base_estimator=DecisionTreeClassifier(max_depth=1,
splitter='best',
criterion='gini'),
n_estimators=2,
estimator_params=tuple(),
relevance=MarginRelevance(),
distance=EuclidianDist(),
train_weighting=ExpTrainWeighting(),
keep_selected_features=True,
normalizer=RobustScaler(),
b=2,
pred_train=False,
forced_diversity=True,
**kwargs):
"""
Parameters
----------
random_state
model_type
max_rules
p
kwargs
"""
super(SamBAClf, self).__init__(base_estimator=base_estimator,
n_estimators=n_estimators,
estimator_params=estimator_params,
relevance=relevance,
distance=distance,
train_weighting=train_weighting,
keep_selected_features=keep_selected_features,
normalizer=normalizer,
forced_diversity=forced_diversity,
                                       b=b, pred_train=pred_train)
self.param_names = ["n_estimators", "relevance", "distance",
"train_weighting", "b", "pred_train", "normalizer"]
self.distribs = [CustomRandint(low=1, high=30),
[ExpRelevance(), MarginRelevance()],
[EuclidianDist(), PolarDist(), ExpEuclidianDist()],
[ExpTrainWeighting()],
CustomUniform(0.5, 3),
[True, False],
[RobustScaler(), None]]
self.classed_params = []
self.weird_strings = {}
def get_interpretation(self, directory, base_file_name, y_test, multi_class=False):
interpret_string = self.get_feature_importance(directory, base_file_name)
return interpret_string
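Since SamBAClf inherits the scikit-learn fit/predict API from SamBA's NeighborHoodClassifier, a minimal usage sketch (hypothetical toy data) would look like:

import numpy as np

# 20 hypothetical samples with 5 features and binary labels.
X = np.random.RandomState(42).rand(20, 5)
y = np.array([0, 1] * 10)

clf = SamBAClf(n_estimators=5)
clf.fit(X, y)
print(clf.predict(X))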
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
from ..monoview_classifiers.samba import SamBAClf
from SamBA.samba import *
from SamBA.distances import *
from SamBA.relevances import *
from sklearn.tree import DecisionTreeClassifier
classifier_class_name = "EarlyFusionSamba"
class EarlyFusionSamba(BaseEarlyFusion):
def __init__(self, random_state=None, base_estimator=DecisionTreeClassifier(max_depth=1,
splitter='best',
criterion='gini'),
n_estimators=2,
estimator_params=tuple(),
relevance=MarginRelevance(),
distance=EuclidianDist(),
train_weighting=ExpTrainWeighting(pred_train=True),
keep_selected_features=True,
normalizer=RobustScaler(),
pred_train=False,
b=2,
**kwargs):
BaseEarlyFusion.__init__(self, random_state=random_state,
monoview_classifier="samba",
base_estimator=base_estimator, estimator_params=estimator_params,
relevance=relevance, distance=distance, train_weighting=train_weighting,
keep_selected_features=keep_selected_features, normalizer=normalizer,
n_estimators=n_estimators, pred_train=pred_train, b=b, **kwargs)
self.param_names = ["n_estimators", "relevance", "distance",
"train_weighting", "b", "pred_train"]
self.distribs = [CustomRandint(low=1, high=30),
[ExpRelevance(), MarginRelevance()],
[EuclidianDist(), PolarDist(), ExpEuclidianDist()],
[ExpTrainWeighting(pred_train=True)],
CustomUniform(0.25, 3),
[True, False]]
\ No newline at end of file
@@ -47,6 +47,13 @@ class Mumbo(BaseMultiviewClassifier, MumboClassifier):
"""
if base_estimator is None:
self.base_estimator = DecisionTreeClassifier()
+        elif type(base_estimator) is list:
+            if type(base_estimator[0]) is dict:
+                self.base_estimator = [self.set_base_estim_from_dict(estim) for estim in base_estimator]
+            elif isinstance(base_estimator[0], BaseEstimator):
+                self.base_estimator = base_estimator
+            else:
+                raise ValueError("base_estimator should be a list of dicts or a list of sklearn classifiers")
elif isinstance(base_estimator, dict):
self.base_estimator = self.set_base_estim_from_dict(base_estimator)
MumboClassifier.set_params(self, **params)
......
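With this change, set_params accepts three forms for base_estimator. A hedged sketch of the three call styles handled above (mumbo_clf is a hypothetical Mumbo instance; the dict layout mirrors the YAML configs in this commit):

from sklearn.tree import DecisionTreeClassifier

# A single dict, resolved through set_base_estim_from_dict:
mumbo_clf.set_params(base_estimator={"decision_tree": {"max_depth": 3}})

# A list of dicts, one resolved estimator per entry:
mumbo_clf.set_params(base_estimator=[{"decision_tree": {"max_depth": 1}},
                                     {"decision_tree": {"max_depth": 3}}])

# A list of pre-built sklearn estimators, used as-is:
mumbo_clf.set_params(base_estimator=[DecisionTreeClassifier(max_depth=1),
                                     DecisionTreeClassifier(max_depth=3)])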