Commit ac8ca802 authored by Baptiste Bauvin's avatar Baptiste Bauvin

Object done

parent aa680623
Pipeline #10762 failed
Showing with 1449 additions and 717 deletions
......@@ -27,7 +27,7 @@ res_dir: "examples/results/example_0/"
# If an error occurs in a classifier and track_tracebacks is set to True, the
# benchmark saves the traceback and continues; if it is set to False, the
# benchmark stops and raises the error
track_tracebacks: True
track_tracebacks: False
# All the classification-related configuration options
......@@ -40,14 +40,14 @@ nb_class:
# The name of the classes to select in the dataset
classes:
# The type of algorithms to run during the benchmark (monoview and/or multiview)
type: ["monoview","multiview"]
cl_type: ["monoview","multiview"]
# The names of the monoview algorithms to run, ["all"] to run all the available classifiers
algos_monoview: ["decision_tree", "adaboost"]
# The names of the multiview algorithms to run, ["all"] to run all the available classifiers
algos_multiview: ["early_fusion_decision_tree", "early_fusion_adaboost", "weighted_linear_late_fusion",]
# The number of times the benchmark is repeated with different train/test
# split, to have more statistically significant results
stats_iter: 1
stats_iter: 2
# The metrics that will be used in the result analysis
metrics:
accuracy_score: {}
......
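For reference, a minimal sketch (not part of the commit) of how a configuration like the hunk above might be loaded and the renamed `cl_type` key read; the file name and fallback defaults are assumptions:

```python
# Hedged sketch: load a benchmark config and read the keys touched by this
# commit (track_tracebacks, cl_type, stats_iter). The file name is hypothetical.
import yaml

with open("config_example_0.yml") as config_file:
    config = yaml.safe_load(config_file)

benchmark_types = config.get("cl_type", ["monoview", "multiview"])
print(benchmark_types, config.get("stats_iter"), config.get("track_tracebacks"))
```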
This diff is collapsed.
......@@ -67,27 +67,27 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
def get_interpretation(self, directory, base_file_name, y_test, feature_ids,
multi_class=False): # pragma: no cover
interpretString = ""
interpretString += self.get_feature_importance(directory,
base_file_name,
feature_ids)
interpretString += "\n\n Estimator error | Estimator weight\n"
interpretString += "\n".join(
[str(error) + " | " + str(weight / sum(self.estimator_weights_)) for
error, weight in
zip(self.estimator_errors_, self.estimator_weights_)])
step_test_metrics = np.array(
[self.plotted_metric.score(y_test, step_pred) for step_pred in
self.step_predictions])
get_accuracy_graph(step_test_metrics, "Adaboost",
os.path.join(directory,
base_file_name + "test_metrics.png"),
self.plotted_metric_name, set="test")
np.savetxt(os.path.join(directory, base_file_name + "test_metrics.csv"),
step_test_metrics,
delimiter=',')
np.savetxt(
os.path.join(directory, base_file_name + "train_metrics.csv"),
self.metrics, delimiter=',')
np.savetxt(os.path.join(directory, base_file_name + "times.csv"),
np.array([self.train_time, self.pred_time]), delimiter=',')
# interpretString += self.get_feature_importance(directory,
# base_file_name,
# feature_ids)
# interpretString += "\n\n Estimator error | Estimator weight\n"
# interpretString += "\n".join(
# [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for
# error, weight in
# zip(self.estimator_errors_, self.estimator_weights_)])
# step_test_metrics = np.array(
# [self.plotted_metric.score(y_test, step_pred) for step_pred in
# self.step_predictions])
# get_accuracy_graph(step_test_metrics, "Adaboost",
# os.path.join(directory,
# base_file_name + "test_metrics.png"),
# self.plotted_metric_name, set="test")
# np.savetxt(os.path.join(directory, base_file_name + "test_metrics.csv"),
# step_test_metrics,
# delimiter=',')
# np.savetxt(
# os.path.join(directory, base_file_name + "train_metrics.csv"),
# self.metrics, delimiter=',')
# np.savetxt(os.path.join(directory, base_file_name + "times.csv"),
# np.array([self.train_time, self.pred_time]), delimiter=',')
return interpretString
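For context, the block disabled above reported one test score per boosting step and saved it to CSV. A hedged stand-alone sketch of the same idea using scikit-learn's public `staged_predict` API (the data, metric and file name are placeholders, not the platform's own helpers):

```python
# Hedged sketch: per-boosting-step AdaBoost test scores, mirroring what the
# commented-out interpretation code reported via self.step_predictions.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

X, y = make_classification(n_samples=200, random_state=42)
X_train, X_test, y_train, y_test = X[:150], X[150:], y[:150], y[150:]

clf = AdaBoostClassifier(n_estimators=10, random_state=42).fit(X_train, y_train)
step_test_metrics = np.array(
    [accuracy_score(y_test, step_pred) for step_pred in clf.staged_predict(X_test)])
np.savetxt("adaboost_test_metrics.csv", step_test_metrics, delimiter=",")
```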
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from imblearn.under_sampling import RandomUnderSampler
from spkm.spkm_wrapper import SPKMlikeSklearn
from spkm.kernels_and_gradients import RBFKernel, PolyKernel
from ..monoview.monoview_utils import BaseMonoviewClassifier
from ..utils.hyper_parameter_search import CustomRandint
from ..monoview_classifiers.spkm import SPKM
classifier_class_name = "BaggedSPKM"
class BaggedSPKM(BaseMonoviewClassifier, SPKMlikeSklearn):
def __init__(self, random_state=42, n_u=2, kernel=RBFKernel(0.5),
spkmregP=1, spkminit="randn",
nspkminits=10, preprocessinglist=[0,1,2], **kwargs):
SPKM.__init__(self, random_state=random_state,
n_u=n_u,
kernel=kernel,
spkmregP=spkmregP,
spkminit=spkminit,
nspkminits=nspkminits,
preprocessinglist=preprocessinglist)
self.rus = RandomUnderSampler(random_state=random_state)
def fit(self, X, y):
self.lb = LabelBinarizer(pos_label=1, neg_label=-1)
y = self.lb.fit_transform(y)
return SPKMlikeSklearn.fit(self, X, y[:,0],)
def predict(self, X, preprocess=True):
return self.lb.inverse_transform(np.sign(SPKMlikeSklearn.predict(self, X)))
def get_interpretation(self, directory, base_file_name, labels, multiclass=False):
u = self.feature_interpretability()
importances_sum = np.sum(u)
self.feature_importances_ = u/importances_sum
return ""
def accepts_multi_class(self, random_state, n_samples=10, dim=2,
n_classes=3, n_views=2):
return False
\ No newline at end of file
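Both SPKM wrappers in this commit binarise the labels to ±1 before fitting and map the signed predictions back afterwards. A hedged illustration of that round trip with plain scikit-learn objects (the decision values are made up, since `spkm` is an external dependency):

```python
# Hedged sketch of the +/-1 label round-trip used by BaggedSPKM / SPKM.
import numpy as np
from sklearn.preprocessing import LabelBinarizer

y = np.array(["healthy", "sick", "sick", "healthy"])
lb = LabelBinarizer(pos_label=1, neg_label=-1)
y_pm1 = lb.fit_transform(y)[:, 0]                   # labels mapped to -1 / +1
decision_values = np.array([0.3, -1.2, 2.1, -0.4])  # stand-in for SPKM outputs
y_back = lb.inverse_transform(np.sign(decision_values))
print(y_pm1, y_back)
```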
......@@ -23,6 +23,7 @@ class DecisionTree(DecisionTreeClassifier, BaseMonoviewClassifier):
DecisionTreeClassifier.__init__(self,
max_depth=max_depth,
criterion=criterion,
class_weight="balanced",
splitter=splitter,
random_state=random_state
)
......
from imblearn.ensemble import BalancedBaggingClassifier
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from ..monoview.monoview_utils import BaseMonoviewClassifier
from ..utils.base import base_boosting_estimators
from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
classifier_class_name = "IBDT"
class IBDT(BaseMonoviewClassifier, BalancedBaggingClassifier):
def __init__(self, random_state=None, n_estimators=10,
sampling_strategy="auto", base_estimator=DecisionTreeClassifier(),
replacement=False, **kwargs):
super(IBDT, self).__init__(random_state=random_state,
base_estimator=base_estimator,
n_estimators=n_estimators,
sampling_strategy=sampling_strategy,
replacement=replacement)
self.param_names = ["n_estimators", "sampling_strategy",
"base_estimator__max_depth",
"base_estimator__criterion",
"base_estimator__splitter",]
self.classed_params = ["base_estimator"]
self.distribs = [CustomRandint(low=1, high=50),
["auto"],CustomRandint(low=1, high=300),
["gini", "entropy"],
["best", "random"],]
self.weird_strings=[]
def fit(self, X, y):
BalancedBaggingClassifier.fit(self, X, y)
self.feature_importances_ = np.zeros(X.shape[1])
for estim in self.estimators_:
if hasattr(estim['classifier'], 'feature_importances_'):
self.feature_importances_ += estim['classifier'].feature_importances_
self.feature_importances_ /= np.sum(self.feature_importances_)
return self
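The importance aggregation above relies on imbalanced-learn fitting each bagged estimator inside a pipeline whose final step is named "classifier" (hence `estim['classifier']`). A hedged stand-alone sketch of the same averaging, assuming a tree-based base estimator and the parameter names of the imbalanced-learn version targeted by this commit:

```python
# Hedged sketch of the IB* feature-importance aggregation: sum the per-tree
# importances over the bagged (sampler -> classifier) pipelines, then normalise.
import numpy as np
from imblearn.ensemble import BalancedBaggingClassifier
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=200, weights=[0.9, 0.1], random_state=0)
bagger = BalancedBaggingClassifier(base_estimator=DecisionTreeClassifier(),
                                   n_estimators=10, random_state=0).fit(X, y)

importances = np.zeros(X.shape[1])
for pipeline in bagger.estimators_:
    importances += pipeline["classifier"].feature_importances_
importances /= np.sum(importances)
print(importances)
```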
from imblearn.ensemble import BalancedBaggingClassifier
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from ..monoview.monoview_utils import BaseMonoviewClassifier
from ..utils.base import base_boosting_estimators
from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
classifier_class_name = "IBRF"
class IBRF(BaseMonoviewClassifier, BalancedBaggingClassifier):
def __init__(self, random_state=None, n_estimators=10,
sampling_strategy="auto", replacement=False,
base_estimator=RandomForestClassifier(), **kwargs):
super(IBRF, self).__init__(random_state=random_state,
base_estimator=base_estimator,
n_estimators=n_estimators,
sampling_strategy=sampling_strategy,
replacement=replacement)
self.param_names = ["n_estimators", "sampling_strategy",
"base_estimator__n_estimators",
"base_estimator__max_depth",
"base_estimator__criterion"]
self.classed_params = ["base_estimator"]
self.distribs = [CustomRandint(low=1, high=50),
["auto"],CustomRandint(low=1, high=300),
CustomRandint(low=1, high=10),
["gini", "entropy"],]
self.weird_strings=[]
def fit(self, X, y):
BalancedBaggingClassifier.fit(self, X, y)
self.feature_importances_ = np.zeros(X.shape[1])
for estim in self.estimators_:
if hasattr(estim['classifier'], 'feature_importances_'):
self.feature_importances_ += estim['classifier'].feature_importances_
self.feature_importances_ /= np.sum(self.feature_importances_)
return self
from imblearn.ensemble import BalancedBaggingClassifier
import numpy as np
from ..monoview_classifiers.random_scm import ScmBagging
from ..monoview.monoview_utils import BaseMonoviewClassifier
from ..utils.base import base_boosting_estimators
from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
classifier_class_name = "IBRSCM"
class IBRSCM(BaseMonoviewClassifier, BalancedBaggingClassifier):
def __init__(self, random_state=None, n_estimators=10,
sampling_strategy="auto", replacement=False,
base_estimator=ScmBagging(), **kwargs):
super(IBRSCM, self).__init__(random_state=random_state,
base_estimator=base_estimator,
n_estimators=n_estimators,
sampling_strategy=sampling_strategy,
replacement=replacement)
self.param_names = ["n_estimators", "sampling_strategy",
"base_estimator__n_estimators",
"base_estimator__max_rules",
"base_estimator__max_samples",
"base_estimator__max_features",
"base_estimator__model_type",
"base_estimator__p_options",]
self.classed_params = ["base_estimator"]
self.distribs = [CustomRandint(low=1, high=50),
["auto"],CustomRandint(low=1, high=300),
CustomRandint(low=1, high=20),
CustomUniform(), CustomUniform(),
["conjunction", "disjunction"], CustomUniform(), ]
self.weird_strings=[]
def fit(self, X, y):
print(self.base_estimator.n_estimators)
BalancedBaggingClassifier.fit(self, X, y)
self.feature_importances_ = np.zeros(X.shape[1])
for estim in self.estimators_:
if hasattr(estim['classifier'], 'feature_importances_'):
self.feature_importances_ += estim['classifier'].feature_importances_
self.feature_importances_ /= np.sum(self.feature_importances_)
print('Fitted')
return self
from imblearn.ensemble import BalancedBaggingClassifier
import numpy as np
from ..monoview_classifiers.scm import SCM
from ..monoview.monoview_utils import BaseMonoviewClassifier
from ..utils.base import base_boosting_estimators
from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
classifier_class_name = "IBSCM"
class IBSCM(BaseMonoviewClassifier, BalancedBaggingClassifier):
def __init__(self, random_state=None, n_estimators=10,
sampling_strategy="auto", replacement=False,
base_estimator=SCM(), **kwargs):
super(IBSCM, self).__init__(random_state=random_state,
base_estimator=base_estimator,
n_estimators=n_estimators,
sampling_strategy=sampling_strategy,
replacement=replacement)
self.param_names = ["n_estimators", "sampling_strategy",
"base_estimator__model_type",
"base_estimator__max_rules", "base_estimator__p",]
self.classed_params = []
self.distribs = [CustomRandint(low=1, high=50),
["auto"],["conjunction", "disjunction"],
CustomRandint(low=1, high=15),
CustomUniform(loc=0, state=1),]
self.weird_strings=[]
def fit(self, X, y):
BalancedBaggingClassifier.fit(self, X, y)
self.feature_importances_ = np.zeros(X.shape[1])
for estim in self.estimators_:
if hasattr(estim['classifier'], 'feature_importances_'):
self.feature_importances_ += estim['classifier'].feature_importances_
self.feature_importances_ /= np.sum(self.feature_importances_)
return self
......@@ -24,6 +24,7 @@ class RandomForest(RandomForestClassifier, BaseMonoviewClassifier):
n_estimators=n_estimators,
max_depth=max_depth,
criterion=criterion,
class_weight="balanced",
random_state=random_state
)
self.param_names = ["n_estimators", "max_depth", "criterion",
......
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from spkm.spkm_wrapper import SPKMlikeSklearn
from spkm.kernels_and_gradients import RBFKernel, PolyKernel
from ..monoview.monoview_utils import BaseMonoviewClassifier
from ..utils.hyper_parameter_search import CustomRandint
from ..utils.dataset import get_samples_views_indices
classifier_class_name = "SPKM"
class SPKM(BaseMonoviewClassifier, SPKMlikeSklearn):
def __init__(self, random_state=42, n_u=2, kernel=RBFKernel(0.5),
spkmregP=1, spkminit="randn",
nspkminits=10, preprocessinglist=[0,1,2], **kwargs):
SPKMlikeSklearn.__init__(self, random_state=random_state,
n_u=n_u,
kernel=kernel,
spkmregP=spkmregP,
spkminit=spkminit,
nspkminits=nspkminits,
preprocessinglist=preprocessinglist)
self.param_names = ["n_u", "kernel", "spkmregP",
"spkminit", "nspkminits", "preprocessinglist",
"random_state"]
self.distribs = [[2], [PolyKernel({"d":3, "r":1}), RBFKernel(0.5)],
CustomRandint(-4,4, multiplier='e'),
["data"], [10],
[[0,1],],
[random_state],]
self.more_than_two_views = False
self.weird_strings = []
self.random_state = random_state
def fit(self, X, y):
self.lb = LabelBinarizer(pos_label=1, neg_label=-1)
y = self.lb.fit_transform(y)
return SPKMlikeSklearn.fit(self, X, y[:,0],)
def predict(self, X, preprocess=True):
return self.lb.inverse_transform(np.sign(SPKMlikeSklearn.predict(self, X)))
def get_interpretation(self, directory, base_file_name, labels, multiclass=False):
u = self.feature_interpretability()
importances_sum = np.sum(u)
self.feature_importances_ = u/importances_sum
return ""
def accepts_multi_class(self, random_state, n_samples=10, dim=2,
n_classes=3, n_views=2):
return False
......@@ -17,9 +17,39 @@ __author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def init_constants(kwargs, classification_indices, metrics,
name, nb_cores, k_folds,
dataset_var, directory):
class MultiViewExp:
def __init__(self, classifier_name="decision_tree",
classifier_config={"depth":3}, view_names=[],
view_indices=[0], nb_class=2, hps_kwargs={}, train_size=0.8,
labels_dictionary={}, database_name="",
hps_type="Random", nb_cores=1, metrics={},
equivalent_draws=False):
self.classifier_name = classifier_name
self.classifier_config=classifier_config
self.view_names=view_names
self.view_indices=view_indices
self.nb_class=nb_class
self.hps_kwargs=hps_kwargs
self.train_size=train_size
self.labels_dictionary=labels_dictionary
self.directory=None
self.database_name=database_name
self.k_folds=None
self.split=None
self.hps_type = hps_type
self.nb_cores=nb_cores
self.metrics=metrics
self.equivalent_draws=equivalent_draws
def add_bootstrap_info(self, directory="", k_folds=[], splits=[],
random_state=42):
self.directory = directory
self.k_folds=k_folds
self.splits=splits
self.random_state = random_state
def init_constants(self, dataset_var ):
"""
Used to init the constants
Parameters
......@@ -44,34 +74,27 @@ def init_constants(kwargs, classification_indices, metrics,
tuple of (classifier_name, t_start, views_indices,
classifier_config, views, learning_rate)
"""
views = kwargs["view_names"]
views_indices = kwargs["view_indices"]
if metrics is None:
metrics = {"f1_score*": {}}
classifier_name = kwargs["classifier_name"]
classifier_config = kwargs[classifier_name]
learning_rate = len(classification_indices[0]) / float(
(len(classification_indices[0]) + len(classification_indices[1])))
# learning_rate = len(self.split[0]) / float(
# (len(classification_indices[0]) + len(classification_indices[1])))
t_start = time.time()
logging.info("Info\t: Classification - Database : " + str(
name) + " ; Views : " + ", ".join(views) +
" ; Algorithm : " + classifier_name + " ; Cores : " + str(
nb_cores) + ", Train ratio : " + str(learning_rate) +
", CV on " + str(k_folds.n_splits) + " folds")
self.database_name) + " ; Views : " + ", ".join(self.view_names) +
" ; Algorithm : " + self.classifier_name + " ; Cores : " + str(
self.nb_cores) + ", Train ratio : " + str(self.train_size) +
", CV on " + str(self.k_folds.n_splits) + " folds")
for view_index, view_name in zip(views_indices, views):
for view_index, view_name in zip(self.view_indices, self.view_names):
logging.info("Info:\t Shape of " + str(view_name) + " :" + str(
dataset_var.get_shape(view_index)))
labels = dataset_var.get_labels()
directory = os.path.join(directory, classifier_name)
base_file_name = classifier_name + "-" + dataset_var.get_name() + "-"
output_file_name = os.path.join(directory, base_file_name)
return classifier_name, t_start, views_indices, \
classifier_config, views, learning_rate, labels, output_file_name, \
directory, base_file_name, metrics
def save_results(string_analysis, images_analysis, output_file_name,
# labels = dataset_var.get_labels()
self.directory = os.path.join(self.directory, self.classifier_name)
self.base_file_name = self.classifier_name + "-" + dataset_var.get_name() + "-"
self.output_file_name = os.path.join(self.directory, self.base_file_name)
# return classifier_name, t_start, views_indices, \
# classifier_config, views, learning_rate, labels, output_file_name, \
# directory, base_file_name, metrics
def save_results(self, string_analysis, images_analysis,
confusion_matrix): # pragma: no cover
"""
Save results in directory
......@@ -101,19 +124,19 @@ def save_results(string_analysis, images_analysis, output_file_name,
"""
logging.info(string_analysis)
secure_file_path(output_file_name)
output_text_file = open(output_file_name + 'summary.txt', 'w',
secure_file_path(self.output_file_name)
output_text_file = open(self.output_file_name + 'summary.txt', 'w',
encoding="utf-8")
output_text_file.write(string_analysis)
output_text_file.close()
np.savetxt(output_file_name + "confusion_matrix.csv", confusion_matrix,
np.savetxt(self.output_file_name + "confusion_matrix.csv", confusion_matrix,
delimiter=',')
if images_analysis is not None:
for image_name in images_analysis.keys():
if os.path.isfile(output_file_name + image_name + ".png"):
if os.path.isfile(self.output_file_name + image_name + ".png"):
for i in range(1, 20):
test_file_name = output_file_name + image_name + "-" + str(
test_file_name = self.output_file_name + image_name + "-" + str(
i) + ".png"
if not os.path.isfile(test_file_name):
images_analysis[image_name].savefig(test_file_name,
......@@ -121,78 +144,9 @@ def save_results(string_analysis, images_analysis, output_file_name,
break
images_analysis[image_name].savefig(
output_file_name + image_name + '.png', transparent=True)
def exec_multiview_multicore(directory, core_index, name, learning_rate,
nb_folds,
database_type, path, labels_dictionary,
random_state, labels,
hyper_param_search=False, nb_cores=1, metrics=None,
n_iter=30, **arguments): # pragma: no cover
"""
execute multiview process on
Parameters
----------
directory : indicate the directory
self.output_file_name + image_name + '.png', transparent=True)
core_index :
name : name of the data file to perform
learning_rate :
nb_folds :
database_type :
path : path to the data name
labels_dictionary
random_state : int seed, RandomState instance, or None (default=None)
The seed of the pseudo random number multiview_generator to use when
shuffling the data.
labels :
hyper_param_search :
nb_cores : in number of cores
metrics : metric to use
n_iter : int number of iterations
arguments : others arguments
Returns
-------
exec_multiview on directory, dataset_var, name, learning_rate, nb_folds, 1,
database_type, path, labels_dictionary,
random_state, labels,
hyper_param_search=hyper_param_search, metrics=metrics,
n_iter=n_iter, **arguments
"""
"""Used to load an HDF5 dataset_var for each parallel job and execute multiview classification"""
dataset_var = h5py.File(path + name + str(core_index) + ".hdf5", "r")
return exec_multiview(directory, dataset_var, name, learning_rate, nb_folds,
1,
database_type, path, labels_dictionary,
random_state, labels,
hps_method=hyper_param_search,
metrics=metrics,
n_iter=n_iter, **arguments)
def exec_multiview(directory, dataset_var, name, classification_indices,
k_folds,
nb_cores, database_type, path,
labels_dictionary, random_state, labels,
hps_method="None", hps_kwargs={}, metrics=None,
n_iter=30, **kwargs):
def exec(self, dataset_var, ):
"""Used to execute multiview classification and result analysis
Parameters
......@@ -236,78 +190,72 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
``MultiviewResult``
"""
t_start = time.monotonic()
logging.info("Start:\t Initialize constants")
cl_type, \
t_start, \
views_indices, \
classifier_config, \
views, \
learning_rate, \
labels, \
output_file_name, \
directory, \
base_file_name, \
metrics = init_constants(kwargs, classification_indices, metrics, name,
nb_cores, k_folds, dataset_var, directory)
self.init_constants(dataset_var)
logging.info("Done:\t Initialize constants")
extraction_time = time.time() - t_start
logging.info("Info:\t Extraction duration " + str(extraction_time) + "s")
logging.info(
"Info:\t Extraction duration " + str(extraction_time) + "s")
logging.info("Start:\t Getting train/test split")
learning_indices, validation_indices = classification_indices
learning_indices, validation_indices = self.splits
logging.info("Done:\t Getting train/test split")
logging.info("Start:\t Getting classifiers modules")
classifier_module = getattr(multiview_classifiers, cl_type)
classifier_module = getattr(multiview_classifiers, self.classifier_name)
classifier_name = classifier_module.classifier_class_name
logging.info("Done:\t Getting classifiers modules")
logging.info("Start:\t Optimizing hyperparameters")
hps_beg = time.monotonic()
if hps_method != "None":
hps_method_class = getattr(hyper_parameter_search, hps_method)
if self.hps_type != "None":
hps_method_class = getattr(hyper_parameter_search, self.hps_type)
estimator = getattr(classifier_module, classifier_name)(
random_state=random_state,
**classifier_config)
estimator = get_mc_estim(estimator, random_state,
random_state=self.random_state,
**self.classifier_config)
estimator = get_mc_estim(estimator, self.random_state,
multiview=True,
y=dataset_var.get_labels()[learning_indices])
hps = hps_method_class(estimator, scoring=metrics, cv=k_folds,
random_state=random_state, framework="multiview",
n_jobs=nb_cores,
y=dataset_var.get_labels()[
learning_indices])
hps = hps_method_class(estimator, scoring=self.metrics, cv=self.k_folds,
random_state=self.random_state,
framework="multiview",
n_jobs=self.nb_cores,
learning_indices=learning_indices,
view_indices=views_indices, **hps_kwargs)
view_indices=self.view_indices,
**self.hps_kwargs)
hps.fit(dataset_var, dataset_var.get_labels(), )
classifier_config = hps.get_best_params()
hps.gen_report(output_file_name)
hps.gen_report(self.output_file_name)
hps_duration = time.monotonic() - hps_beg
classifier = get_mc_estim(
getattr(classifier_module, classifier_name)(random_state=random_state,
**classifier_config),
random_state, multiview=True,
self.classifier = get_mc_estim(
getattr(classifier_module, classifier_name)(
random_state=self.random_state,
**self.classifier_config),
self.random_state, multiview=True,
y=dataset_var.get_labels())
logging.info("Done:\t Optimizing hyperparameters")
logging.info("Start:\t Fitting classifier")
fit_beg = time.monotonic()
classifier.fit(dataset_var, dataset_var.get_labels(),
self.classifier.fit(dataset_var, dataset_var.get_labels(),
train_indices=learning_indices,
view_indices=views_indices)
view_indices=self.view_indices)
fit_duration = time.monotonic() - fit_beg
logging.info("Done:\t Fitting classifier")
logging.info("Start:\t Predicting")
train_pred = classifier.predict(dataset_var,
train_pred = self.classifier.predict(dataset_var,
sample_indices=learning_indices,
view_indices=views_indices)
view_indices=self.view_indices)
pred_beg = time.monotonic()
test_pred = classifier.predict(dataset_var,
test_pred = self.classifier.predict(dataset_var,
sample_indices=validation_indices,
view_indices=views_indices)
view_indices=self.view_indices)
pred_duration = time.monotonic() - pred_beg
full_pred = np.zeros(dataset_var.get_labels().shape, dtype=int) - 100
full_pred[learning_indices] = train_pred
......@@ -318,34 +266,106 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
logging.info(
"Info:\t Classification duration " + str(extraction_time) + "s")
logging.info("Start:\t Result Analysis for " + cl_type)
logging.info("Start:\t Result Analysis for " + self.classifier_name)
times = (extraction_time, whole_duration)
result_analyzer = MultiviewResultAnalyzer(view_names=views,
classifier=classifier,
classification_indices=classification_indices,
k_folds=k_folds,
hps_method=hps_method,
metrics_dict=metrics,
n_iter=n_iter,
class_label_names=list(
labels_dictionary.values()),
if "n_iter" in self.hps_kwargs:
self.n_iter_hps = self.hps_kwargs["n_iter"]
else:
self.n_iter_hps = 0
result_analyzer = MultiviewResultAnalyzer(view_names=self.view_names,
classifier=self.classifier,
classification_indices=self.splits,
k_folds=self.k_folds,
hps_method=self.hps_type,
metrics_dict=self.metrics,
n_iter=self.n_iter_hps,
class_label_names=[ self.labels_dictionary[ind]
for ind in range(len(self.labels_dictionary))],
pred=full_pred,
directory=directory,
base_file_name=base_file_name,
labels=labels,
directory=self.directory,
base_file_name=self.base_file_name,
labels=dataset_var.get_labels(),
database_name=dataset_var.get_name(),
nb_cores=nb_cores,
nb_cores=self.nb_cores,
duration=whole_duration,
feature_ids=dataset_var.feature_ids)
string_analysis, images_analysis, metrics_scores, class_metrics_scores, \
confusion_matrix = result_analyzer.analyze()
logging.info("Done:\t Result Analysis for " + cl_type)
logging.info("Done:\t Result Analysis for " + self.classifier_name)
logging.info("Start:\t Saving preds")
save_results(string_analysis, images_analysis, output_file_name,
confusion_matrix)
self.save_results(string_analysis, images_analysis, confusion_matrix)
logging.info("Start:\t Saving preds")
return MultiviewResult(cl_type, classifier_config, metrics_scores,
return MultiviewResult(self.classifier_name, self.classifier_config, metrics_scores,
full_pred, hps_duration, fit_duration,
pred_duration, class_metrics_scores, classifier)
pred_duration, class_metrics_scores, self.classifier)
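A hedged sketch of how the refactored object is meant to be driven, matching the call sites above; `dataset_var` must be one of the platform's HDF5-backed dataset objects, and the split and fold values are placeholders:

```python
# Hedged usage sketch for the new object-based multiview API of this commit.
# dataset_var is assumed to be a platform Dataset object and is not built here.
import numpy as np
from sklearn.model_selection import StratifiedKFold

train_indices, test_indices = np.arange(0, 80), np.arange(80, 100)  # placeholder split

exp = MultiViewExp(classifier_name="weighted_linear_late_fusion",
                   classifier_config={},
                   view_names=["view_0", "view_1"], view_indices=[0, 1],
                   nb_class=2, hps_type="Random", hps_kwargs={"n_iter": 5},
                   labels_dictionary={0: "neg", 1: "pos"},
                   metrics={"accuracy_score": {}})
exp.add_bootstrap_info(directory="results/",
                       k_folds=StratifiedKFold(n_splits=5),
                       splits=(train_indices, test_indices),
                       random_state=42)
result = exp.exec(dataset_var)  # dataset_var: assumed platform dataset
```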
# def exec_multiview_multicore(directory, core_index, name, learning_rate,
# nb_folds,
# database_type, path, labels_dictionary,
# random_state, labels,
# hyper_param_search=False, nb_cores=1, metrics=None,
# n_iter=30, **arguments): # pragma: no cover
# """
# execute multiview process on
#
# Parameters
# ----------
#
# directory : indicate the directory
#
# core_index :
#
# name : name of the data file to perform
#
# learning_rate :
#
# nb_folds :
#
# database_type :
#
# path : path to the data name
#
# labels_dictionary
#
# random_state : int seed, RandomState instance, or None (default=None)
# The seed of the pseudo random number multiview_generator to use when
# shuffling the data.
#
# labels :
#
# hyper_param_search :
#
# nb_cores : in number of cores
#
# metrics : metric to use
#
# n_iter : int number of iterations
#
# arguments : others arguments
#
# Returns
# -------
# exec_multiview on directory, dataset_var, name, learning_rate, nb_folds, 1,
# database_type, path, labels_dictionary,
# random_state, labels,
# hyper_param_search=hyper_param_search, metrics=metrics,
# n_iter=n_iter, **arguments
# """
# """Used to load an HDF5 dataset_var for each parallel job and execute multiview classification"""
# dataset_var = h5py.File(path + name + str(core_index) + ".hdf5", "r")
# return exec_multiview(directory, dataset_var, name, learning_rate, nb_folds,
# 1,
# database_type, path, labels_dictionary,
# random_state, labels,
# hps_method=hyper_param_search,
# metrics=metrics,
# n_iter=n_iter, **arguments)
......@@ -111,8 +111,13 @@ class LateFusionClassifier(BaseMultiviewClassifier, BaseFusionClassifier):
for view_index, monoview_estimator
in zip(view_indices,
self.monoview_estimators)]
self.get_feature_importance()
return self
def get_feature_importance(self):
self.feature_importances_ = np.concatenate([clf.feature_importances_ for clf in self.monoview_estimators])
self.feature_importances_/=np.sum(self.feature_importances_)
def init_params(self, nb_view, mutliclass=False):
if self.weights is None:
self.weights = np.ones(nb_view) / nb_view
......
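The new `get_feature_importance` above builds a single importance vector for the fused model by concatenating each monoview estimator's `feature_importances_` (in view order) and rescaling it to sum to one. A small made-up illustration:

```python
# Hedged illustration of the late-fusion feature-importance aggregation.
import numpy as np

per_view_importances = [np.array([0.7, 0.3]),       # monoview estimator on view 0
                        np.array([0.1, 0.5, 0.4])]  # monoview estimator on view 1
feature_importances_ = np.concatenate(per_view_importances)
feature_importances_ /= np.sum(feature_importances_)
print(feature_importances_)  # one entry per feature, across all views
```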
......@@ -29,7 +29,7 @@ class SampledPWSPKM(PWSPKM,):
self.rus = RandomUnderSampler(random_state=random_state)
def fit(self, X, y, train_indices=None, view_indices=None):
self.spkmregP=10
self.lb = LabelBinarizer(pos_label=1, neg_label=-1)
y = self.lb.fit_transform(y)
train_indices, view_indices = get_samples_views_indices(X,
......
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint
# from ..utils.dataset import get_v
classifier_class_name = "EarlyFusionIBDT"
class EarlyFusionIBDT(BaseEarlyFusion):
def __init__(self, random_state=None, max_depth=None, n_estimators=10,
sampling_strategy="auto", replacement=False,
criterion='gini', splitter='best', **kwargs):
BaseEarlyFusion.__init__(self, random_state=random_state,
monoview_classifier="ib_decision_tree",
n_estimators=n_estimators,
sampling_strategy=sampling_strategy,
replacement=replacement,
base_estimator__max_depth=max_depth,
base_estimator__criterion=criterion,
base_estimator__splitter=splitter, **kwargs)
self.param_names = ["n_estimators", "sampling_strategy",
"base_estimator__max_depth",
"base_estimator__criterion",
"base_estimator__splitter",
'random_state']
self.classed_params = []
self.distribs = [CustomRandint(low=1, high=50),
["auto"],CustomRandint(low=1, high=300),
["gini", "entropy"],
["best", "random"], [random_state]]
self.weird_strings = {}
\ No newline at end of file
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint
# from ..utils.dataset import get_v
classifier_class_name = "EarlyFusionIBRF"
class EarlyFusionIBRF(BaseEarlyFusion):
def __init__(self, random_state=None, max_depth=None, n_estimators=10,
sampling_strategy="auto", replacement=False, **kwargs):
BaseEarlyFusion.__init__(self, random_state=random_state,
monoview_classifier="ib_random_forest",
n_estimators=n_estimators,
sampling_strategy=sampling_strategy,
replacement=replacement,
base_estimator__max_depth=max_depth,
base_estimator__criterion="gini",
base_estimator__splitter='best', **kwargs)
self.param_names = ["n_estimators", "sampling_strategy",
"base_estimator__n_estimators",
"base_estimator__max_depth",
"base_estimator__criterion",
'random_state']
self.classed_params = []
self.distribs = [CustomRandint(low=1, high=50),
["auto"],CustomRandint(low=1, high=300),
CustomRandint(low=1, high=10),
["gini", "entropy"], [random_state]]
self.weird_strings = {}
\ No newline at end of file
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
# from ..utils.dataset import get_v
classifier_class_name = "EarlyFusionIBRSCM"
class EarlyFusionIBRSCM(BaseEarlyFusion):
def __init__(self, random_state=None, n_estimators=10,
sampling_strategy="auto", replacement=False, **kwargs):
BaseEarlyFusion.__init__(self, random_state=random_state,
monoview_classifier="ib_random_scm",
n_estimators=n_estimators,
sampling_strategy=sampling_strategy,
replacement=replacement,
**kwargs)
self.param_names = ["n_estimators", "sampling_strategy",
"base_estimator__n_estimators",
"base_estimator__max_rules",
"base_estimator__max_samples",
"base_estimator__max_features",
"base_estimator__model_type",
"base_estimator__p_options",
'random_state']
self.classed_params = []
self.distribs = [CustomRandint(low=1, high=50),
["auto"],CustomRandint(low=1, high=300),
CustomRandint(low=1, high=20),
CustomUniform(), CustomUniform(),
["conjunction", "disjunction"], CustomUniform(),
[random_state]]
self.weird_strings = {}
\ No newline at end of file
from .additions.early_fusion_from_monoview import BaseEarlyFusion
from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
# from ..utils.dataset import get_v
classifier_class_name = "EarlyFusionIBSCM"
class EarlyFusionIBSCM(BaseEarlyFusion):
def __init__(self, random_state=None, n_estimators=10,
sampling_strategy="auto", replacement=False, **kwargs):
BaseEarlyFusion.__init__(self, random_state=random_state,
monoview_classifier="ib_scm",
n_estimators=n_estimators,
sampling_strategy=sampling_strategy,
replacement=replacement,
**kwargs)
self.param_names = ["n_estimators", "sampling_strategy",
"base_estimator__model_type",
"base_estimator__max_rules", "base_estimator__p",
'random_state']
self.classed_params = []
self.distribs = [CustomRandint(low=1, high=50),
["auto"],["conjunction", "disjunction"],
CustomRandint(low=1, high=15),
CustomUniform(loc=0, state=1),
[random_state]]
self.weird_strings = {}
\ No newline at end of file
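All of the early-fusion and IB* wrappers declare aligned `param_names` / `distribs` lists that the platform's random hyper-parameter search consumes positionally. A hedged sketch of that convention (the drawing logic below is illustrative only, not the platform's `CustomRandint`/`CustomUniform` implementation):

```python
# Hedged sketch: param_names and distribs are matched positionally; each entry
# is either a list of candidate values or a distribution-like object with rvs().
import numpy as np

rng = np.random.RandomState(42)
param_names = ["n_estimators", "sampling_strategy", "base_estimator__max_rules"]
distribs = [list(range(1, 50)), ["auto"], list(range(1, 15))]

sampled = {name: (dist.rvs(random_state=rng) if hasattr(dist, "rvs")
                  else dist[rng.randint(len(dist))])
           for name, dist in zip(param_names, distribs)}
print(sampled)  # one sampled hyper-parameter configuration
```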
......@@ -28,16 +28,19 @@ class PWSPKM(BaseMultiviewClassifier, pairwiseSPKMlikeSklearn):
self.param_names = ["n_u", "kernel1", "kernel2", "spkmregP",
"spkminit", "nspkminits", "preprocessinglist",
"random_state"]
self.distribs = [[2], [PolyKernel({"d":3, "r":1}), RBFKernel(0.5)], [PolyKernel({"d":3, "r":1}), RBFKernel(0.5)], CustomRandint(-2,2, multiplier='e'),
self.distribs = [[2], [PolyKernel({"d":3, "r":1}), RBFKernel(0.5)],
[PolyKernel({"d":3, "r":1}), RBFKernel(0.5)],
CustomRandint(-4,4, multiplier='e'),
["data"], [10],
[[], [0], [1], [2], [0,1], [0,1,2], [0,2], [1,2]], [random_state],]
[[0,1],],
[random_state],]
self.more_than_two_views = False
self.random_state = random_state
def fit(self, X, y, train_indices=None, view_indices=None):
self.lb = LabelBinarizer(pos_label=1, neg_label=-1)
y = self.lb.fit_transform(y)
print(np.unique(y))
train_indices, view_indices = get_samples_views_indices(X,
train_indices,
view_indices)
......