Commit 43493f51 authored by Baptiste Bauvin

Clean

parent 4298c622
Pipeline #9015 failed
@@ -203,6 +203,7 @@ def get_random_hps_args(hps_args, classifier_name):
hps_dict["param_distributions"] = value
return hps_dict
def gen_single_monoview_arg_dictionary(classifier_name, arguments, nb_class,
view_index, view_name, hps_kwargs):
if classifier_name in arguments:
@@ -430,116 +431,6 @@ def benchmark_init(directory, classification_indices, labels, labels_dictionary,
return results_monoview, labels_names
# def exec_one_benchmark(core_index=-1, labels_dictionary=None, directory=None,
# classification_indices=None, args=None,
# k_folds=None, random_state=None, hyper_param_search=None,
# metrics=None, argument_dictionaries=None,
# benchmark=None, views=None, views_indices=None, flag=None,
# labels=None,
# exec_monoview_multicore=exec_monoview_multicore,
# exec_multiview_multicore=exec_multiview_multicore,):
# """Used to run a benchmark using one core. ExecMonoview_multicore, initMultiviewArguments and
# exec_multiview_multicore args are only used for tests"""
#
# results_monoview, labels_names = benchmark_init(directory,
# classification_indices, labels,
# labels_dictionary, k_folds)
#
# logging.debug("Start:\t monoview benchmark")
# results_monoview += [
# exec_monoview_multicore(directory, args["name"], labels_names,
# classification_indices, k_folds,
# core_index, args["file_type"], args["pathf"], random_state,
# labels,
# hyper_param_search=hyper_param_search,
# metrics=metrics,
# n_iter=args["hps_iter"], **argument)
# for argument in argument_dictionaries["Monoview"]]
# logging.debug("Done:\t monoview benchmark")
#
#
# logging.debug("Start:\t multiview benchmark")
# results_multiview = [
# exec_multiview_multicore(directory, core_index, args["name"],
# classification_indices, k_folds, args["file_type"],
# args["pathf"], labels_dictionary, random_state,
# labels, hyper_param_search=hyper_param_search,
# metrics=metrics, n_iter=args["hps_iter"],
# **arguments)
# for arguments in argument_dictionaries["multiview"]]
# logging.debug("Done:\t multiview benchmark")
#
# return [flag, results_monoview + results_multiview]
#
#
# def exec_one_benchmark_multicore(nb_cores=-1, labels_dictionary=None,
# directory=None, classification_indices=None,
# args=None,
# k_folds=None, random_state=None,
# hyper_param_search=None, metrics=None,
# argument_dictionaries=None,
# benchmark=None, views=None, views_indices=None,
# flag=None, labels=None,
# exec_monoview_multicore=exec_monoview_multicore,
# exec_multiview_multicore=exec_multiview_multicore,):
# """Used to run a benchmark using multiple cores. ExecMonoview_multicore, initMultiviewArguments and
# exec_multiview_multicore args are only used for tests"""
#
# results_monoview, labels_names = benchmark_init(directory,
# classification_indices, labels,
# labels_dictionary, k_folds)
#
# logging.debug("Start:\t monoview benchmark")
# nb_experiments = len(argument_dictionaries["monoview"])
# nb_multicore_to_do = int(math.ceil(float(nb_experiments) / nb_cores))
# for step_index in range(nb_multicore_to_do):
# results_monoview += (Parallel(n_jobs=nb_cores)(
# delayed(exec_monoview_multicore)(directory, args["name"], labels_names,
# classification_indices, k_folds,
# core_index, args["file_type"], args["pathf"],
# random_state, labels,
# hyper_param_search=hyper_param_search,
# metrics=metrics,
# n_iter=args["hps_iter"],
# **argument_dictionaries["monoview"][
# core_index + step_index * nb_cores])
# for core_index in
# range(min(nb_cores, nb_experiments - step_index * nb_cores))))
# logging.debug("Done:\t monoview benchmark")
#
# logging.debug("Start:\t multiview arguments initialization")
# # argument_dictionaries = initMultiviewArguments(args, benchmark, views,
# # views_indices,
# # argument_dictionaries,
# # random_state, directory,
# # resultsMonoview,
# # classification_indices)
# logging.debug("Done:\t multiview arguments initialization")
#
# logging.debug("Start:\t multiview benchmark")
# results_multiview = []
# nb_experiments = len(argument_dictionaries["multiview"])
# nb_multicore_to_do = int(math.ceil(float(nb_experiments) / nb_cores))
# for step_index in range(nb_multicore_to_do):
# results_multiview += Parallel(n_jobs=nb_cores)(
# delayed(exec_multiview_multicore)(directory, core_index, args["name"],
# classification_indices, k_folds,
# args["file_type"], args["Base"]["pathf"],
# labels_dictionary, random_state,
# labels,
# hyper_param_search=hyper_param_search,
# metrics=metrics,
# n_iter=args["hps_iter"],
# **
# argument_dictionaries["multiview"][
# step_index * nb_cores + core_index])
# for core_index in
# range(min(nb_cores, nb_experiments - step_index * nb_cores)))
# logging.debug("Done:\t multiview benchmark")
#
# return [flag, results_monoview + results_multiview]
def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
directory=None, classification_indices=None,
args=None,
@@ -548,7 +439,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
argument_dictionaries=None,
benchmark=None, views=None, views_indices=None,
flag=None, labels=None,
track_tracebacks=False): # pragma: no cover
track_tracebacks=False, nb_cores=1): # pragma: no cover
results_monoview, labels_names = benchmark_init(directory,
classification_indices,
labels,
@@ -564,7 +455,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
results_monoview += [
exec_monoview(directory, X, Y, args["name"], labels_names,
classification_indices, k_folds,
1, args["file_type"], args["pathf"], random_state,
nb_cores, args["file_type"], args["pathf"], random_state,
hyper_param_search=hyper_param_search,
metrics=metrics,
**arguments)]
@@ -575,19 +466,8 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
"view_name"]] = traceback.format_exc()
else:
raise
logging.info("Done:\t monoview benchmark")
logging.info("Start:\t multiview arguments initialization")
# argument_dictionaries = initMultiviewArguments(args, benchmark, views,
# views_indices,
# argument_dictionaries,
# random_state, directory,
# resultsMonoview,
# classification_indices)
logging.info("Done:\t multiview arguments initialization")
logging.info("Start:\t multiview benchmark")
results_multiview = []
for arguments in argument_dictionaries["multiview"]:
@@ -595,7 +475,7 @@ def exec_one_benchmark_mono_core(dataset_var=None, labels_dictionary=None,
results_multiview += [
exec_multiview(directory, dataset_var, args["name"],
classification_indices,
k_folds, 1, args["file_type"],
k_folds, nb_cores, args["file_type"],
args["pathf"], labels_dictionary, random_state,
labels,
hps_method=hyper_param_search,
@@ -660,26 +540,10 @@ def exec_benchmark(nb_cores, stats_iter,
"""
logging.info("Start:\t Executing all the needed benchmarks")
results = []
# if nb_cores > 1:
# if stats_iter > 1 or nb_multiclass > 1:
# nb_exps_to_do = len(benchmark_arguments_dictionaries)
# nb_multicore_to_do = range(int(math.ceil(float(nb_exps_to_do) / nb_cores)))
# for step_index in nb_multicore_to_do:
# results += (Parallel(n_jobs=nb_cores)(delayed(exec_one_benchmark)
# (core_index=core_index,
# **
# benchmark_arguments_dictionaries[
# core_index + step_index * nb_cores])
# for core_index in range(
# min(nb_cores, nb_exps_to_do - step_index * nb_cores))))
# else:
# results += [exec_one_benchmark_multicore(nb_cores=nb_cores, **
# benchmark_arguments_dictionaries[0])]
# else:
for arguments in benchmark_arguments_dictionaries:
benchmark_results = exec_one_benchmark_mono_core(
dataset_var=dataset_var,
track_tracebacks=track_tracebacks,
track_tracebacks=track_tracebacks, nb_cores=nb_cores,
**arguments)
analyze_iterations([benchmark_results],
benchmark_arguments_dictionaries, stats_iter,
@@ -697,7 +561,6 @@ def exec_benchmark(nb_cores, stats_iter,
dataset_var.sample_ids,
dataset_var.get_labels())
logging.info("Done:\t Analyzing predictions")
delete(benchmark_arguments_dictionaries, nb_cores, dataset_var)
return results_mean_stds
@@ -816,11 +679,5 @@ def exec_classif(arguments): # pragma: no cover
stats_iter_random_states, metrics,
argument_dictionaries, benchmark,
views, views_indices)
results_mean_stds = exec_benchmark(
nb_cores, stats_iter,
benchmark_argument_dictionaries, directory, metrics,
dataset_var,
args["track_tracebacks"])
# noise_results.append([noise_std, results_mean_stds])
# plot_results_noise(directory, noise_results, metrics[0][0],
# dataset_name)
exec_benchmark(nb_cores, stats_iter, benchmark_argument_dictionaries,
directory, metrics, dataset_var, args["track_tracebacks"])
@@ -36,8 +36,8 @@ def exec_monoview_multicore(directory, name, labels_names,
**args): # pragma: no cover
dataset_var = HDF5Dataset(
hdf5_file=h5py.File(path + name + str(dataset_file_index) + ".hdf5", "r"))
neededViewIndex = args["view_index"]
X = dataset_var.get_v(neededViewIndex)
needed_view_index = args["view_index"]
X = dataset_var.get_v(needed_view_index)
Y = labels
return exec_monoview(directory, X, Y, name, labels_names,
classification_indices, k_folds, 1, database_type,
@@ -61,8 +61,8 @@ def exec_monoview(directory, X, Y, database_name, labels_names,
view_name, \
classifier_name, \
X, \
learningRate, \
labelsString, \
learning_rate, \
labels_string, \
output_file_name, \
directory, \
base_file_name = init_constants(args, X, classification_indices,
@@ -74,7 +74,7 @@ def exec_monoview(directory, X, Y, database_name, labels_names,
"Info:\t Classification - Database:" + str(
database_name) + " View:" + str(
view_name) + " train ratio:"
+ str(learningRate) + ", CrossValidation k-folds: " + str(
+ str(learning_rate) + ", CrossValidation k-folds: " + str(
k_folds.n_splits) + ", cores:"
+ str(nb_cores) + ", algorithm : " + classifier_name)
@@ -110,7 +110,7 @@ def exec_monoview(directory, X, Y, database_name, labels_names,
random_state,
y=Y)
fit_beg = time.monotonic()
classifier.fit(X_train, y_train) # NB_CORES=nbCores,
classifier.fit(X_train, y_train)
fit_duration = time.monotonic() - fit_beg
logging.info("Done:\t Training")
@@ -122,10 +122,10 @@ def exec_monoview(directory, X, Y, database_name, labels_names,
# Filling the full prediction in the right order
full_pred = np.zeros(Y.shape, dtype=int) - 100
for trainIndex, index in enumerate(classification_indices[0]):
full_pred[index] = train_pred[trainIndex]
for testIndex, index in enumerate(classification_indices[1]):
full_pred[index] = test_pred[testIndex]
for train_index, index in enumerate(classification_indices[0]):
full_pred[index] = train_pred[train_index]
for test_index, index in enumerate(classification_indices[1]):
full_pred[index] = test_pred[test_index]
logging.info("Done:\t Predicting")
@@ -185,7 +185,8 @@ def init_constants(args, X, classification_indices, labels_names,
base_file_name = cl_type_string + '-' + name + "-" + view_name + "-"
output_file_name = os.path.join(directory, base_file_name)
secure_file_path(output_file_name)
return kwargs, t_start, view_name, cl_type, X, learning_rate, labels_string, output_file_name, directory, base_file_name
return kwargs, t_start, view_name, cl_type, X, learning_rate, labels_string,\
output_file_name, directory, base_file_name
def init_train_test(X, Y, classification_indices):
@@ -13,8 +13,6 @@ __author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
# __date__ = 2016 - 03 - 25
def change_label_to_minus(y):
"""
Change the label 0 to minus one
@@ -76,43 +74,6 @@ def gen_test_folds_preds(X_train, y_train, KFolds, estimator):
return test_folds_preds
# class CustomRandint:
# """Used as a distribution returning a integer between low and high-1.
# It can be used with a multiplier agrument to be able to perform more complex generation
# for example 10 e -(randint)"""
#
# def __init__(self, low=0, high=0, multiplier=""):
# self.randint = randint(low, high)
# self.multiplier = multiplier
#
# def rvs(self, random_state=None):
# randinteger = self.randint.rvs(random_state=random_state)
# if self.multiplier == "e-":
# return 10 ** -randinteger
# else:
# return randinteger
#
# def get_nb_possibilities(self):
# return self.randint.b - self.randint.a
#
#
# class CustomUniform:
# """Used as a distribution returning a float between loc and loc + scale..
# It can be used with a multiplier agrument to be able to perform more complex generation
# for example 10 e -(float)"""
#
# def __init__(self, loc=0, state=1, multiplier=""):
# self.uniform = uniform(loc, state)
# self.multiplier = multiplier
#
# def rvs(self, random_state=None):
# unif = self.uniform.rvs(random_state=random_state)
# if self.multiplier == 'e-':
# return 10 ** -unif
# else:
# return unif
class BaseMonoviewClassifier(BaseClassifier):
def get_feature_importance(self, directory, base_file_name,
@@ -260,11 +260,8 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
logging.info("Done:\t Getting train/test split")
logging.info("Start:\t Getting classifiers modules")
# classifierPackage = getattr(multiview_classifiers,
# CL_type) # Permet d'appeler un module avec une string
classifier_module = getattr(multiview_classifiers, cl_type)
classifier_name = classifier_module.classifier_class_name
# classifierClass = getattr(classifierModule, CL_type + "Class")
logging.info("Done:\t Getting classifiers modules")
logging.info("Start:\t Optimizing hyperparameters")
@@ -285,13 +282,6 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
hps.fit(dataset_var, dataset_var.get_labels(), )
classifier_config = hps.get_best_params()
hps.gen_report(output_file_name)
# classifier_config = hyper_parameter_search.search_best_settings(
# dataset_var, dataset_var.get_labels(), classifier_module,
# classifier_name,
# metrics[0], learning_indices, k_folds, random_state,
# output_file_name, nb_cores=nb_cores, views_indices=views_indices,
# searching_tool=hps_method, n_iter=n_iter,
# classifier_config=classifier_config)
hps_duration = time.monotonic() - hps_beg
classifier = get_mc_estim(
getattr(classifier_module, classifier_name)(random_state=random_state,
@@ -325,8 +315,6 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
logging.info(
"Info:\t Classification duration " + str(extraction_time) + "s")
# TODO: get better cltype
logging.info("Start:\t Result Analysis for " + cl_type)
times = (extraction_time, whole_duration)
result_analyzer = MultiviewResultAnalyzer(view_names=views,
@@ -7,13 +7,6 @@ from .. import monoview_classifiers
from ..utils.base import BaseClassifier, ResultAnalyser
from ..utils.dataset import RAMDataset
# class FakeEstimator():
#
# def predict(self, X, sample_indices=None, view_indices=None):
# return np.zeros(sample_indices.shape[0])
class BaseMultiviewClassifier(BaseClassifier):
"""
BaseMultiviewClassifier base of Multiview classifiers
@@ -49,21 +42,6 @@ class BaseMultiviewClassifier(BaseClassifier):
'Used {} views to fit, and trying to predict on {}'.format(
self.used_views, view_indices))
# def to_str(self, param_name):
# if param_name in self.weird_strings:
# string = ""
# if "class_name" in self.weird_strings[param_name]:
# string += self.get_params()[param_name].__class__.__name__
# if "config" in self.weird_strings[param_name]:
# string += "( with " + self.get_params()[
# param_name].params_to_string() + ")"
# else:
# string += self.weird_strings[param_name](
# self.get_params()[param_name])
# return string
# else:
# return str(self.get_params()[param_name])
def accepts_multi_class(self, random_state, n_samples=10, dim=2,
n_classes=3, n_views=2):
if int(n_samples / n_classes) < 1:
@@ -199,7 +199,6 @@ def plot_2d(data, classifiers_names, nb_classifiers, file_name, dataset_name, la
plt.close()
# The following part is used to generate an interactive graph.
if use_plotly:
# [np.where(labels==i)[0] for i in np.unique(labels)]
hover_text = [[sample_ids[sample_index] + " failed " + str(
stats_iter - data[
sample_index, classifier_index]) + " time(s), labelled " + str(