diff --git a/config_files/config_test.yml b/config_files/config_test.yml index f2cfe3404c47d40c51398a2090a6f54d7d35780d..fcd7ddf74d9e8c5867ae1a5169d525b23e26a5f2 100644 --- a/config_files/config_test.yml +++ b/config_files/config_test.yml @@ -27,7 +27,7 @@ algos_multiview: ["weighted_linear_early_fusion",] stats_iter: 2 metrics: ["accuracy_score", "f1_score"] metric_princ: "accuracy_score" -hps_type: "None" +hps_type: "randomized_search-equiv" hps_iter: 1 diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py index 5cad4079c46ad14e20ccde8cd5591658e586fde2..88bcf2799c86f38e053ef00fe23ecfeeb61384e7 100644 --- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py +++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py @@ -914,11 +914,12 @@ def exec_classif(arguments): "log_loss", "matthews_corrcoef", "roc_auc_score"]] - metrics = [[metricName] for metricName in metrics_names] + metrics = [[metricName, {}] for metricName in metrics_names] metrics = arange_metrics(metrics, args["metric_princ"]) + # TODO : Metric args for metricIndex, metric in enumerate(metrics): if len(metric) == 1: - metrics[metricIndex] = [metric[0], None] + metrics[metricIndex] = [metric[0], {}] benchmark = init_benchmark(cl_type, monoview_algos, multiview_algos, args) diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py b/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py index 45f13a5ceca6765bb1748d502684decd16aec587..e9087c24fe0e00a6fa7210da8914c6b9da5076fc 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py @@ -1,6 +1,7 @@ from datetime import timedelta as hms from .. 
import metrics +from ..utils.base import get_metric def get_db_config_string(name, feat, classification_indices, shape, @@ -73,8 +74,7 @@ def execute(name, learning_rate, k_folds, nb_cores, grid_search, metrics_list, feat, cl_type, cl_kwargs, class_labels_names, shape, y_train, y_train_pred, y_test, y_test_pred, time, random_state, classifier, output_file_name): - metrics_scores = {} - metric_module = getattr(metrics, metrics_list[0][0]) + metric_module, metric_kwargs = get_metric(metrics_list) train_score = metric_module.score(y_train, y_train_pred) test_score = metric_module.score(y_test, y_test_pred) string_analysis = "Classification on " + name + " database for " + feat + " with " + cl_type + ".\n\n" @@ -88,19 +88,13 @@ def execute(name, learning_rate, k_folds, nb_cores, grid_search, metrics_list, grid_search, nb_cores, n_iter, cl_kwargs, classifier, output_file_name, y_test) string_analysis += classifier_config_string + metrics_scores = {} for metric in metrics_list: metric_string, metric_score = get_metric_score(metric, y_train, y_train_pred, y_test, y_test_pred) string_analysis += metric_string metrics_scores[metric[0]] = metric_score - # string_analysis += getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred) - # if metric[1] is not None: - # metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) - # else: - # metricKWARGS = {} - # metrics_scores[metric[0]] = [getattr(metrics, metric[0]).score(y_train, y_train_pred), - # getattr(metrics, metric[0]).score(y_test, y_test_pred)] string_analysis += "\n\n Classification took " + str(hms(seconds=int(time))) string_analysis += "\n\n Classifier Interpretation : \n" string_analysis += classifier_intepret_string diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py index 41f6cb13bcf515930f069e2ec2ec5ac922c91436..f673f0631798dfd6e4b698f0f080caafc10b649b 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py @@ -12,8 +12,7 @@ import h5py # Import 3rd party modules import numpy as np # for reading CSV-files and Series -from . import monoview_utils -from .analyze_result import execute +from .monoview_utils import MonoviewResult, MonoviewResultAnalyzer # Import own modules from .. 
import monoview_classifiers from ..utils import hyper_parameter_search @@ -51,8 +50,8 @@ def exec_monoview_multicore(directory, name, labels_names, **args) -def exec_monoview(directory, X, Y, name, labels_names, classification_indices, - KFolds, nbCores, databaseType, path, +def exec_monoview(directory, X, Y, database_name, labels_names, classification_indices, + k_folds, nb_cores, databaseType, path, random_state, hyper_param_search="randomized_search", metrics=[["accuracy_score", None]], n_iter=30, view_name="", **args): @@ -64,17 +63,17 @@ def exec_monoview(directory, X, Y, name, labels_names, classification_indices, X, \ learningRate, \ labelsString, \ - outputFileName = initConstants(args, X, classification_indices, - labels_names, - name, directory, view_name) + output_file_name = init_constants(args, X, classification_indices, + labels_names, + database_name, directory, view_name) logging.debug("Done:\t Loading data") logging.debug( - "Info:\t Classification - Database:" + str(name) + " View:" + str( + "Info:\t Classification - Database:" + str(database_name) + " View:" + str( view_name) + " train ratio:" + str(learningRate) + ", CrossValidation k-folds: " + str( - KFolds.n_splits) + ", cores:" - + str(nbCores) + ", algorithm : " + classifier_name) + k_folds.n_splits) + ", cores:" + + str(nb_cores) + ", algorithm : " + classifier_name) logging.debug("Start:\t Determine Train/Test split") X_train, y_train, X_test, y_test = init_train_test(X, Y, @@ -89,12 +88,12 @@ def exec_monoview(directory, X, Y, name, labels_names, classification_indices, logging.debug("Start:\t Generate classifier args") classifier_module = getattr(monoview_classifiers, classifier_name) classifier_class_name = classifier_module.classifier_class_name - cl_kwargs, test_folds_preds = getHPs(classifier_module, hyper_param_search, - n_iter, classifier_name, - classifier_class_name, - X_train, y_train, - random_state, outputFileName, - KFolds, nbCores, metrics, kwargs) + cl_kwargs, test_folds_preds = get_hyper_params(classifier_module, hyper_param_search, + n_iter, classifier_name, + classifier_class_name, + X_train, y_train, + random_state, output_file_name, + k_folds, nb_cores, metrics, kwargs) logging.debug("Done:\t Generate classifier args") logging.debug("Start:\t Training") @@ -109,50 +108,60 @@ def exec_monoview(directory, X, Y, name, labels_names, classification_indices, logging.debug("Done:\t Training") logging.debug("Start:\t Predicting") - y_train_pred = classifier.predict(X_train) - y_test_pred = classifier.predict(X_test) + train_pred = classifier.predict(X_train) + test_pred = classifier.predict(X_test) # Filling the full prediction in the right order full_pred = np.zeros(Y.shape, dtype=int) - 100 for trainIndex, index in enumerate(classification_indices[0]): - full_pred[index] = y_train_pred[trainIndex] + full_pred[index] = train_pred[trainIndex] for testIndex, index in enumerate(classification_indices[1]): - full_pred[index] = y_test_pred[testIndex] + full_pred[index] = test_pred[testIndex] logging.debug("Done:\t Predicting") - t_end = time.time() - t_start + duration = time.time() - t_start logging.debug( - "Info:\t Time for training and predicting: " + str(t_end) + "[s]") + "Info:\t Time for training and predicting: " + str(duration) + "[s]") logging.debug("Start:\t Getting results") - string_analysis, \ - images_analysis, \ - metrics_scores = execute(name, classification_indices, KFolds, nbCores, - hyper_parameter_search, metrics, n_iter, view_name, - classifier_name, - cl_kwargs, labels_names, 
X.shape, - y_train, y_train_pred, y_test, y_test_pred, t_end, - random_state, classifier, outputFileName) + result_analyzer = MonoviewResultAnalyzer(view_name=view_name, + classifier_name=classifier_name, + shape=X.shape, + classifier=classifier, + classification_indices=classification_indices, + k_folds=k_folds, + hps_method=hyper_param_search, + metrics_list=metrics, + n_iter=n_iter, + class_label_names=labels_names, + train_pred=train_pred, + test_pred=test_pred, + directory=output_file_name, + labels=Y, + database_name=database_name, + nb_cores=nb_cores, + duration=duration) + string_analysis, images_analysis, metrics_scores = result_analyzer.analyze() logging.debug("Done:\t Getting results") logging.debug("Start:\t Saving preds") - save_results(string_analysis, outputFileName, full_pred, y_train_pred, + save_results(string_analysis, output_file_name, full_pred, train_pred, y_train, images_analysis, y_test) logging.info("Done:\t Saving results") view_index = args["view_index"] if test_folds_preds is None: - test_folds_preds = y_train_pred - return monoview_utils.MonoviewResult(view_index, classifier_name, view_name, + test_folds_preds = train_pred + return MonoviewResult(view_index, classifier_name, view_name, metrics_scores, full_pred, cl_kwargs, test_folds_preds, classifier, X_train.shape[1]) -def initConstants(args, X, classification_indices, labels_names, - name, directory, view_name): +def init_constants(args, X, classification_indices, labels_names, + name, directory, view_name): try: kwargs = args["args"] except KeyError: @@ -184,10 +193,10 @@ def init_train_test(X, Y, classification_indices): return X_train, y_train, X_test, y_test -def getHPs(classifier_module, hyper_param_search, nIter, classifier_module_name, - classifier_class_name, X_train, y_train, - random_state, - output_file_name, k_folds, nb_cores, metrics, kwargs): +def get_hyper_params(classifier_module, hyper_param_search, nIter, classifier_module_name, + classifier_class_name, X_train, y_train, + random_state, + output_file_name, k_folds, nb_cores, metrics, kwargs): if hyper_param_search != "None": logging.debug( "Start:\t " + hyper_param_search + " best settings with " + str( diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py index 4a0a3b8e0f345d1dd807991461c5bcdaebacaf17..6912bc98c20f93a1278f9ee5fdedd3906512c652 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py @@ -1,14 +1,13 @@ import pickle - import matplotlib.pyplot as plt import numpy as np from matplotlib.ticker import FuncFormatter from scipy.stats import uniform, randint -from ..utils.base import BaseClassifier +from ..utils.base import BaseClassifier, ResultAnalyser # Author-Info -__author__ = "Nikolas Huelsmann, Baptiste Bauvin" +__author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype @@ -114,14 +113,9 @@ class CustomUniform: class BaseMonoviewClassifier(BaseClassifier): - def get_config(self): - if self.param_names: - return "\n\t\t- " + self.__class__.__name__ + "with " + self.params_to_string() - else: - return "\n\t\t- " + self.__class__.__name__ + "with no config." 
- def get_feature_importance(self, directory, nb_considered_feats=50): - """Used to generate a graph and a pickle dictionary representing feature importances""" + """Used to generate a graph and a pickle dictionary representing + feature importances""" feature_importances = self.feature_importances_ sorted_args = np.argsort(-feature_importances) feature_importances_sorted = feature_importances[sorted_args][ @@ -203,3 +197,28 @@ def get_accuracy_graph(plotted_data, classifier_name, file_name, ax.legend((scat,), (name,)) f.savefig(file_name, transparent=True) plt.close() + + +class MonoviewResultAnalyzer(ResultAnalyser): + + def __init__(self, view_name, classifier_name, shape, classifier, + classification_indices, k_folds, hps_method, metrics_list, + n_iter, class_label_names, train_pred, test_pred, + directory, labels, database_name, nb_cores, duration): + ResultAnalyser.__init__(self, classifier, classification_indices, + k_folds, hps_method, metrics_list, n_iter, + class_label_names, train_pred, test_pred, + directory, labels, database_name, nb_cores, + duration) + self.view_name = view_name + self.classifier_name = classifier_name + self.shape = shape + + def get_base_string(self): + return "Classification on {} for {} with {}.\n\n".format( + self.database_name, self.view_name, self.classifier_name + ) + + def get_view_specific_info(self): + return "\t- View name : {}\t View shape : {}\n".format(self.view_name, + self.shape) \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py b/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py index 61a6996930f6751ab4795670a88e831a3649b70c..f9a1f929addcc4613e8ef05e6787b2089a5db7ff 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/analyze_results.py @@ -1,5 +1,7 @@ from .. 
import metrics +from ..utils.base import get_metric + # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype @@ -88,14 +90,9 @@ def get_metrics_scores(metrics, train_labels, test_labels, return metrics_scores -def execute(classifier, pred_train_labels, - pred_test_labels, DATASET, - classification_kwargs, classification_indices, - labels_dictionary, views, nb_cores, times, - name, k_folds, - hyper_param_search, n_iter, metric_list, - views_indices, random_state, labels, classifier_module, - directory): +def execute(classifier, pred_train_labels, pred_test_labels, + classification_indices, labels_dictionary, views, name, k_folds, + metrics_list, labels, directory): """ Parameters @@ -106,50 +103,27 @@ def execute(classifier, pred_train_labels, pred_test_labels : labels of test - DATASET : - - classification_kwargs - classification_indices labels_dictionary views - nb_cores - - times - name k_folds - hyper_param_search - - n_iter - - metric_list - - views_indices - - random_state + metrics_list labels - classifier_module - Returns ------- - retuern tuple of (string_analysis, images_analysis, metricsScore) + return tuple of (string_analysis, images_analysis, metricsScore) """ classifier_name = classifier.short_name learning_indices, validation_indices = classification_indices - metric_module = getattr(metrics, metric_list[0][0]) - if metric_list[0][1] is not None: - metric_kwargs = dict((index, metricConfig) for index, metricConfig in - enumerate(metric_list[0][1])) - else: - metric_kwargs = {} + metric_module, metric_kwargs = get_metric(metrics_list) score_on_train = metric_module.score(labels[learning_indices], pred_train_labels, **metric_kwargs) @@ -157,7 +131,7 @@ def execute(classifier, pred_train_labels, pred_test_labels, **metric_kwargs) string_analysis = "\t\tResult for multiview classification with " + classifier_name + \ - "\n\n" + metric_list[0][0] + " :\n\t-On Train : " + str( + "\n\n" + metrics_list[0][0] + " :\n\t-On Train : " + str( score_on_train) + "\n\t-On Test : " + str( score_on_test) + \ "\n\nDataset info :\n\t-Database name : " + name + "\n\t-Labels : " + \ @@ -167,11 +141,11 @@ def execute(classifier, pred_train_labels, k_folds.n_splits) + \ " folds\n\nClassification configuration : \n\t-Algorithm used : " + classifier_name + " with : " + classifier.get_config() - metrics_scores = get_metrics_scores(metric_list, pred_train_labels, + metrics_scores = get_metrics_scores(metrics_list, pred_train_labels, pred_test_labels, validation_indices, learning_indices, labels) - string_analysis += print_metric_score(metrics_scores, metric_list) + string_analysis += print_metric_score(metrics_scores, metrics_list) string_analysis += "\n\n Interpretation : \n\n" + classifier.get_interpretation( directory, labels) images_analysis = {} diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py index f5c5a880a55448e37c57572c122f119f7f4bcf0c..59fc7c78402965770a30146d07f5d6b57c1830c4 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py @@ -7,8 +7,7 @@ import time import h5py import numpy as np -from . import analyze_results -from .multiview_utils import MultiviewResult +from .multiview_utils import MultiviewResult, MultiviewResultAnalyzer from .. 
import multiview_classifiers from ..utils import hyper_parameter_search from ..utils.multiclass import get_mc_estim @@ -68,10 +67,7 @@ def init_constants(kwargs, classification_indices, metrics, classifier_config, views, learning_rate, labels -def save_results(classifier, labels_dictionary, string_analysis, views, - classifier_module, - classification_kargs, directory, learning_rate, name, - images_analysis): +def save_results(classifier, string_analysis, directory, name, images_analysis): """ Save results in derectory @@ -99,9 +95,7 @@ def save_results(classifier, labels_dictionary, string_analysis, views, images_analysis : """ - labels_set = set(labels_dictionary.values()) logging.info(string_analysis) - # views_string = "-".join(views) views_string = "mv" cl_type_string = classifier.short_name output_file_name = os.path.join(directory, cl_type_string, @@ -122,7 +116,7 @@ def save_results(classifier, labels_dictionary, string_analysis, views, for i in range(1, 20): test_file_name = output_file_name + image_name + "-" + str( i) + ".png" - if not os.path.isfile(testFileName): + if not os.path.isfile(test_file_name): images_analysis[image_name].savefig(test_file_name, transparent=True) break @@ -291,15 +285,15 @@ def exec_multiview(directory, dataset_var, name, classification_indices, logging.debug("Done:\t Fitting classifier") logging.debug("Start:\t Predicting") - pred_train_labels = classifier.predict(dataset_var, + train_pred = classifier.predict(dataset_var, example_indices=learning_indices, view_indices=views_indices) - pred_test_labels = classifier.predict(dataset_var, + test_pred = classifier.predict(dataset_var, example_indices=validation_indices, view_indices=views_indices) full_labels = np.zeros(dataset_var.get_labels().shape, dtype=int) - 100 - full_labels[learning_indices] = pred_train_labels - full_labels[validation_indices] = pred_test_labels + full_labels[learning_indices] = train_pred + full_labels[validation_indices] = test_pred logging.info("Done:\t Pertidcting") classification_time = time.time() - t_start @@ -310,125 +304,27 @@ def exec_multiview(directory, dataset_var, name, classification_indices, logging.info("Start:\t Result Analysis for " + cl_type) times = (extraction_time, classification_time) - string_analysis, images_analysis, metrics_scores = analyze_results.execute( - classifier, pred_train_labels, - pred_test_labels, dataset_var, - classifier_config, classification_indices, - labels_dictionary, views, nb_cores, times, - name, k_folds, - hyper_param_search, n_iter, metrics, - views_indices, random_state, labels, classifier_module, directory) + result_analyzer = MultiviewResultAnalyzer(view_names=views, + classifier=classifier, + classification_indices=classification_indices, + k_folds=k_folds, + hps_method=hyper_param_search, + metrics_list=metrics, + n_iter=n_iter, + class_label_names=list(labels_dictionary.values()), + train_pred=train_pred, + test_pred=test_pred, + directory=directory, + labels=labels, + database_name=dataset_var.get_name(), + nb_cores=nb_cores, + duration=classification_time) + string_analysis, images_analysis, metrics_scores = result_analyzer.analyze() logging.info("Done:\t Result Analysis for " + cl_type) logging.debug("Start:\t Saving preds") - save_results(classifier, labels_dictionary, string_analysis, views, - classifier_module, - classifier_config, directory, - learning_rate, name, images_analysis) + save_results(classifier, string_analysis, directory, name, images_analysis) logging.debug("Start:\t Saving preds") return 
MultiviewResult(cl_type, classifier_config, metrics_scores, full_labels) - # return CL_type, classificationKWARGS, metricsScores, fullLabels, testLabelsMulticlass - - -if __name__ == "__main__": - - import argparse - - parser = argparse.ArgumentParser( - description='This methods is used to execute a multiclass classification with one single view. ', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - groupStandard = parser.add_argument_group('Standard arguments') - groupStandard.add_argument('-log', action='store_true', - help='Use option to activate Logging to Console') - groupStandard.add_argument('--type', metavar='STRING', action='store', - help='Type of dataset', default=".hdf5") - groupStandard.add_argument('--name', metavar='STRING', action='store', - help='Name of Database (default: %(default)s)', - default='DB') - groupStandard.add_argument('--view', metavar='STRING', action='store', - help='Name of Feature for Classification (default: %(default)s)', - default='View0') - groupStandard.add_argument('--pathF', metavar='STRING', action='store', - help='Path to the views (default: %(default)s)', - default='results-FeatExtr/') - groupStandard.add_argument('--directory', metavar='STRING', action='store', - help='Path to the views (default: %(default)s)', - default='results-FeatExtr/') - groupStandard.add_argument('--labels_dictionary', metavar='STRING', - action='store', nargs='+', - help='Name of classLabels CSV-file (default: %(default)s)', - default='classLabels.csv') - groupStandard.add_argument('--classificationIndices', metavar='STRING', - action='store', - help='Name of classLabels-Description CSV-file (default: %(default)s)', - default='classLabels-Description.csv') - groupStandard.add_argument('--nbCores', metavar='INT', action='store', - help='Number of cores, -1 for all', type=int, - default=1) - groupStandard.add_argument('--randomState', metavar='INT', action='store', - help='Seed for the random state or pickable randomstate file', - default=42) - groupStandard.add_argument('--hyper_param_search', metavar='STRING', - action='store', - help='The type of method used tosearch the best set of hyper parameters', - default='randomizedSearch') - groupStandard.add_argument('--metrics', metavar='STRING', action='store', - nargs="+", - help='metrics used in the experimentation, the first will be the one used in CV', - default=['']) - groupStandard.add_argument('--nIter', metavar='INT', action='store', - help='Number of itetarion in hyper parameter search', - type=int, - default=10) - - args = parser.parse_args() - - directory = args.directory - name = args.name - labels_dictionary = args.labels_dictionary - classification_indices = args.classification_indices - k_folds = args.k_folds - nb_cores = args.nb_cores - databaseType = None - path = args.path_f - random_state = args.random_state - hyper_param_search = args.hyper_param_search - metrics = args.metrics - n_iter = args.n_iter - kwargs = args.kwargs - - # Extract the data using MPI ? - dataset_var = None - labels = None # (get from CSV ?) 
- - logfilename = "gen a good logfilename" - - logfile = os.path.join(directory, logfilename) - if os.path.isfile(logfile + ".log"): - for i in range(1, 20): - testFileName = logfilename + "-" + str(i) + ".log" - if not os.path.isfile(os.path.join(directory, testFileName)): - logfile = os.path.join(directory, testFileName) - break - else: - logfile += ".log" - - logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', - filename=logfile, level=logging.DEBUG, - filemode='w') - - if args.log: - logging.getLogger().addHandler(logging.StreamHandler()) - - res = exec_multiview(directory, dataset_var, name, classification_indices, - k_folds, - nb_cores, databaseType, path, - labels_dictionary, random_state, labels, - hyper_param_search=hyper_param_search, metrics=metrics, - n_iter=n_iter, **kwargs) - - # Pickle the res - # Go put your token diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py index 4fd5fb851f55f49451c024ecab63d649d42818ca..644a9f9ef9a1ca56c5ba48763836df5a6b69b77d 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py @@ -3,7 +3,7 @@ from abc import abstractmethod import numpy as np from .. import monoview_classifiers -from ..utils.base import BaseClassifier +from ..utils.base import BaseClassifier, ResultAnalyser from ..utils.dataset import RAMDataset @@ -162,3 +162,24 @@ class MultiviewResult(object): return multiview_classifier.short_name except: return self.classifier_name + + +class MultiviewResultAnalyzer(ResultAnalyser): + + def __init__(self, view_names, classifier, classification_indices, k_folds, + hps_method, metrics_list, n_iter, class_label_names, + train_pred, test_pred, directory, labels, database_name, + nb_cores, duration): + ResultAnalyser.__init__(self, classifier, classification_indices, k_folds, + hps_method, metrics_list, n_iter, class_label_names, + train_pred, test_pred, directory, labels, database_name, + nb_cores, duration) + self.classifier_name = classifier.short_name + self.view_names = view_names + + def get_base_string(self, ): + return "Multiview classification on {} with {}\n\n".format(self.database_name, + self.classifier_name) + + def get_view_specific_info(self): + return "\t-Views : " + ', '.join(self.view_names) + "\n" \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py index 3da191fb0b3ab94aadb4cfb7fd0258568c120b72..83b4c555721fc647458f1e36cbe478284b80e2c4 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py @@ -33,18 +33,16 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier, BaseFusionClassifier): BaseMultiviewClassifier.__init__(self, random_state=random_state) self.view_weights = view_weights self.monoview_classifier_name = monoview_classifier_name - self.short_name = "early fusion " + self.monoview_classifier_name + self.short_name = "early_fusion_" + self.monoview_classifier_name if monoview_classifier_name in monoview_classifier_config: self.monoview_classifier_config = monoview_classifier_config[ monoview_classifier_name] 
         self.monoview_classifier_config = monoview_classifier_config
-        monoview_classifier_module = getattr(monoview_classifiers,
-                                             self.monoview_classifier_name)
-        monoview_classifier_class = getattr(monoview_classifier_module,
-                                            monoview_classifier_module.classifier_class_name)
-        self.monoview_classifier = monoview_classifier_class(
-            random_state=random_state,
-            **self.monoview_classifier_config)
+        # monoview_classifier_module = getattr(monoview_classifiers,
+        #                                      self.monoview_classifier_name)
+        # monoview_classifier_class = getattr(monoview_classifier_module,
+        #                                      monoview_classifier_module.classifier_class_name)
+        self.monoview_classifier = self.init_monoview_estimator(monoview_classifier_name, monoview_classifier_config)
         self.param_names = ["monoview_classifier_name",
                             "monoview_classifier_config"]
         self.distribs = [get_available_monoview_classifiers(),
@@ -59,7 +57,7 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier, BaseFusionClassifier):
                                                  monoview_classifier_name,
                                                  monoview_classifier_config)
         self.monoview_classifier_config = self.monoview_classifier.get_params()
-        self.short_name = "early fusion " + self.monoview_classifier_name
+        self.short_name = "early_fusion_" + self.monoview_classifier_name
         return self
 
     def get_params(self, deep=True):
@@ -78,6 +76,7 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier, BaseFusionClassifier):
                                            multiview=False,
                                            y=y[train_indices])
         self.monoview_classifier.fit(X, y[train_indices])
+        self.monoview_classifier_config = self.monoview_classifier.get_params()
         return self
 
     def predict(self, X, example_indices=None, view_indices=None):
diff --git a/multiview_platform/mono_multi_view_classifiers/utils/base.py b/multiview_platform/mono_multi_view_classifiers/utils/base.py
index e55e33deaf590f1d018538cd0c833121c8a8dc6c..e2f1c5bbad824d0b8d0d43a5b5c1131c143cc9a0 100644
--- a/multiview_platform/mono_multi_view_classifiers/utils/base.py
+++ b/multiview_platform/mono_multi_view_classifiers/utils/base.py
@@ -1,6 +1,9 @@
 import numpy as np
 
 from sklearn.base import BaseEstimator
+from abc import abstractmethod
+from datetime import timedelta as hms
+from multiview_platform.mono_multi_view_classifiers import metrics
 
 
 class BaseClassifier(BaseEstimator, ):
@@ -45,9 +48,9 @@ class BaseClassifier(BaseEstimator, ):
 
     def get_config(self):
         if self.param_names:
-            return "\n\t\t- " + self.__class__.__name__ + "with " + self.params_to_string()
+            return self.__class__.__name__ + " with " + self.params_to_string()
         else:
-            return "\n\t\t- " + self.__class__.__name__ + "with no config."
+            return self.__class__.__name__ + " with no config."
 
     def to_str(self, param_name):
         if param_name in self.weird_strings:
@@ -89,3 +92,141 @@ class BaseClassifier(BaseEstimator, ):
 
 def get_names(classed_list):
     return np.array([object_.__class__.__name__ for object_ in classed_list])
+
+
+def get_metric(metric_list):
+    metric_module = getattr(metrics, metric_list[0][0])
+    if metric_list[0][1] is not None:
+        metric_kwargs = dict((index, metricConfig) for index, metricConfig in
+                             enumerate(metric_list[0][1]))
+    else:
+        metric_kwargs = {}
+    return metric_module, metric_kwargs
+
+class ResultAnalyser():
+
+    def __init__(self, classifier, classification_indices, k_folds,
+                 hps_method, metrics_list, n_iter, class_label_names,
+                 train_pred, test_pred, directory, labels, database_name,
+                 nb_cores, duration):
+        self.classifier = classifier
+        self.train_indices, self.test_indices = classification_indices
+        self.k_folds = k_folds
+        self.hps_method = hps_method
+        self.metrics_list = metrics_list
+        self.n_iter = n_iter
+        self.class_label_names = class_label_names
+        self.train_pred = train_pred
+        self.test_pred = test_pred
+        self.directory = directory
+        self.labels = labels
+        self.string_analysis = ""
+        self.database_name = database_name
+        self.nb_cores = nb_cores
+        self.duration = duration
+        self.metric_scores = {}
+
+    def get_all_metrics_scores(self, ):
+        for metric, metric_args in self.metrics_list:
+            self.metric_scores[metric] = self.get_metric_scores(metric,
+                                                                metric_args)
+
+    def get_metric_scores(self, metric, metric_kwargs):
+        """
+        Parameters
+        ----------
+        metric : str
+            Name of the metric module (in ``metrics``) to use.
+        metric_kwargs : dict
+            Keyword arguments passed to the metric's ``score`` function.
+
+        Returns
+        -------
+        train_score, test_score
+            Scores of the metric on the train and the test set.
+        """
+        metric_module = getattr(metrics, metric)
+        train_score = metric_module.score(y_true=self.labels[self.train_indices],
+                                          y_pred=self.train_pred,
+                                          **metric_kwargs)
+        test_score = metric_module.score(y_true=self.labels[self.test_indices],
+                                         y_pred=self.test_pred,
+                                         **metric_kwargs)
+        return train_score, test_score
+
+    def print_metric_score(self,):
+        """
+        Builds a string reporting the score of each metric in
+        ``self.metrics_list`` on the train and the test set.
+
+        The scores are read from ``self.metric_scores``, so
+        ``get_all_metrics_scores`` must have been called
+        beforehand.
+
+        Returns
+        -------
+        metric_score_string : str
+            String containing all metric results.
+        """
+        metric_score_string = "\n\n"
+        for metric, metric_kwargs in self.metrics_list:
+            metric_module = getattr(metrics, metric)
+            metric_score_string += "\tFor {} : ".format(metric_module.get_config(
+                **metric_kwargs))
+            metric_score_string += "\n\t\t- Score on train : {}".format(self.metric_scores[metric][0])
+            metric_score_string += "\n\t\t- Score on test : {}".format(self.metric_scores[metric][1])
+            metric_score_string += "\n\n"
+        return metric_score_string
+
+    @abstractmethod
+    def get_view_specific_info(self):
+        pass
+
+    @abstractmethod
+    def get_base_string(self):
+        pass
+
+    def get_db_config_string(self,):
+        """
+        Builds a string describing the database configuration:
+        name, view-specific information, learning rate (train ratio),
+        class labels and number of cross-validation folds.
+
+        Returns
+        -------
+        db_config_string : str
+        """
+        learning_ratio = len(self.train_indices) / (
+                len(self.train_indices) + len(self.test_indices))
+        db_config_string = "Database configuration : \n"
+        db_config_string += "\t- Database name : {}\n".format(self.database_name)
+        db_config_string += self.get_view_specific_info()
+        db_config_string += "\t- Learning Rate : {}\n".format(learning_ratio)
+        db_config_string += "\t- Labels used : " + ", ".join(
+            self.class_label_names) + "\n"
+        db_config_string += "\t- Number of cross validation folds : {}\n\n".format(self.k_folds.n_splits)
+        return db_config_string
+
+    def get_classifier_config_string(self, ):
+        classifier_config_string = "Classifier configuration : \n"
\n" + classifier_config_string += "\t- " + self.classifier.get_config()+ "\n" + classifier_config_string += "\t- Executed on {} core(s) \n".format( + self.nb_cores) + + if self.hps_method.startswith('randomized_search'): + classifier_config_string += "\t- Got configuration using randomized search with {} iterations \n" .format(self.n_iter) + return classifier_config_string + + def analyze(self, ): + string_analysis = self.get_base_string() + string_analysis += self.get_db_config_string() + string_analysis += self.get_classifier_config_string() + self.get_all_metrics_scores() + string_analysis += self.print_metric_score() + string_analysis += "\n\n Classification took {}".format(hms(seconds=int(self.duration))) + string_analysis += "\n\n Classifier Interpretation : \n" + string_analysis += self.classifier.get_interpretation( + self.directory, + self.labels[self.test_indices]) + image_analysis = {} + return string_analysis, image_analysis, self.metric_scores diff --git a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py index acc6a8fef00f2990b837d4f3fba11943826d4ef4..d610ccb70b72ec9778b3029e9bebc85ecc6bbc01 100644 --- a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py +++ b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py @@ -41,13 +41,13 @@ class Test_initConstants(unittest.TestCase): X, \ learningRate, \ labelsString, \ - outputFileName = exec_classif_mono_view.initConstants(cls.args, - cls.X, - cls.classification_indices, - cls.labels_names, - cls.name, - cls.directory, - cls.view_name) + outputFileName = exec_classif_mono_view.init_constants(cls.args, + cls.X, + cls.classification_indices, + cls.labels_names, + cls.name, + cls.directory, + cls.view_name) cls.assertEqual(kwargs, cls.args) cls.assertEqual(feat, "test_dataset") cls.assertEqual(CL_type, "test_clf") @@ -128,19 +128,19 @@ class Test_getHPs(unittest.TestCase): os.rmdir(tmp_path) def test_simple(self): - kwargs = exec_classif_mono_view.getHPs(self.classifierModule, - self.hyper_param_search, - self.n_iter, - self.classifier_name, - self.classifier_class_name, - self.X, - self.y, - self.random_state, - self.output_file_name, - self.cv, - self.nb_cores, - self.metrics, - self.kwargs) + kwargs = exec_classif_mono_view.get_hyper_params(self.classifierModule, + self.hyper_param_search, + self.n_iter, + self.classifier_name, + self.classifier_class_name, + self.X, + self.y, + self.random_state, + self.output_file_name, + self.cv, + self.nb_cores, + self.metrics, + self.kwargs) # class Test_getKWARGS(unittest.TestCase): #