diff --git a/config_files/config_test.yml b/config_files/config_test.yml index 0a83c99239acbf7070a742a076a4a0e946151526..f400a1855633ac093b2703f30f477330e27e224b 100644 --- a/config_files/config_test.yml +++ b/config_files/config_test.yml @@ -13,7 +13,7 @@ debug: True add_noise: False noise_std: 0.0 res_dir: "../results/" -track_tracebacks: True +track_tracebacks: False # All the classification-realted configuration options multiclass_method: "oneVersusOne" @@ -21,13 +21,13 @@ split: 0.49 nb_folds: 2 nb_class: 2 classes: -type: ["multiview", "monoview"] +type: ["multiview"] algos_monoview: ["decision_tree" ] algos_multiview: ["weighted_linear_early_fusion",] stats_iter: 2 metrics: ["accuracy_score", "f1_score"] metric_princ: "accuracy_score" -hps_type: "Random" +hps_type: "None" hps_args: n_iter: 4 equivalent_draws: False @@ -48,6 +48,34 @@ hps_args: ###################################### ## The Monoview Classifier arguments # ###################################### +weighted_linear_early_fusion: + monoview_classifier_config: + init: CustomDecisionTree + init__ccp_alpha: 0.0 + init__class_weight: null + init__criterion: gini + init__max_depth: 1 + init__max_features: null + init__max_leaf_nodes: null + init__min_impurity_decrease: 0.0 + init__min_impurity_split: null + init__min_samples_leaf: 1 + init__min_samples_split: 2 + init__min_weight_fraction_leaf: 0.0 + init__presort: deprecated + init__random_state: null + init__splitter: best + loss: exponential + max_depth: 5 + n_estimators: 412 + monoview_classifier_name: gradient_boosting + view_weights: + - 0.16666666666666669 + - 0.16666666666666669 + - 0.16666666666666669 + - 0.16666666666666669 + - 0.16666666666666669 + - 0.16666666666666669 # #random_forest: # n_estimators: [25] diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py index 084b01dcb1435a5d0457e7155650fb0b6cdc4e0a..acb47b7511685a5eb947d9c9e97e68de4afaae0f 100644 --- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py +++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py @@ -199,7 +199,7 @@ def init_monoview_exps(classifier_names, def gen_single_monoview_arg_dictionary(classifier_name, arguments, nb_class, view_index, view_name, hps_kwargs): if classifier_name in arguments: - classifier_config = dict((key, value[0]) for key, value in arguments[ + classifier_config = dict((key, value) for key, value in arguments[ classifier_name].items()) else: classifier_config = {} @@ -227,10 +227,7 @@ def extract_dict(classifier_config): """Reverse function of get_path_dict""" extracted_dict = {} for key, value in classifier_config.items(): - if isinstance(value, list): - extracted_dict = set_element(extracted_dict, key, value[0]) - else: - extracted_dict = set_element(extracted_dict, key, value) + extracted_dict = set_element(extracted_dict, key, value) return extracted_dict diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/gradient_boosting.py b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/gradient_boosting.py index fc1f7c37b774debe568a15b6bab99e4f9089e354..7136370f1c684ead6010e6a9a944da031fdf4779 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/gradient_boosting.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/gradient_boosting.py @@ -16,7 +16,7 @@ __status__ = "Prototype" # Production, Development, Prototype classifier_class_name = "GradientBoosting" -class CustomDecisionTree(DecisionTreeClassifier): +class CustomDecisionTreeGB(DecisionTreeClassifier): def predict(self, X, check_input=True): y_pred = DecisionTreeClassifier.predict(self, X, check_input=check_input) @@ -27,7 +27,7 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier): def __init__(self, random_state=None, loss="exponential", max_depth=1.0, n_estimators=100, - init=CustomDecisionTree(max_depth=1), + init=CustomDecisionTreeGB(max_depth=1), **kwargs): GradientBoostingClassifier.__init__(self, loss=loss, diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis/error_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis/error_analysis.py index f16be40abfd8691a3f194d4826214d6a5454d843..fd18a3b02edc250479a688084ec4759c7dde6e5f 100644 --- a/multiview_platform/mono_multi_view_classifiers/result_analysis/error_analysis.py +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis/error_analysis.py @@ -60,21 +60,22 @@ def publish_example_errors(example_errors, directory, databaseName, np.savetxt(base_file_name + "bar_plot_data.csv", error_on_examples, delimiter=",") - plot_2d(data_2d, classifiers_names, nb_classifiers, nb_examples, - base_file_name, example_ids=example_ids, labels=labels) + plot_2d(data_2d, classifiers_names, nb_classifiers, base_file_name, + example_ids=example_ids, labels=labels) - plot_errors_bar(error_on_examples, nb_classifiers, nb_examples, - base_file_name) + plot_errors_bar(error_on_examples, nb_examples, + base_file_name, example_ids=example_ids) logging.debug("Done:\t Biclass Label analysis figures generation") + def publish_all_example_errors(iter_results, directory, stats_iter, example_ids, labels): logging.debug( "Start:\t Global label analysis figure generation") - nbExamples, nbClassifiers, data, \ + nb_examples, nb_classifiers, data, \ error_on_examples, classifier_names = gen_error_data_glob(iter_results, stats_iter) @@ -82,17 +83,16 @@ def publish_all_example_errors(iter_results, directory, np.savetxt(os.path.join(directory, "example_errors.csv"), error_on_examples, delimiter=",") - plot_2d(data, classifier_names, nbClassifiers, nbExamples, + plot_2d(data, classifier_names, nb_classifiers, os.path.join(directory, ""), stats_iter=stats_iter, example_ids=example_ids, labels=labels) - plot_errors_bar(error_on_examples, nbClassifiers * stats_iter, - nbExamples, os.path.join(directory, "")) + plot_errors_bar(error_on_examples, nb_examples, os.path.join(directory, ""), + example_ids=example_ids) logging.debug( "Done:\t Global label analysis figures generation") - def gen_error_data(example_errors): r"""Used to format the error data in order to plot it efficiently. The data is saves in a `.csv` file. @@ -106,14 +106,6 @@ def gen_error_data(example_errors): - 1 if the classifier `<classifier_name>` classifier well the example, - 0 if it fail to classify the example, - -100 if it did not classify the example (multiclass one versus one). - base_file_name : list of str - The name of the file in which the figure will be saved - ("2D_plot_data.csv" and "bar_plot_data.csv" will - be added at the end). - nbCopies : int, optinal, default: 2 - The number of times the data is copied (classifier wise) in order for - the figure to be more readable. - Returns ------- @@ -146,9 +138,10 @@ def gen_error_data(example_errors): except: import pdb; pdb.set_trace() - error_on_examples = -1 * np.sum(data_2d, axis=1) / nb_classifiers + error_on_examples = np.sum(data_2d, axis=1) / nb_classifiers return nb_classifiers, nb_examples, classifiers_names, data_2d, error_on_examples + def gen_error_data_glob(iter_results, stats_iter): nb_examples = next(iter(iter_results.values())).shape[0] nb_classifiers = len(iter_results) @@ -158,16 +151,14 @@ def gen_error_data_glob(iter_results, stats_iter): iter_results.items()): data[:, clf_index] = error_data classifier_names.append(classifier_name) - error_on_examples = -1 * np.sum(data, axis=1) + ( + error_on_examples = np.sum(data, axis=1) / ( nb_classifiers * stats_iter) return nb_examples, nb_classifiers, data, error_on_examples, \ classifier_names -def plot_2d(data, classifiers_names, nbClassifiers, nbExamples, - file_name, minSize=10, labels=None, - width_denominator=2.0, height_denominator=20.0, stats_iter=1, - use_plotly=True, example_ids=None): +def plot_2d(data, classifiers_names, nb_classifiers, file_name, labels=None, + stats_iter=1, use_plotly=True, example_ids=None): r"""Used to generate a 2D plot of the errors. Parameters @@ -177,12 +168,8 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples, and -100 if the example was not classified. classifiers_names : list of str The names of the classifiers. - nbClassifiers : int + nb_classifiers : int The number of classifiers. - nbExamples : int - The number of examples. - nbCopies : int - The number of times the data is copied (classifier wise) in order for the figure to be more readable file_name : str The name of the file in which the figure will be saved ("error_analysis_2D.png" will be added at the end) minSize : int, optinal, default: 10 @@ -198,13 +185,18 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples, ------- """ fig, ax = plt.subplots(nrows=1, ncols=1, ) + label_index_list = np.concatenate([np.where(labels == i)[0] for i in + np.unique( + labels)]) cmap, norm = iter_cmap(stats_iter) - cax = plt.imshow(data, cmap=cmap, norm=norm, + cax = plt.imshow(data[np.flip(label_index_list), :], cmap=cmap, norm=norm, aspect='auto') plt.title('Errors depending on the classifier') - ticks = np.arange(0, nbClassifiers, 1) + ticks = np.arange(0, nb_classifiers, 1) tick_labels = classifiers_names plt.xticks(ticks, tick_labels, rotation="vertical") + plt.yticks([], []) + plt.ylabel("Examples") cbar = fig.colorbar(cax, ticks=[-100 * stats_iter / 2, 0, stats_iter]) cbar.ax.set_yticklabels(['Unseen', 'Always Wrong', 'Always Right']) @@ -213,9 +205,7 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples, plt.close() ### The following part is used to generate an interactive graph. if use_plotly: - label_index_list = np.concatenate([np.where(labels == i)[0] for i in - np.unique( - labels)]) # [np.where(labels==i)[0] for i in np.unique(labels)] + # [np.where(labels==i)[0] for i in np.unique(labels)] hover_text = [[example_ids[example_index] + " failed " + str( stats_iter - data[ example_index, classifier_index]) + " time(s), labelled " + str( @@ -223,7 +213,6 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples, for classifier_index in range(data.shape[1])] for example_index in range(data.shape[0])] fig = plotly.graph_objs.Figure() - # for row_index, label_index in enumerate(label_index_list): fig.add_trace(plotly.graph_objs.Heatmap( x=list(classifiers_names), y=[example_ids[label_ind] for label_ind in label_index_list], @@ -234,8 +223,7 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples, colorbar=dict(tickvals=[0, stats_iter], ticktext=["Always Wrong", "Always Right"]), reversescale=True), ) - fig.update_yaxes(title_text="Examples", showticklabels=False, ticks='') - fig.update_xaxes(showticklabels=False, ) + fig.update_yaxes(title_text="Examples", showticklabels=True) fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)') fig.update_xaxes(showticklabels=True, ) @@ -244,7 +232,8 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples, del fig -def plot_errors_bar(error_on_examples, nbClassifiers, nbExamples, fileName): +def plot_errors_bar(error_on_examples, nb_examples, file_name, + use_plotly=True, example_ids=None): r"""Used to generate a barplot of the muber of classifiers that failed to classify each examples Parameters @@ -253,23 +242,28 @@ def plot_errors_bar(error_on_examples, nbClassifiers, nbExamples, fileName): An array counting how many classifiers failed to classifiy each examples. classifiers_names : list of str The names of the classifiers. - nbClassifiers : int + nb_classifiers : int The number of classifiers. - nbExamples : int + nb_examples : int The number of examples. - fileName : str + file_name : str The name of the file in which the figure will be saved ("error_analysis_2D.png" will be added at the end) Returns ------- """ fig, ax = plt.subplots() - x = np.arange(nbExamples) + x = np.arange(nb_examples) plt.bar(x, error_on_examples) - plt.ylim([0, nbClassifiers]) plt.title("Number of classifiers that failed to classify each example") - fig.savefig(fileName + "error_analysis_bar.png", transparent=True) + fig.savefig(file_name + "error_analysis_bar.png", transparent=True) plt.close() + if use_plotly: + fig = plotly.graph_objs.Figure([plotly.graph_objs.Bar(x=example_ids, y=error_on_examples)]) + plotly.offline.plot(fig, filename=file_name + "error_analysis_bar.html", + auto_open=False) + + def iter_cmap(statsIter): diff --git a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py index 734092cd1d07175bc72d663bdb47e54eb3e2727a..d65af38d05c9d67ff071c67e1eac7c97a937c765 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/hyper_parameter_search.py @@ -1,6 +1,7 @@ import itertools import sys import traceback +import yaml from abc import abstractmethod import matplotlib.pyplot as plt @@ -8,7 +9,7 @@ import numpy as np from scipy.stats import randint, uniform from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, \ ParameterGrid, ParameterSampler -from sklearn.base import clone +from sklearn.base import clone, BaseEstimator from .multiclass import MultiClassWrapper from .organization import secure_file_path @@ -132,9 +133,8 @@ class HPSearch: scores_array = scores_array[sorted_indices] output_string = "" for parameters, score in zip(tested_params, scores_array): - if "random_state" in parameters: - parameters.pop("random_state") - output_string += "\n{}\t\t{}".format(parameters, score) + formatted_params = format_params(parameters) + output_string += "\n{}\n\t\t{}".format(yaml.dump(formatted_params), score) if self.tracebacks: output_string += "Failed : \n\n\n" for traceback, params in zip(self.tracebacks, self.tracebacks_params): @@ -417,6 +417,20 @@ class CustomUniform: return unif +def format_params(params): + if isinstance(params, dict): + return dict((key, format_params(value)) + for key, value in params.items() + if key!="random_state") + elif isinstance(params, BaseEstimator): + return params.__class__.__name__ + elif isinstance(params, np.ndarray): + return [float(param) for param in params] + elif isinstance(params, np.float64): + return float(params) + else: + return params + # def randomized_search_(dataset_var, labels, classifier_package, classifier_name, # metrics_list, learning_indices, k_folds, random_state,