diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py index 2d6a1532f649598eb8dbfa8d791e8e68d088993e..084b01dcb1435a5d0457e7155650fb0b6cdc4e0a 100644 --- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py +++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py @@ -14,7 +14,8 @@ from . import monoview_classifiers from . import multiview_classifiers from .monoview.exec_classif_mono_view import exec_monoview from .multiview.exec_multiview import exec_multiview -from .result_analysis import get_results, plot_results_noise, analyze_iterations +from .result_analysis.noise_analysis import plot_results_noise +from .result_analysis.execution import analyze_iterations, analyze from .utils import execution, dataset, configuration from .utils.organization import secure_file_path from .utils.dataset import delete_HDF5 @@ -760,7 +761,7 @@ def exec_benchmark(nb_cores, stats_iter, benchmark_arguments_dictionaries, directory, metrics, dataset_var, track_tracebacks, exec_one_benchmark_mono_core=exec_one_benchmark_mono_core, - get_results=get_results, delete=delete_HDF5, + analyze=analyze, delete=delete_HDF5, analyze_iterations=analyze_iterations): r"""Used to execute the needed benchmark(s) on multicore or mono-core functions. @@ -834,12 +835,12 @@ def exec_benchmark(nb_cores, stats_iter, # Do everything with flagging logging.debug("Start:\t Analyzing predictions") - results_mean_stds = get_results(results, stats_iter, - benchmark_arguments_dictionaries, - metrics, - directory, - dataset_var.example_ids, - dataset_var.get_labels()) + results_mean_stds = analyze(results, stats_iter, + benchmark_arguments_dictionaries, + metrics, + directory, + dataset_var.example_ids, + dataset_var.get_labels()) logging.debug("Done:\t Analyzing predictions") delete(benchmark_arguments_dictionaries, nb_cores, dataset_var) return results_mean_stds diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis.py deleted file mode 100644 index fdbe28178b653a5929bddd13d3aca9ec8b90ea66..0000000000000000000000000000000000000000 --- a/multiview_platform/mono_multi_view_classifiers/result_analysis.py +++ /dev/null @@ -1,1320 +0,0 @@ -# Import built-in modules -import logging -import os - -import matplotlib as mpl -# Import third party modules -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import plotly -from matplotlib.patches import Patch - -# Import own Modules -from .monoview.monoview_utils import MonoviewResult -from .utils.organization import secure_file_path - -# Author-Info -__author__ = "Baptiste Bauvin" -__status__ = "Prototype" # Production, Development, Prototype - - -def save_dict_to_text(dictionnary, output_file): - # TODO : smarter way must exist - output_file.write("Failed algorithms : \n\t" + ",\n\t".join( - dictionnary.keys()) + ".\n\n\n") - for key, value in dictionnary.items(): - output_file.write(key) - output_file.write("\n\n") - output_file.write(value) - output_file.write("\n\n\n") - return dictionnary.keys() - - -def plot_results_noise(directory, noise_results, metric_to_plot, name, - width=0.1): - avail_colors = ["tab:blue", "tab:orange", "tab:brown", "tab:gray", - "tab:olive", "tab:red", ] - colors = {} - lengend_patches = [] - noise_levels = np.array([noise_level for noise_level, _ in noise_results]) - df = pd.DataFrame( - columns=['noise_level', 'classifier_name', 'mean_score', 
'score_std'], ) - if len(noise_results) > 1: - width = np.min(np.diff(noise_levels)) - for noise_level, noise_result in noise_results: - classifiers_names, meaned_metrics, metric_stds = [], [], [] - for noise_result in noise_result: - classifier_name = noise_result[0].split("-")[0] - if noise_result[1] is metric_to_plot: - classifiers_names.append(classifier_name) - meaned_metrics.append(noise_result[2]) - metric_stds.append(noise_result[3]) - if classifier_name not in colors: - try: - colors[classifier_name] = avail_colors.pop(0) - except IndexError: - colors[classifier_name] = "k" - classifiers_names, meaned_metrics, metric_stds = np.array( - classifiers_names), np.array(meaned_metrics), np.array(metric_stds) - sorted_indices = np.argsort(-meaned_metrics) - for index in sorted_indices: - row = pd.DataFrame( - {'noise_level': noise_level, - 'classifier_name': classifiers_names[index], - 'mean_score': meaned_metrics[index], - 'score_std': metric_stds[index]}, index=[0]) - df = pd.concat([df, row]) - plt.bar(noise_level, meaned_metrics[index], yerr=metric_stds[index], - width=0.5 * width, label=classifiers_names[index], - color=colors[classifiers_names[index]]) - for classifier_name, color in colors.items(): - lengend_patches.append(Patch(facecolor=color, label=classifier_name)) - plt.legend(handles=lengend_patches, loc='lower center', - bbox_to_anchor=(0.5, 1.05), ncol=2) - plt.ylabel(metric_to_plot) - plt.title(name) - plt.xticks(noise_levels) - plt.xlabel("Noise level") - plt.savefig(os.path.join(directory, name + "_noise_analysis.png")) - plt.close() - df.to_csv(os.path.join(directory, name + "_noise_analysis.csv")) - - -def plot_metric_scores(train_scores, test_scores, names, nb_results, - metric_name, - file_name, - tag="", train_STDs=None, test_STDs=None, - use_plotly=True): - r"""Used to plot and save the score barplot for a specific metric. - - Parameters - ---------- - train_scores : list or np.array of floats - The scores of each classifier on the training set. - test_scores : list or np.array of floats - The scores of each classifier on the testing set. - names : list or np.array of strs - The names of all the classifiers. - nb_results: int - The number of classifiers to plot. - metric_name : str - The plotted metric's name - file_name : str - The name of the file where the figure will be saved. - tag : str - Some text to personalize the title, must start with a whitespace. - train_STDs : np.array of floats or None - The array containing the standard deviations for the averaged scores on the training set. - test_STDs : np.array of floats or None - The array containing the standard deviations for the averaged scores on the testing set. 
- - Returns - ------- - """ - - figKW, barWidth = get_fig_size(nb_results) - - names, train_scores, test_scores, train_STDs, test_STDs = sort_by_test_score( - train_scores, test_scores, names, - train_STDs, test_STDs) - - f, ax = plt.subplots(nrows=1, ncols=1, **figKW) - ax.set_title(metric_name + "\n" + tag + " scores for each classifier") - - rects = ax.bar(range(nb_results), test_scores, barWidth, color="0.1", - yerr=test_STDs) - rect2 = ax.bar(np.arange(nb_results) + barWidth, train_scores, barWidth, - color="0.8", yerr=train_STDs) - autolabel(rects, ax, set=1, std=test_STDs) - autolabel(rect2, ax, set=2, std=train_STDs) - ax.legend((rects[0], rect2[0]), ('Test', 'Train')) - ax.set_ylim(-0.1, 1.1) - ax.set_xticks(np.arange(nb_results) + barWidth / 2) - ax.set_xticklabels(names, rotation="vertical") - - try: - plt.tight_layout() - except: - pass - f.savefig(file_name + '.png', transparent=True) - plt.close() - import pandas as pd - if train_STDs is None: - dataframe = pd.DataFrame(np.transpose(np.concatenate(( - train_scores.reshape((train_scores.shape[0], 1)), - test_scores.reshape((train_scores.shape[0], 1))), axis=1)), - columns=names, index=["Train", "Test"]) - else: - dataframe = pd.DataFrame(np.transpose(np.concatenate(( - train_scores.reshape((train_scores.shape[0], 1)), - train_STDs.reshape((train_scores.shape[0], 1)), - test_scores.reshape((train_scores.shape[0], 1)), - test_STDs.reshape((train_scores.shape[0], 1))), axis=1)), - columns=names, index=["Train", "Train STD", "Test", "Test STD"]) - dataframe.to_csv(file_name + ".csv") - if use_plotly: - fig = plotly.graph_objs.Figure() - fig.add_trace(plotly.graph_objs.Bar( - name='Train', - x=names, y=train_scores, - error_y=dict(type='data', array=train_STDs), - marker_color="lightgrey", - )) - fig.add_trace(plotly.graph_objs.Bar( - name='Test', - x=names, y=test_scores, - error_y=dict(type='data', array=test_STDs), - marker_color="black", - )) - - fig.update_layout( - title=metric_name + "<br>" + tag + " scores for each classifier") - fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', - plot_bgcolor='rgba(0,0,0,0)') - plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False) - del fig - - -def plot_2d(data, classifiers_names, nbClassifiers, nbExamples, - file_name, minSize=10, labels=None, - width_denominator=2.0, height_denominator=20.0, stats_iter=1, - use_plotly=True, example_ids=None): - r"""Used to generate a 2D plot of the errors. - - Parameters - ---------- - data : np.array of shape `(nbClassifiers, nbExamples)` - A matrix with zeros where the classifier failed to classifiy the example, ones where it classified it well - and -100 if the example was not classified. - classifiers_names : list of str - The names of the classifiers. - nbClassifiers : int - The number of classifiers. - nbExamples : int - The number of examples. - nbCopies : int - The number of times the data is copied (classifier wise) in order for the figure to be more readable - file_name : str - The name of the file in which the figure will be saved ("error_analysis_2D.png" will be added at the end) - minSize : int, optinal, default: 10 - The minimum width and height of the figure. - width_denominator : float, optional, default: 1.0 - To obtain the image width, the number of classifiers will be divided by this number. - height_denominator : float, optional, default: 1.0 - To obtain the image width, the number of examples will be divided by this number. - stats_iter : int, optional, default: 1 - The number of statistical iterations realized. 
- - Returns - ------- - """ - fig, ax = plt.subplots(nrows=1, ncols=1, ) - cmap, norm = iter_cmap(stats_iter) - cax = plt.imshow(data, cmap=cmap, norm=norm, - aspect='auto') - plt.title('Errors depending on the classifier') - ticks = np.arange(0, nbClassifiers, 1) - tick_labels = classifiers_names - plt.xticks(ticks, tick_labels, rotation="vertical") - cbar = fig.colorbar(cax, ticks=[-100 * stats_iter / 2, 0, stats_iter]) - cbar.ax.set_yticklabels(['Unseen', 'Always Wrong', 'Always Right']) - - fig.savefig(file_name + "error_analysis_2D.png", bbox_inches="tight", - transparent=True) - plt.close() - ### The following part is used to generate an interactive graph. - if use_plotly: - label_index_list = np.concatenate([np.where(labels == i)[0] for i in - np.unique( - labels)]) # [np.where(labels==i)[0] for i in np.unique(labels)] - hover_text = [[example_ids[example_index] + " failed " + str( - stats_iter - data[ - example_index, classifier_index]) + " time(s), labelled " + str( - labels[example_index]) - for classifier_index in range(data.shape[1])] - for example_index in range(data.shape[0])] - fig = plotly.graph_objs.Figure() - # for row_index, label_index in enumerate(label_index_list): - fig.add_trace(plotly.graph_objs.Heatmap( - x=list(classifiers_names), - y=[example_ids[label_ind] for label_ind in label_index_list], - z=data[label_index_list, :], - text=[hover_text[label_ind] for label_ind in label_index_list], - hoverinfo=["y", "x", "text"], - colorscale="Greys", - colorbar=dict(tickvals=[0, stats_iter], - ticktext=["Always Wrong", "Always Right"]), - reversescale=True), ) - fig.update_yaxes(title_text="Examples", showticklabels=False, ticks='') - fig.update_xaxes(showticklabels=False, ) - fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', - plot_bgcolor='rgba(0,0,0,0)') - fig.update_xaxes(showticklabels=True, ) - plotly.offline.plot(fig, filename=file_name + "error_analysis_2D.html", - auto_open=False) - del fig - - -def plot_errors_bar(error_on_examples, nbClassifiers, nbExamples, fileName): - r"""Used to generate a barplot of the muber of classifiers that failed to classify each examples - - Parameters - ---------- - error_on_examples : np.array of shape `(nbExamples,)` - An array counting how many classifiers failed to classifiy each examples. - classifiers_names : list of str - The names of the classifiers. - nbClassifiers : int - The number of classifiers. - nbExamples : int - The number of examples. - fileName : str - The name of the file in which the figure will be saved ("error_analysis_2D.png" will be added at the end) - - Returns - ------- - """ - fig, ax = plt.subplots() - x = np.arange(nbExamples) - plt.bar(x, error_on_examples) - plt.ylim([0, nbClassifiers]) - plt.title("Number of classifiers that failed to classify each example") - fig.savefig(fileName + "error_analysis_bar.png", transparent=True) - plt.close() - - -def iter_cmap(statsIter): - r"""Used to generate a colormap that will have a tick for each iteration : the whiter the better. - - Parameters - ---------- - statsIter : int - The number of statistical iterations. - - Returns - ------- - cmap : matplotlib.colors.ListedColorMap object - The colormap. - norm : matplotlib.colors.BoundaryNorm object - The bounds for the colormap. 
- """ - cmapList = ["red", "0.0"] + [str(float((i + 1)) / statsIter) for i in - range(statsIter)] - cmap = mpl.colors.ListedColormap(cmapList) - bounds = [-100 * statsIter - 0.5, -0.5] - for i in range(statsIter): - bounds.append(i + 0.5) - bounds.append(statsIter + 0.5) - norm = mpl.colors.BoundaryNorm(bounds, cmap.N) - return cmap, norm - - -def autolabel(rects, ax, set=1, std=None): - r"""Used to print the score below the bars. - - Parameters - ---------- - rects : pyplot bar object - THe bars. - ax : pyplot ax object - The ax. - set : integer - 1 means the test scores, anything else means the train score - std: None or array - The standard deviations in the case of statsIter results. - - Returns - ------- - """ - if set == 1: - text_height = -0.05 - weight = "bold" - else: - text_height = -0.07 - weight = "normal" - for rectIndex, rect in enumerate(rects): - height = rect.get_height() - if std is not None: - ax.text(rect.get_x() + rect.get_width() / 2., text_height, - "%.2f" % height + u'\u00B1' + "%.2f" % std[rectIndex], - weight=weight, - ha='center', va='bottom', size="x-small") - else: - ax.text(rect.get_x() + rect.get_width() / 2., text_height, - "%.2f" % height, weight=weight, - ha='center', va='bottom', size="small") - - -def get_fig_size(nb_results, min_size=15, multiplier=1.0, bar_width=0.35): - r"""Used to get the image size to save the figure and the bar width, depending on the number of scores to plot. - - Parameters - ---------- - nb_results : int - The number of couple of bar to plot. - min_size : int - The minimum size of the image, if there are few classifiers to plot. - multiplier : float - The ratio between the image size and the number of classifiers. - bar_width : float - The width of the bars in the figure. Mainly here to centralize bar_width. - - Returns - ------- - fig_kwargs : dict of arguments - The argument restraining the size of the figure, usable directly in the `subplots` function of - `matplotlib.pyplot`. - bar_width : float - The width of the bars in the figure. Mainly here to centralize bar_width. - """ - size = nb_results * multiplier - if size < min_size: - size = min_size - fig_kwargs = {"figsize": (size, size / 3)} - return fig_kwargs, bar_width - - -def get_metrics_scores(metrics, results): - r"""Used to extract metrics scores in case of biclass classification - - Parameters - ---------- - metrics : list of lists - The metrics names with configuration metrics[i][0] = name of metric i - results : list of MonoviewResult and MultiviewResults objects - A list containing all the results for all the monoview experimentations. - - Returns - ------- - metricsScores : dict of dict of list - Regroups all the scores for each metrics for each classifier and for the train and test sets. - organized as : - -`metricScores[metric_name]["classifiers_names"]` is a list of all the classifiers available for this metric, - -`metricScores[metric_name]["train_scores"]` is a list of all the available classifiers scores on the train set, - -`metricScores[metric_name]["test_scores"]` is a list of all the available classifiers scores on the test set. 
- """ - classifier_names = [] - classifier_names = [classifierResult.get_classifier_name() - for classifierResult in results - if classifierResult.get_classifier_name() - not in classifier_names] - metrics_scores = dict((metric[0], pd.DataFrame(data=np.zeros((2, - len( - classifier_names))), - index=["train", "test"], - columns=classifier_names)) - for metric in metrics) - - for metric in metrics: - for classifierResult in results: - metrics_scores[metric[0]].loc[ - "train", classifierResult.get_classifier_name()] = \ - classifierResult.metrics_scores[metric[0]][0] - metrics_scores[metric[0]].loc[ - "test", classifierResult.get_classifier_name()] = \ - classifierResult.metrics_scores[metric[0]][1] - - return metrics_scores - - -def get_example_errors(groud_truth, results): - r"""Used to get for each classifier and each example whether the classifier has misclassified the example or not. - - Parameters - ---------- - ground_truth : numpy array of 0, 1 and -100 (if multiclass) - The array with the real labels of the examples - results : list of MonoviewResult and MultiviewResults objects - A list containing all the resluts for all the mono- & multi-view experimentations. - - Returns - ------- - example_errors : dict of np.array - For each classifier, has an entry with a `np.array` over the examples, with a 1 if the examples was - well-classified, a 0 if not and if it's multiclass classification, a -100 if the examples was not seen during - the one versus one classification. - """ - example_errors = {} - - for classifier_result in results: - error_on_examples = np.equal(classifier_result.full_labels_pred, - groud_truth).astype(int) - unseen_examples = np.where(groud_truth == -100)[0] - error_on_examples[unseen_examples] = -100 - example_errors[ - classifier_result.get_classifier_name()] = error_on_examples - return example_errors - - -def sort_by_test_score(train_scores, test_scores, names, train_STDs=None, - test_STDs=None): - r"""Used to sort the results (names and both scores) in descending test score order. - - Parameters - ---------- - train_scores : np.array of floats - The scores of each classifier on the training set. - test_scores : np.array of floats - The scores of each classifier on the testing set. - names : np.array of strs - The names of all the classifiers. - train_STDs : np.array of floats or None - The array containing the standard deviations for the averaged scores on the training set. - test_STDs : np.array of floats or None - The array containing the standard deviations for the averaged scores on the testing set. - - Returns - ------- - sorted_names : np.array of strs - The names of all the classifiers, sorted in descending test score order. - sorted_train_scores : np.array of floats - The scores of each classifier on the training set, sorted in descending test score order. - sorted_test_scores : np.array of floats - The scores of each classifier on the testing set, sorted in descending test score order. - sorted_train_STDs : np.array of floats or None - The array containing the standard deviations for the averaged scores on the training set, - sorted in descending test score order. - sorted_test_STDs : np.array of floats or None - The array containing the standard deviations for the averaged scores on the testing set, - sorted in descending test score order. 
- """ - sorted_indices = np.argsort(test_scores) - sorted_test_scores = test_scores[sorted_indices] - sorted_train_scores = train_scores[sorted_indices] - sorted_names = names[sorted_indices] - if train_STDs is not None and test_STDs is not None: - sorted_train_STDs = train_STDs[sorted_indices] - sorted_test_STDs = test_STDs[sorted_indices] - else: - sorted_train_STDs = None - sorted_test_STDs = None - return sorted_names, sorted_train_scores, sorted_test_scores, sorted_train_STDs, sorted_test_STDs - - -def publish_metrics_graphs(metrics_scores, directory, database_name, - labels_names): - r"""Used to sort the results (names and both scores) in descending test score order. - - Parameters - ---------- - metrics_scores : dict of dicts of lists or np.arrays - Keys : The names of the metrics. - Values : The scores and names of each classifier . - directory : str - The path to the directory where the figures will be saved. - database_name : str - The name of the database on which the experiments where conducted. - labels_names : list of strs - The name corresponding to each numerical label. - - Returns - ------- - results - """ - results = [] - for metric_name, metric_dataframe in metrics_scores.items(): - logging.debug( - "Start:\t Biclass score graph generation for " + metric_name) - train_scores, test_scores, classifier_names, \ - file_name, nb_results, results = init_plot(results, metric_name, - metric_dataframe, directory, - database_name, labels_names) - - plot_metric_scores(train_scores, test_scores, classifier_names, - nb_results, metric_name, file_name, - tag=" " + " vs ".join(labels_names)) - logging.debug( - "Done:\t Biclass score graph generation for " + metric_name) - return results - - -def init_plot(results, metric_name, metric_dataframe, - directory, database_name, labels_names): - train = np.array(metric_dataframe.loc["train"]) - test = np.array(metric_dataframe.loc["test"]) - classifier_names = np.array(metric_dataframe.columns) - - nb_results = metric_dataframe.shape[1] - - file_name = os.path.join(directory, database_name + "-" + "_vs_".join( - labels_names) + "-" + metric_name) - - results += [[classifiers_name, metric_name, testMean, testSTD] - for classifiers_name, testMean, testSTD in - zip(classifier_names, test, np.zeros(len(test)))] - return train, test, classifier_names, file_name, nb_results, results - - -def gen_error_data(example_errors): - r"""Used to format the error data in order to plot it efficiently. The data is saves in a `.csv` file. - - Parameters - ---------- - example_errors : dict of dicts of np.arrays - A dictionary conatining all the useful data. Organized as : - `example_errors[<classifier_name>]["error_on_examples"]` is a np.array of ints with a - - 1 if the classifier `<classifier_name>` classifier well the example, - - 0 if it fail to classify the example, - - -100 if it did not classify the example (multiclass one versus one). - base_file_name : list of str - The name of the file in which the figure will be saved ("2D_plot_data.csv" and "bar_plot_data.csv" will - be added at the end). - nbCopies : int, optinal, default: 2 - The number of times the data is copied (classifier wise) in order for the figure to be more readable. - - - Returns - ------- - nbClassifiers : int - Number of different classifiers. - nbExamples : int - NUmber of examples. - nbCopies : int - The number of times the data is copied (classifier wise) in order for the figure to be more readable. - classifiers_names : list of strs - The names fo the classifiers. 
- data : np.array of shape `(nbClassifiers, nbExamples)` - A matrix with zeros where the classifier failed to classifiy the example, ones where it classified it well - and -100 if the example was not classified. - error_on_examples : np.array of shape `(nbExamples,)` - An array counting how many classifiers failed to classifiy each examples. - """ - nb_classifiers = len(example_errors) - nb_examples = len(list(example_errors.values())[0]) - classifiers_names = list(example_errors.keys()) - - data_2d = np.zeros((nb_examples, nb_classifiers)) - for classifierIndex, (classifier_name, error_on_examples) in enumerate( - example_errors.items()): - try: - data_2d[:, classifierIndex] = error_on_examples - except: - import pdb; - pdb.set_trace() - error_on_examples = -1 * np.sum(data_2d, axis=1) / nb_classifiers - return nb_classifiers, nb_examples, classifiers_names, data_2d, error_on_examples - - -def publish_example_errors(example_errors, directory, databaseName, - labels_names, example_ids, labels): - logging.debug("Start:\t Biclass Label analysis figure generation") - - base_file_name = os.path.join(directory, databaseName + "-" + "_vs_".join( - labels_names) + "-") - - nb_classifiers, nb_examples, classifiers_names, \ - data_2d, error_on_examples = gen_error_data(example_errors) - - np.savetxt(base_file_name + "2D_plot_data.csv", data_2d, delimiter=",") - np.savetxt(base_file_name + "bar_plot_data.csv", error_on_examples, - delimiter=",") - - plot_2d(data_2d, classifiers_names, nb_classifiers, nb_examples, - base_file_name, example_ids=example_ids, labels=labels) - - plot_errors_bar(error_on_examples, nb_classifiers, nb_examples, - base_file_name) - - logging.debug("Done:\t Biclass Label analysis figures generation") - - -def plot_durations(durations, directory, database_name, durations_stds=None): - file_name = os.path.join(directory, database_name + "-durations") - durations.to_csv(file_name+"_dataframe.csv") - fig = plotly.graph_objs.Figure() - if durations_stds is None: - durations_stds = pd.DataFrame(0, durations.index, durations.columns) - else: - durations_stds.to_csv(file_name+"_stds_dataframe.csv") - fig.add_trace(plotly.graph_objs.Bar(name='Hyper-parameter Optimization', - x=durations.index, - y=durations['hps'], - error_y=dict(type='data', - array=durations_stds["hps"]), - marker_color="grey")) - fig.add_trace(plotly.graph_objs.Bar(name='Fit (on train set)', - x=durations.index, - y=durations['fit'], - error_y=dict(type='data', - array=durations_stds["fit"]), - marker_color="black")) - fig.add_trace(plotly.graph_objs.Bar(name='Prediction (on test set)', - x=durations.index, - y=durations['pred'], - error_y=dict(type='data', - array=durations_stds["pred"]), - marker_color="lightgrey")) - fig.update_layout(title="Durations for each classfier", - yaxis_title="Duration (s)") - fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', - plot_bgcolor='rgba(0,0,0,0)') - plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False) - - -def publish_feature_importances(feature_importances, directory, database_name, - feature_stds=None): - for view_name, feature_importance in feature_importances.items(): - if not os.path.exists(os.path.join(directory, "feature_importances")): - os.mkdir(os.path.join(directory, "feature_importances")) - file_name = os.path.join(directory, "feature_importances", - database_name + "-" + view_name - + "-feature_importances") - if feature_stds is not None: - feature_std = feature_stds[view_name] - feature_std.to_csv(file_name + "_dataframe_stds.csv") - else: - 
feature_std = pd.DataFrame(data=np.zeros(feature_importance.shape), - index=feature_importance.index, - columns=feature_importance.columns) - feature_importance.to_csv(file_name + "_dataframe.csv") - hover_text = [["-Feature :" + str(feature_name) + - "<br>-Classifier : " + classifier_name + - "<br>-Importance : " + str( - feature_importance.loc[feature_name][classifier_name]) + - "<br>-STD : " + str( - feature_std.loc[feature_name][classifier_name]) - for classifier_name in list(feature_importance.columns)] - for feature_name in list(feature_importance.index)] - fig = plotly.graph_objs.Figure(data=plotly.graph_objs.Heatmap( - x=list(feature_importance.columns), - y=list(feature_importance.index), - z=feature_importance.values, - text=hover_text, - hoverinfo=["text"], - colorscale="Greys", - reversescale=False)) - fig.update_layout( - xaxis={"showgrid": False, "showticklabels": False, "ticks": ''}, - yaxis={"showgrid": False, "showticklabels": False, "ticks": ''}) - fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', - plot_bgcolor='rgba(0,0,0,0)') - plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False) - - del fig - - -def get_arguments(benchmark_argument_dictionaries, iter_index): - r"""Used to get the arguments passed to the benchmark executing function corresponding to the flag of a - biclass experimentation. - - Parameters - ---------- - flag : list - The needed experimentation's flag. - benchmark_argument_dictionaries : list of dicts - The list of all the arguments passed to the benchmark executing functions. - - Returns - ------- - benchmarkArgumentDictionary : dict - All the arguments passed to the benchmark executing function for the needed experimentation. - """ - for benchmarkArgumentDictionary in benchmark_argument_dictionaries: - if benchmarkArgumentDictionary["flag"] == iter_index: - return benchmarkArgumentDictionary - - -def get_feature_importances(result, feature_names=None): - r"""Extracts the feature importance from the monoview results and stores them in a dictionnary : - feature_importance[view_name] is a pandas.DataFrame of size n_feature*n_clf - containing a score of importance for each feature. 
- - Parameters - ---------- - result : list of results - - Returns - ------- - feature_importances : dict of pd.DataFrame - The dictionary containing all the feature importance for each view as pandas DataFrames - """ - feature_importances = {} - for classifier_result in result: - if isinstance(classifier_result, MonoviewResult): - if classifier_result.view_name not in feature_importances: - feature_importances[classifier_result.view_name] = pd.DataFrame( - index=feature_names) - if hasattr(classifier_result.clf, 'feature_importances_'): - feature_importances[classifier_result.view_name][ - classifier_result.classifier_name] = classifier_result.clf.feature_importances_ - else: - feature_importances[classifier_result.view_name][ - classifier_result.classifier_name] = np.zeros( - classifier_result.n_features) - return feature_importances - - -def get_duration(results): - df = pd.DataFrame(columns=["hps", "fit", "pred"], ) - for classifier_result in results: - df.at[classifier_result.get_classifier_name(), - "hps"] = classifier_result.hps_duration - df.at[classifier_result.get_classifier_name(), - "fit"] = classifier_result.fit_duration - df.at[classifier_result.get_classifier_name(), - "pred"] = classifier_result.pred_duration - return df - - -def publish_tracebacks(directory, database_name, labels_names, tracebacks, - iter_index): - if tracebacks: - with open(os.path.join(directory, database_name + - "-iter" + str(iter_index) + - "-tacebacks.txt"), - "w") as traceback_file: - failed_list = save_dict_to_text(tracebacks, traceback_file) - flagged_list = [_ + "-iter" + str(iter_index) for _ in failed_list] - else: - flagged_list = {} - return flagged_list - - -def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter, - metrics, example_ids, labels): - r"""Used to extract and format the results of the different biclass experimentations performed. - - Parameters - ---------- - results : list - The result list returned by the benchmark execution function. For each executed benchmark, contains - a flag & a result element. - The flag is a way to identify to which benchmark the results belong, formatted this way : - `flag = iter_index, [classifierPositive, classifierNegative]` with - - `iter_index` the index of the statistical iteration - - `[classifierPositive, classifierNegative]` the indices of the labels considered positive and negative - by the classifier (mainly useful for one versus one multiclass classification). - benchmark_argument_dictionaries : list of dicts - The list of all the arguments passed to the benchmark executing functions. - statsIter : int - The number of statistical iterations. - metrics : list of lists - THe list containing the metrics and their configuration. - - Returns - ------- - biclassResults : list of dicts of dicts - The list contains a dictionary for each statistical iteration. This dictionary contains a dictionary for each - label combination, regrouping the scores for each metrics and the information useful to plot errors on examples. 
- """ - logging.debug("Srart:\t Analzing all biclass resuls") - iter_results = {"metrics_scores": [i for i in range(stats_iter)], - "example_errors": [i for i in range(stats_iter)], - "feature_importances": [i for i in range(stats_iter)], - "durations":[i for i in range(stats_iter)]} - flagged_tracebacks_list = [] - fig_errors = [] - for iter_index, result, tracebacks in results: - arguments = get_arguments(benchmark_argument_dictionaries, iter_index) - - metrics_scores = get_metrics_scores(metrics, result) - example_errors = get_example_errors(labels, result) - feature_importances = get_feature_importances(result) - durations = get_duration(result) - directory = arguments["directory"] - - database_name = arguments["args"]["name"] - labels_names = [arguments["labels_dictionary"][0], - arguments["labels_dictionary"][1]] - - flagged_tracebacks_list += publish_tracebacks(directory, database_name, - labels_names, tracebacks, - iter_index) - res = publish_metrics_graphs(metrics_scores, directory, database_name, - labels_names) - publish_example_errors(example_errors, directory, database_name, - labels_names, example_ids, labels) - publish_feature_importances(feature_importances, directory, - database_name) - plot_durations(durations, directory, database_name) - - iter_results["metrics_scores"][iter_index] = metrics_scores - iter_results["example_errors"][iter_index] = example_errors - iter_results["feature_importances"][iter_index] = feature_importances - iter_results["labels"] = labels - iter_results["durations"][iter_index] = durations - - logging.debug("Done:\t Analzing all biclass resuls") - - return res, iter_results, flagged_tracebacks_list - - -def numpy_mean_and_std(scores_array): - return np.mean(scores_array, axis=1), np.std(scores_array, axis=1) - - -def publish_all_metrics_scores(iter_results, directory, - data_base_name, stats_iter, - min_size=10): - results = [] - secure_file_path(os.path.join(directory, "a")) - - for metric_name, scores in iter_results.items(): - train = np.array(scores["mean"].loc["train"]) - test = np.array(scores["mean"].loc["test"]) - names = np.array(scores["mean"].columns) - train_std = np.array(scores["std"].loc["train"]) - test_std = np.array(scores["std"].loc["test"]) - - file_name = os.path.join(directory, data_base_name + "-Mean_on_" + str( - stats_iter) + "_iter-" + metric_name) - nbResults = names.shape[0] - - plot_metric_scores(train, test, names, nbResults, - metric_name, file_name, tag=" averaged", - train_STDs=train_std, test_STDs=test_std) - results += [[classifier_name, metric_name, test_mean, test_std] - for classifier_name, test_mean, test_std - in zip(names, test, test_std)] - return results - - -def gen_error_data_glob(iter_results, stats_iter): - nb_examples = next(iter(iter_results.values())).shape[0] - nb_classifiers = len(iter_results) - data = np.zeros((nb_examples, nb_classifiers), dtype=int) - classifier_names = [] - for clf_index, (classifier_name, error_data) in enumerate( - iter_results.items()): - data[:, clf_index] = error_data - classifier_names.append(classifier_name) - error_on_examples = -1 * np.sum(data, axis=1) + ( - nb_classifiers * stats_iter) - return nb_examples, nb_classifiers, data, error_on_examples, classifier_names - - -def publish_all_example_errors(iter_results, directory, - stats_iter, - example_ids, labels): - logging.debug( - "Start:\t Global biclass label analysis figure generation") - - nbExamples, nbClassifiers, data, \ - error_on_examples, classifier_names = gen_error_data_glob(iter_results, - stats_iter) 
- - np.savetxt(os.path.join(directory, "clf_errors.csv"), data, delimiter=",") - np.savetxt(os.path.join(directory, "example_errors.csv"), error_on_examples, - delimiter=",") - - plot_2d(data, classifier_names, nbClassifiers, nbExamples, - os.path.join(directory, ""), stats_iter=stats_iter, - example_ids=example_ids, labels=labels) - plot_errors_bar(error_on_examples, nbClassifiers * stats_iter, - nbExamples, os.path.join(directory, "")) - - logging.debug( - "Done:\t Global biclass label analysis figures generation") - - - -def gen_classifiers_dict(results, metrics): - classifiers_dict = dict((classifier_name, classifierIndex) - for classifierIndex, classifier_name - in enumerate( - list(results[list(results.keys())[0]]["metrics_scores"][0][ - metrics[0][0]].columns))) - return classifiers_dict, len(classifiers_dict) - - -def add_new_labels_combination(iterBiclassResults, labelsComination, - nbClassifiers, nbExamples): - if labelsComination not in iterBiclassResults: - iterBiclassResults[labelsComination] = {} - iterBiclassResults[labelsComination]["metrics_scores"] = {} - - iterBiclassResults[labelsComination]["error_on_examples"] = np.zeros( - (nbClassifiers, - nbExamples), - dtype=int) - return iterBiclassResults - - -def add_new_metric(iter_biclass_results, metric, labels_combination, - nb_classifiers, - stats_iter): - if metric[0] not in iter_biclass_results[labels_combination][ - "metrics_scores"]: - iter_biclass_results[labels_combination]["metrics_scores"][ - metric[0]] = { - "train_scores": - np.zeros((nb_classifiers, stats_iter)), - "test_scores": - np.zeros((nb_classifiers, stats_iter))} - return iter_biclass_results - - -def format_previous_results(iter_results_lists): - """ - Formats each statistical iteration's result into a mean/std analysis for - the metrics and adds the errors of each statistical iteration. - - Parameters - ---------- - iter_results_lists : The raw results, for each statistical iteration i contains - - biclass_results[i]["metrics_scores"] is a dictionary with a pd.dataframe - for each metrics - - biclass_results[i]["example_errors"], a dicaitonary with a np.array - for each classifier. 
- - Returns - ------- - metrics_analysis : The mean and std dataframes for each metrics - - error_analysis : A dictionary containing the added errors - arrays for each classifier - - """ - metrics_analysis = {} - feature_importances_analysis = {} - feature_importances_stds = {} - # labels = dict((key,"") for key in biclass_results.keys()) - # for biclass_result in biclass_results.items(): - - metric_concat_dict = {} - for iter_index, metrics_score in enumerate( - iter_results_lists["metrics_scores"]): - for metric_name, dataframe in metrics_score.items(): - if metric_name not in metric_concat_dict: - metric_concat_dict[metric_name] = dataframe - else: - metric_concat_dict[metric_name] = pd.concat( - [metric_concat_dict[metric_name], dataframe]) - - for metric_name, dataframe in metric_concat_dict.items(): - metrics_analysis[metric_name] = {} - metrics_analysis[metric_name][ - "mean"] = dataframe.groupby(dataframe.index).mean() - metrics_analysis[metric_name][ - "std"] = dataframe.groupby(dataframe.index).std(ddof=0) - - durations_df_concat = pd.DataFrame(dtype=float) - for iter_index, durations_df in enumerate(iter_results_lists["durations"]): - durations_df_concat = pd.concat((durations_df_concat, durations_df), - axis=1) - durations_df_concat = durations_df_concat.astype(float) - grouped_df = durations_df_concat.groupby(durations_df_concat.columns, axis=1) - duration_means = grouped_df.mean() - duration_stds = grouped_df.std() - - importance_concat_dict = {} - for iter_index, view_feature_importances in enumerate( - iter_results_lists["feature_importances"]): - for view_name, feature_importances in view_feature_importances.items(): - if view_name not in importance_concat_dict: - importance_concat_dict[view_name] = feature_importances - else: - importance_concat_dict[view_name] = pd.concat( - [importance_concat_dict[view_name], feature_importances]) - - for view_name, dataframe in importance_concat_dict.items(): - feature_importances_analysis[view_name] = dataframe.groupby( - dataframe.index).mean() - - feature_importances_stds[view_name] = dataframe.groupby( - dataframe.index).std(ddof=0) - - added_example_errors = {} - for example_errors in iter_results_lists["example_errors"]: - for classifier_name, errors in example_errors.items(): - if classifier_name not in added_example_errors: - added_example_errors[classifier_name] = errors - else: - added_example_errors[classifier_name] += errors - error_analysis = added_example_errors - return metrics_analysis, error_analysis, feature_importances_analysis, feature_importances_stds, \ - iter_results_lists["labels"], duration_means, duration_stds - - -def analyze_all(biclass_results, stats_iter, directory, data_base_name, - example_ids): - """Used to format the results in order to plot the mean results on the iterations""" - metrics_analysis, error_analysis, \ - feature_importances, feature_importances_stds, \ - labels, duration_means, \ - duration_stds = format_previous_results(biclass_results) - - results = publish_all_metrics_scores(metrics_analysis, - directory, - data_base_name, stats_iter) - publish_all_example_errors(error_analysis, directory, stats_iter, - example_ids, labels) - publish_feature_importances(feature_importances, directory, - data_base_name, feature_importances_stds) - plot_durations(duration_means, directory, data_base_name, duration_stds) - return results - - -def save_failed(failed_list, directory): - with open(os.path.join(directory, "failed_algorithms.txt"), - "w") as failed_file: - failed_file.write( - "The following 
algorithms sent an error, the tracebacks are stored in the coressponding directory :\n") - failed_file.write(", \n".join(failed_list) + ".") - - -def get_results(results, stats_iter, benchmark_argument_dictionaries, - metrics, directory, example_ids, labels): - """Used to analyze the results of the previous benchmarks""" - data_base_name = benchmark_argument_dictionaries[0]["args"]["name"] - - results_means_std, biclass_results, flagged_failed = analyze_iterations( - results, benchmark_argument_dictionaries, - stats_iter, metrics, example_ids, labels) - if flagged_failed: - save_failed(flagged_failed, directory) - - if stats_iter > 1: - results_means_std = analyze_all( - biclass_results, stats_iter, directory, - data_base_name, example_ids) - return results_means_std - - - - -# def publish_iter_multiclass_metrics_scores(iter_multiclass_results, classifiers_names, -# data_base_name, directory, stats_iter, -# min_size=10): -# results = [] -# for metric_name, scores in iter_multiclass_results["metrics_scores"].items(): -# trainMeans, trainSTDs = numpy_mean_and_std(scores["train_scores"]) -# testMeans, testSTDs = numpy_mean_and_std(scores["test_scores"]) -# -# nb_results = classifiers_names.shape[0] -# -# file_name = os.path.join(directory, data_base_name + "-Mean_on_" + str( -# stats_iter) + "_iter-" + metric_name + ".png") -# -# plot_metric_scores(trainMeans, testMeans, classifiers_names, nb_results, -# metric_name, file_name, tag=" averaged multiclass", -# train_STDs=trainSTDs, test_STDs=testSTDs) -# -# results+=[[classifiers_name, metric_name,testMean, testSTD] for classifiers_name, testMean, testSTD in zip(classifiers_names, testMeans, testSTDs)] -# return results - - -# def publish_iter_multiclass_example_errors(iter_multiclass_results, directory, -# classifiers_names, stats_iter, example_ids, multiclass_labels, min_size=10): -# logging.debug( -# "Start:\t Global multiclass label analysis figures generation") -# nb_examples, nb_classifiers, data, error_on_examples, classifiers_names = gen_error_data_glob( -# dict((clf_name, combi_res) -# for clf_name, combi_res -# in zip(classifiers_names, -# iter_multiclass_results["error_on_examples"])), -# stats_iter) -# -# plot_2d(data, classifiers_names, nb_classifiers, nb_examples, -# directory, stats_iter=stats_iter, -# example_ids=example_ids, labels=multiclass_labels) -# -# plot_errors_bar(error_on_examples, nb_classifiers * stats_iter, nb_examples, -# directory) -# -# logging.debug("Done:\t Global multiclass label analysis figures generation") - - -# def gen_metrics_scores_multiclass(results, true_labels, metrics_list, -# arguments_dictionaries): -# """Used to add all the metrics scores to the multiclass result structure for each clf and each iteration""" -# -# logging.debug("Start:\t Getting multiclass scores for each metric") -# -# for metric in metrics_list: -# metric_module = getattr(metrics, metric[0]) -# for iter_index, iter_results in enumerate(results): -# -# for argumentsDictionary in arguments_dictionaries: -# if argumentsDictionary["flag"][0] == iter_index: -# classification_indices = argumentsDictionary[ -# "classification_indices"] -# train_indices, test_indices, multiclass_test_indices = classification_indices -# -# for classifier_name, resultDictionary in iter_results.items(): -# if not "metrics_scores" in resultDictionary: -# results[iter_index][classifier_name]["metrics_scores"] = {} -# train_score = metric_module.score(true_labels[train_indices], -# resultDictionary["labels"][ -# train_indices], -# multiclass=True) -# 
test_score = metric_module.score( -# true_labels[multiclass_test_indices], -# resultDictionary["labels"][multiclass_test_indices], -# multiclass=True) -# results[iter_index][classifier_name]["metrics_scores"][ -# metric[0]] = [train_score, test_score] -# logging.debug("Done:\t Getting multiclass scores for each metric") -# return results - - -# def get_error_on_labels_multiclass(multiclass_results, multiclass_labels): -# """Used to add all the arrays showing on which example there is an error for each clf and each iteration""" -# -# logging.debug("Start:\t Getting errors on each example for each classifier") -# -# for iter_index, iter_results in enumerate(multiclass_results): -# for classifier_name, classifier_results in iter_results.items(): -# error_on_examples = classifier_results["labels"] == multiclass_labels -# multiclass_results[iter_index][classifier_name][ -# "error_on_examples"] = error_on_examples.astype(int) -# -# logging.debug("Done:\t Getting errors on each example for each classifier") -# -# return multiclass_results - - -# def publishMulticlassScores(multiclass_results, metrics, stats_iter, direcories, -# databaseName): -# results=[] -# for iter_index in range(stats_iter): -# directory = direcories[iter_index] -# for metric in metrics: -# logging.debug( -# "Start:\t Multiclass score graph generation for " + metric[0]) -# classifiers_names = np.array([classifier_name for classifier_name in -# multiclass_results[iter_index].keys()]) -# train_scores = np.array([multiclass_results[iter_index][ -# classifier_name]["metrics_scores"][ -# metric[0]][0] -# for classifier_name in classifiers_names]) -# validationScores = np.array([multiclass_results[iter_index][ -# classifier_name]["metrics_scores"][ -# metric[0]][1] -# for classifier_name in -# classifiers_names]) -# -# nbResults = classifiers_names.shape[0] -# fileName = os.path.join(directory , time.strftime( -# "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + metric[ -# 0]) -# -# plot_metric_scores(train_scores, validationScores, classifiers_names, -# nbResults, metric[0], fileName, tag=" multiclass") -# -# logging.debug( -# "Done:\t Multiclass score graph generation for " + metric[0]) -# results+=[[classifiersName, metric, testMean, testSTD] for classifiersName, testMean, testSTD in zip(classifiers_names, validationScores, np.zeros(len(validationScores)))] -# return results - - -# def publishMulticlassExmapleErrors(multiclass_results, directories, -# databaseName, example_ids, multiclass_labels): -# for iter_index, multiclass_result in enumerate(multiclass_results): -# directory = directories[iter_index] -# logging.debug("Start:\t Multiclass Label analysis figure generation") -# -# base_file_name = os.path.join(directory, time.strftime( -# "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-") -# nb_classifiers, nb_examples, classifiers_names, data, error_on_examples = gen_error_data( -# dict((key, multiclass_result[key]['error_on_examples']) -# for key in multiclass_result.keys()),) -# plot_2d(data, classifiers_names, nb_classifiers, nb_examples, -# base_file_name, example_ids=example_ids, labels=multiclass_labels) -# -# plot_errors_bar(error_on_examples, nb_classifiers, nb_examples, -# base_file_name) -# -# logging.debug("Done:\t Multiclass Label analysis figure generation") - -# -# def analyzeMulticlass(results, stats_iter, benchmark_argument_dictionaries, -# nb_examples, nb_labels, multiclass_labels, -# metrics, classification_indices, directories, example_ids): -# """Used to transform one versus one results in multiclass results 
and to publish it""" -# multiclass_results = [{} for _ in range(stats_iter)] -# -# for flag, result, tracebacks in results: -# iter_index = flag[0] -# classifierPositive = flag[1][0] -# classifierNegative = flag[1][1] -# -# for benchmarkArgumentDictionary in benchmark_argument_dictionaries: -# if benchmarkArgumentDictionary["flag"] == flag: -# trainIndices, testIndices, testMulticlassIndices = \ -# benchmarkArgumentDictionary["classification_indices"] -# -# for classifierResult in result: -# classifier_name = classifierResult.get_classifier_name() -# if classifier_name not in multiclass_results[iter_index]: -# multiclass_results[iter_index][classifier_name] = np.zeros( -# (nb_examples, nb_labels), dtype=int) -# for exampleIndex in trainIndices: -# label = classifierResult.full_labels_pred[exampleIndex] -# if label == 1: -# multiclass_results[iter_index][classifier_name][ -# exampleIndex, classifierPositive] += 1 -# else: -# multiclass_results[iter_index][classifier_name][ -# exampleIndex, classifierNegative] += 1 -# for multiclassIndex, exampleIndex in enumerate( -# testMulticlassIndices): -# label = classifierResult.y_test_multiclass_pred[multiclassIndex] -# if label == 1: -# multiclass_results[iter_index][classifier_name][ -# exampleIndex, classifierPositive] += 1 -# else: -# multiclass_results[iter_index][classifier_name][ -# exampleIndex, classifierNegative] += 1 -# -# for iter_index, multiclassiterResult in enumerate(multiclass_results): -# for key, value in multiclassiterResult.items(): -# multiclass_results[iter_index][key] = { -# "labels": np.argmax(value, axis=1)} -# -# multiclass_results = gen_metrics_scores_multiclass(multiclass_results, -# multiclass_labels, metrics, -# benchmark_argument_dictionaries) -# multiclass_results = get_error_on_labels_multiclass(multiclass_results, -# multiclass_labels) -# -# results = publishMulticlassScores(multiclass_results, metrics, stats_iter, directories, -# benchmark_argument_dictionaries[0]["args"]["name"]) -# publishMulticlassExmapleErrors(multiclass_results, directories, -# benchmark_argument_dictionaries[0][ -# "args"]["name"], example_ids, multiclass_labels) -# -# return results, multiclass_results - - -# def analyze_iter_multiclass(multiclass_results, directory, stats_iter, metrics, -# data_base_name, nb_examples, example_ids, multiclass_labels): -# """Used to mean the multiclass results on the iterations executed with different random states""" -# -# logging.debug("Start:\t Getting mean results for multiclass classification") -# iter_multiclass_results = {} -# nb_classifiers = len(multiclass_results[0]) -# iter_multiclass_results["error_on_examples"] = np.zeros( -# (nb_classifiers, nb_examples), dtype=int) -# iter_multiclass_results["metrics_scores"] = {} -# classifiers_names = [] -# for iter_index, multiclass_result in enumerate(multiclass_results): -# for classifier_name, classifier_results in multiclass_result.items(): -# if classifier_name not in classifiers_names: -# classifiers_names.append(classifier_name) -# classifier_index = classifiers_names.index(classifier_name) -# for metric in metrics: -# if metric[0] not in iter_multiclass_results["metrics_scores"]: -# iter_multiclass_results["metrics_scores"][metric[0]] = { -# "train_scores": -# np.zeros((nb_classifiers, stats_iter)), -# "test_scores": -# np.zeros((nb_classifiers, stats_iter))} -# iter_multiclass_results["metrics_scores"][metric[0]][ -# "train_scores"][classifier_index, iter_index] = \ -# classifier_results["metrics_scores"][metric[0]][0] -# 
iter_multiclass_results["metrics_scores"][metric[0]]["test_scores"][ -# classifier_index, iter_index] = \ -# classifier_results["metrics_scores"][metric[0]][1] -# iter_multiclass_results["error_on_examples"][classifier_index, :] += \ -# classifier_results["error_on_examples"] -# logging.debug("Start:\t Getting mean results for multiclass classification") -# -# classifiers_names = np.array(classifiers_names) -# results = publish_iter_multiclass_metrics_scores( -# iter_multiclass_results, classifiers_names, -# data_base_name, directory, stats_iter) -# publish_iter_multiclass_example_errors(iter_multiclass_results, directory, -# classifiers_names, stats_iter, example_ids, multiclass_labels) -# return results \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis/__init__.py b/multiview_platform/mono_multi_view_classifiers/result_analysis/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis/duration_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis/duration_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..ac3158805fd7c8282ec8e8077bc55fa0b87b594f --- /dev/null +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis/duration_analysis.py @@ -0,0 +1,47 @@ +import os +import plotly +import pandas as pd + + +def get_duration(results): + df = pd.DataFrame(columns=["hps", "fit", "pred"], ) + for classifier_result in results: + df.at[classifier_result.get_classifier_name(), + "hps"] = classifier_result.hps_duration + df.at[classifier_result.get_classifier_name(), + "fit"] = classifier_result.fit_duration + df.at[classifier_result.get_classifier_name(), + "pred"] = classifier_result.pred_duration + return df + +def plot_durations(durations, directory, database_name, durations_stds=None): + file_name = os.path.join(directory, database_name + "-durations") + durations.to_csv(file_name+"_dataframe.csv") + fig = plotly.graph_objs.Figure() + if durations_stds is None: + durations_stds = pd.DataFrame(0, durations.index, durations.columns) + else: + durations_stds.to_csv(file_name+"_stds_dataframe.csv") + fig.add_trace(plotly.graph_objs.Bar(name='Hyper-parameter Optimization', + x=durations.index, + y=durations['hps'], + error_y=dict(type='data', + array=durations_stds["hps"]), + marker_color="grey")) + fig.add_trace(plotly.graph_objs.Bar(name='Fit (on train set)', + x=durations.index, + y=durations['fit'], + error_y=dict(type='data', + array=durations_stds["fit"]), + marker_color="black")) + fig.add_trace(plotly.graph_objs.Bar(name='Prediction (on test set)', + x=durations.index, + y=durations['pred'], + error_y=dict(type='data', + array=durations_stds["pred"]), + marker_color="lightgrey")) + fig.update_layout(title="Durations for each classfier", + yaxis_title="Duration (s)") + fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', + plot_bgcolor='rgba(0,0,0,0)') + plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False) \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis/error_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis/error_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..f16be40abfd8691a3f194d4826214d6a5454d843 --- /dev/null +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis/error_analysis.py @@ -0,0 
+1,298 @@ +# Import built-in modules +import logging +import os + +import matplotlib as mpl +# Import third party modules +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly +from matplotlib.patches import Patch + +# Import own Modules + + +def get_example_errors(groud_truth, results): + r"""Used to get for each classifier and each example whether the classifier + has misclassified the example or not. + + Parameters + ---------- + ground_truth : numpy array of 0, 1 and -100 (if multiclass) + The array with the real labels of the examples + results : list of MonoviewResult and MultiviewResults objects + A list containing all the resluts for all the mono- & multi-view + experimentations. + + Returns + ------- + example_errors : dict of np.array + For each classifier, has an entry with a `np.array` over the examples, + with a 1 if the examples was + well-classified, a 0 if not and if it's multiclass classification, a + -100 if the examples was not seen during + the one versus one classification. + """ + example_errors = {} + + for classifier_result in results: + error_on_examples = np.equal(classifier_result.full_labels_pred, + groud_truth).astype(int) + unseen_examples = np.where(groud_truth == -100)[0] + error_on_examples[unseen_examples] = -100 + example_errors[ + classifier_result.get_classifier_name()] = error_on_examples + return example_errors + + +def publish_example_errors(example_errors, directory, databaseName, + labels_names, example_ids, labels): + logging.debug("Start:\t Biclass Label analysis figure generation") + + base_file_name = os.path.join(directory, databaseName + "-" + "_vs_".join( + labels_names) + "-") + + nb_classifiers, nb_examples, classifiers_names, \ + data_2d, error_on_examples = gen_error_data(example_errors) + + np.savetxt(base_file_name + "2D_plot_data.csv", data_2d, delimiter=",") + np.savetxt(base_file_name + "bar_plot_data.csv", error_on_examples, + delimiter=",") + + plot_2d(data_2d, classifiers_names, nb_classifiers, nb_examples, + base_file_name, example_ids=example_ids, labels=labels) + + plot_errors_bar(error_on_examples, nb_classifiers, nb_examples, + base_file_name) + + logging.debug("Done:\t Biclass Label analysis figures generation") + +def publish_all_example_errors(iter_results, directory, + stats_iter, + example_ids, labels): + logging.debug( + "Start:\t Global label analysis figure generation") + + nbExamples, nbClassifiers, data, \ + error_on_examples, classifier_names = gen_error_data_glob(iter_results, + stats_iter) + + np.savetxt(os.path.join(directory, "clf_errors.csv"), data, delimiter=",") + np.savetxt(os.path.join(directory, "example_errors.csv"), error_on_examples, + delimiter=",") + + plot_2d(data, classifier_names, nbClassifiers, nbExamples, + os.path.join(directory, ""), stats_iter=stats_iter, + example_ids=example_ids, labels=labels) + plot_errors_bar(error_on_examples, nbClassifiers * stats_iter, + nbExamples, os.path.join(directory, "")) + + logging.debug( + "Done:\t Global label analysis figures generation") + + + +def gen_error_data(example_errors): + r"""Used to format the error data in order to plot it efficiently. The + data is saves in a `.csv` file. + + Parameters + ---------- + example_errors : dict of dicts of np.arrays + A dictionary conatining all the useful data. 
Organized as :
+        `example_errors[<classifier_name>]` is a np.array of ints with a
+        - 1 if the classifier `<classifier_name>` classified the example well,
+        - 0 if it failed to classify the example,
+        - -100 if it did not classify the example (multiclass one versus one).
+
+    Returns
+    -------
+    nb_classifiers : int
+        Number of different classifiers.
+    nb_examples : int
+        Number of examples.
+    classifiers_names : list of strs
+        The names of the classifiers.
+    data_2d : np.array of shape `(nb_examples, nb_classifiers)`
+        A matrix with zeros where the classifier failed to classify the
+        example, ones where it classified it well
+        and -100 if the example was not classified.
+    error_on_examples : np.array of shape `(nb_examples,)`
+        An array counting how many classifiers failed to classify each
+        example.
+    """
+    nb_classifiers = len(example_errors)
+    nb_examples = len(list(example_errors.values())[0])
+    classifiers_names = list(example_errors.keys())
+
+    data_2d = np.zeros((nb_examples, nb_classifiers))
+    for classifier_index, (classifier_name, error_on_examples) in enumerate(
+            example_errors.items()):
+        data_2d[:, classifier_index] = error_on_examples
+    error_on_examples = -1 * np.sum(data_2d, axis=1) / nb_classifiers
+    return nb_classifiers, nb_examples, classifiers_names, data_2d, error_on_examples
+
+
+def gen_error_data_glob(iter_results, stats_iter):
+    nb_examples = next(iter(iter_results.values())).shape[0]
+    nb_classifiers = len(iter_results)
+    data = np.zeros((nb_examples, nb_classifiers), dtype=int)
+    classifier_names = []
+    for clf_index, (classifier_name, error_data) in enumerate(
+            iter_results.items()):
+        data[:, clf_index] = error_data
+        classifier_names.append(classifier_name)
+    error_on_examples = -1 * np.sum(data, axis=1) + (
+            nb_classifiers * stats_iter)
+    return nb_examples, nb_classifiers, data, error_on_examples, \
+           classifier_names
+
+
+def plot_2d(data, classifiers_names, nbClassifiers, nbExamples,
+            file_name, minSize=10, labels=None,
+            width_denominator=2.0, height_denominator=20.0, stats_iter=1,
+            use_plotly=True, example_ids=None):
+    r"""Used to generate a 2D plot of the errors.
+
+    Parameters
+    ----------
+    data : np.array of shape `(nbExamples, nbClassifiers)`
+        A matrix with zeros where the classifier failed to classify the example, ones where it classified it well
+        and -100 if the example was not classified.
+    classifiers_names : list of str
+        The names of the classifiers.
+    nbClassifiers : int
+        The number of classifiers.
+    nbExamples : int
+        The number of examples.
+    file_name : str
+        The name of the file in which the figure will be saved ("error_analysis_2D.png" will be added at the end)
+    minSize : int, optional, default: 10
+        The minimum width and height of the figure.
+    width_denominator : float, optional, default: 2.0
+        To obtain the image width, the number of classifiers will be divided by this number.
+ height_denominator : float, optional, default: 1.0 + To obtain the image width, the number of examples will be divided by this number. + stats_iter : int, optional, default: 1 + The number of statistical iterations realized. + + Returns + ------- + """ + fig, ax = plt.subplots(nrows=1, ncols=1, ) + cmap, norm = iter_cmap(stats_iter) + cax = plt.imshow(data, cmap=cmap, norm=norm, + aspect='auto') + plt.title('Errors depending on the classifier') + ticks = np.arange(0, nbClassifiers, 1) + tick_labels = classifiers_names + plt.xticks(ticks, tick_labels, rotation="vertical") + cbar = fig.colorbar(cax, ticks=[-100 * stats_iter / 2, 0, stats_iter]) + cbar.ax.set_yticklabels(['Unseen', 'Always Wrong', 'Always Right']) + + fig.savefig(file_name + "error_analysis_2D.png", bbox_inches="tight", + transparent=True) + plt.close() + ### The following part is used to generate an interactive graph. + if use_plotly: + label_index_list = np.concatenate([np.where(labels == i)[0] for i in + np.unique( + labels)]) # [np.where(labels==i)[0] for i in np.unique(labels)] + hover_text = [[example_ids[example_index] + " failed " + str( + stats_iter - data[ + example_index, classifier_index]) + " time(s), labelled " + str( + labels[example_index]) + for classifier_index in range(data.shape[1])] + for example_index in range(data.shape[0])] + fig = plotly.graph_objs.Figure() + # for row_index, label_index in enumerate(label_index_list): + fig.add_trace(plotly.graph_objs.Heatmap( + x=list(classifiers_names), + y=[example_ids[label_ind] for label_ind in label_index_list], + z=data[label_index_list, :], + text=[hover_text[label_ind] for label_ind in label_index_list], + hoverinfo=["y", "x", "text"], + colorscale="Greys", + colorbar=dict(tickvals=[0, stats_iter], + ticktext=["Always Wrong", "Always Right"]), + reversescale=True), ) + fig.update_yaxes(title_text="Examples", showticklabels=False, ticks='') + fig.update_xaxes(showticklabels=False, ) + fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', + plot_bgcolor='rgba(0,0,0,0)') + fig.update_xaxes(showticklabels=True, ) + plotly.offline.plot(fig, filename=file_name + "error_analysis_2D.html", + auto_open=False) + del fig + + +def plot_errors_bar(error_on_examples, nbClassifiers, nbExamples, fileName): + r"""Used to generate a barplot of the muber of classifiers that failed to classify each examples + + Parameters + ---------- + error_on_examples : np.array of shape `(nbExamples,)` + An array counting how many classifiers failed to classifiy each examples. + classifiers_names : list of str + The names of the classifiers. + nbClassifiers : int + The number of classifiers. + nbExamples : int + The number of examples. + fileName : str + The name of the file in which the figure will be saved ("error_analysis_2D.png" will be added at the end) + + Returns + ------- + """ + fig, ax = plt.subplots() + x = np.arange(nbExamples) + plt.bar(x, error_on_examples) + plt.ylim([0, nbClassifiers]) + plt.title("Number of classifiers that failed to classify each example") + fig.savefig(fileName + "error_analysis_bar.png", transparent=True) + plt.close() + + +def iter_cmap(statsIter): + r"""Used to generate a colormap that will have a tick for each iteration : the whiter the better. + + Parameters + ---------- + statsIter : int + The number of statistical iterations. + + Returns + ------- + cmap : matplotlib.colors.ListedColorMap object + The colormap. + norm : matplotlib.colors.BoundaryNorm object + The bounds for the colormap. 
+ """ + cmapList = ["red", "0.0"] + [str(float((i + 1)) / statsIter) for i in + range(statsIter)] + cmap = mpl.colors.ListedColormap(cmapList) + bounds = [-100 * statsIter - 0.5, -0.5] + for i in range(statsIter): + bounds.append(i + 0.5) + bounds.append(statsIter + 0.5) + norm = mpl.colors.BoundaryNorm(bounds, cmap.N) + return cmap, norm diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis/execution.py b/multiview_platform/mono_multi_view_classifiers/result_analysis/execution.py new file mode 100644 index 0000000000000000000000000000000000000000..c62425c945d6f26747d67d860b89155913a33fb8 --- /dev/null +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis/execution.py @@ -0,0 +1,225 @@ +import logging +import pandas as pd + +from .tracebacks_analysis import save_failed, publish_tracebacks +from .duration_analysis import plot_durations, get_duration +from .metric_analysis import get_metrics_scores, publish_metrics_graphs, publish_all_metrics_scores +from .error_analysis import get_example_errors, publish_example_errors, publish_all_example_errors +from .feature_importances import get_feature_importances, publish_feature_importances + +def analyze(results, stats_iter, benchmark_argument_dictionaries, + metrics, directory, example_ids, labels): + """Used to analyze the results of the previous benchmarks""" + data_base_name = benchmark_argument_dictionaries[0]["args"]["name"] + + results_means_std, iter_results, flagged_failed = analyze_iterations( + results, benchmark_argument_dictionaries, + stats_iter, metrics, example_ids, labels) + if flagged_failed: + save_failed(flagged_failed, directory) + + if stats_iter > 1: + results_means_std = analyze_all( + iter_results, stats_iter, directory, + data_base_name, example_ids) + return results_means_std + + +def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter, + metrics, example_ids, labels): + r"""Used to extract and format the results of the different + experimentations performed. + + Parameters + ---------- + results : list + The result list returned by the benchmark execution function. For each + executed benchmark, contains + a flag & a result element. + The flag is a way to identify to which benchmark the results belong, + formatted this way : + `flag = iter_index, [classifierPositive, classifierNegative]` with + - `iter_index` the index of the statistical iteration + - `[classifierPositive, classifierNegative]` the indices of the labels + considered positive and negative + by the classifier (mainly useful for one versus one multiclass + classification). + benchmark_argument_dictionaries : list of dicts + The list of all the arguments passed to the benchmark executing + functions. + statsIter : int + The number of statistical iterations. + metrics : list of lists + THe list containing the metrics and their configuration. + + Returns + ------- + results : list of dicts of dicts + The list contains a dictionary for each statistical iteration. This + dictionary contains a dictionary for each + label combination, regrouping the scores for each metrics and the + information useful to plot errors on examples. 
+ """ + logging.debug("Start:\t Analyzing all results") + iter_results = {"metrics_scores": [i for i in range(stats_iter)], + "example_errors": [i for i in range(stats_iter)], + "feature_importances": [i for i in range(stats_iter)], + "durations":[i for i in range(stats_iter)]} + flagged_tracebacks_list = [] + fig_errors = [] + for iter_index, result, tracebacks in results: + arguments = get_arguments(benchmark_argument_dictionaries, iter_index) + + metrics_scores = get_metrics_scores(metrics, result) + example_errors = get_example_errors(labels, result) + feature_importances = get_feature_importances(result) + durations = get_duration(result) + directory = arguments["directory"] + + database_name = arguments["args"]["name"] + labels_names = [arguments["labels_dictionary"][0], + arguments["labels_dictionary"][1]] + + flagged_tracebacks_list += publish_tracebacks(directory, database_name, + labels_names, tracebacks, + iter_index) + res = publish_metrics_graphs(metrics_scores, directory, database_name, + labels_names) + publish_example_errors(example_errors, directory, database_name, + labels_names, example_ids, labels) + publish_feature_importances(feature_importances, directory, + database_name) + plot_durations(durations, directory, database_name) + + iter_results["metrics_scores"][iter_index] = metrics_scores + iter_results["example_errors"][iter_index] = example_errors + iter_results["feature_importances"][iter_index] = feature_importances + iter_results["labels"] = labels + iter_results["durations"][iter_index] = durations + + logging.debug("Done:\t Analyzing all results") + + return res, iter_results, flagged_tracebacks_list + +def analyze_all(iter_results, stats_iter, directory, data_base_name, + example_ids): + """Used to format the results in order to plot the mean results on + the iterations""" + metrics_analysis, error_analysis, feature_importances, \ + feature_importances_stds, labels, duration_means, \ + duration_stds = format_previous_results(iter_results) + + results = publish_all_metrics_scores(metrics_analysis, + directory, + data_base_name, stats_iter) + publish_all_example_errors(error_analysis, directory, stats_iter, + example_ids, labels) + publish_feature_importances(feature_importances, directory, + data_base_name, feature_importances_stds) + plot_durations(duration_means, directory, data_base_name, duration_stds) + return results + +def get_arguments(benchmark_argument_dictionaries, iter_index): + r"""Used to get the arguments passed to the benchmark executing function + corresponding to the flag of an + experimentation. + + Parameters + ---------- + flag : list + The needed experimentation's flag. + benchmark_argument_dictionaries : list of dicts + The list of all the arguments passed to the benchmark executing + functions. + + Returns + ------- + benchmark_argument_dictionary : dict + All the arguments passed to the benchmark executing function for the + needed experimentation. + """ + for benchmark_argument_dictionary in benchmark_argument_dictionaries: + if benchmark_argument_dictionary["flag"] == iter_index: + return benchmark_argument_dictionary + + +def format_previous_results(iter_results_lists): + """ + Formats each statistical iteration's result into a mean/std analysis for + the metrics and adds the errors of each statistical iteration. 
+ + Parameters + ---------- + iter_results_lists : The raw results, for each statistical iteration i + contains + - biclass_results[i]["metrics_scores"] is a dictionary with a + pd.dataframe for each metrics + - biclass_results[i]["example_errors"], a dicaitonary with a np.array + for each classifier. + + Returns + ------- + metrics_analysis : The mean and std dataframes for each metrics + + error_analysis : A dictionary containing the added errors + arrays for each classifier + + """ + metrics_analysis = {} + feature_importances_analysis = {} + feature_importances_stds = {} + + metric_concat_dict = {} + for iter_index, metrics_score in enumerate( + iter_results_lists["metrics_scores"]): + for metric_name, dataframe in metrics_score.items(): + if metric_name not in metric_concat_dict: + metric_concat_dict[metric_name] = dataframe + else: + metric_concat_dict[metric_name] = pd.concat( + [metric_concat_dict[metric_name], dataframe]) + + for metric_name, dataframe in metric_concat_dict.items(): + metrics_analysis[metric_name] = {} + metrics_analysis[metric_name][ + "mean"] = dataframe.groupby(dataframe.index).mean() + metrics_analysis[metric_name][ + "std"] = dataframe.groupby(dataframe.index).std(ddof=0) + + durations_df_concat = pd.DataFrame(dtype=float) + for iter_index, durations_df in enumerate(iter_results_lists["durations"]): + durations_df_concat = pd.concat((durations_df_concat, durations_df), + axis=1) + durations_df_concat = durations_df_concat.astype(float) + grouped_df = durations_df_concat.groupby(durations_df_concat.columns, axis=1) + duration_means = grouped_df.mean() + duration_stds = grouped_df.std() + + importance_concat_dict = {} + for iter_index, view_feature_importances in enumerate( + iter_results_lists["feature_importances"]): + for view_name, feature_importances in view_feature_importances.items(): + if view_name not in importance_concat_dict: + importance_concat_dict[view_name] = feature_importances + else: + importance_concat_dict[view_name] = pd.concat( + [importance_concat_dict[view_name], feature_importances]) + + for view_name, dataframe in importance_concat_dict.items(): + feature_importances_analysis[view_name] = dataframe.groupby( + dataframe.index).mean() + + feature_importances_stds[view_name] = dataframe.groupby( + dataframe.index).std(ddof=0) + + added_example_errors = {} + for example_errors in iter_results_lists["example_errors"]: + for classifier_name, errors in example_errors.items(): + if classifier_name not in added_example_errors: + added_example_errors[classifier_name] = errors + else: + added_example_errors[classifier_name] += errors + error_analysis = added_example_errors + return metrics_analysis, error_analysis, feature_importances_analysis, \ + feature_importances_stds, iter_results_lists["labels"], \ + duration_means, duration_stds diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis/feature_importances.py b/multiview_platform/mono_multi_view_classifiers/result_analysis/feature_importances.py new file mode 100644 index 0000000000000000000000000000000000000000..a86ccf4609360a9a16a35c96255ca538183707d9 --- /dev/null +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis/feature_importances.py @@ -0,0 +1,80 @@ +import os +import plotly +import pandas as pd +import numpy as np + +from ..monoview.monoview_utils import MonoviewResult + + +def get_feature_importances(result, feature_names=None): + r"""Extracts the feature importance from the monoview results and stores + them in a dictionnary : + 
feature_importance[view_name] is a pandas.DataFrame of size n_feature*n_clf + containing a score of importance for each feature. + + Parameters + ---------- + result : list of results + + Returns + ------- + feature_importances : dict of pd.DataFrame + The dictionary containing all the feature importance for each view as + pandas DataFrames + """ + feature_importances = {} + for classifier_result in result: + if isinstance(classifier_result, MonoviewResult): + if classifier_result.view_name not in feature_importances: + feature_importances[classifier_result.view_name] = pd.DataFrame( + index=feature_names) + if hasattr(classifier_result.clf, 'feature_importances_'): + feature_importances[classifier_result.view_name][ + classifier_result.classifier_name] = classifier_result.clf.feature_importances_ + else: + feature_importances[classifier_result.view_name][ + classifier_result.classifier_name] = np.zeros( + classifier_result.n_features) + return feature_importances + +def publish_feature_importances(feature_importances, directory, database_name, + feature_stds=None): + for view_name, feature_importance in feature_importances.items(): + if not os.path.exists(os.path.join(directory, "feature_importances")): + os.mkdir(os.path.join(directory, "feature_importances")) + file_name = os.path.join(directory, "feature_importances", + database_name + "-" + view_name + + "-feature_importances") + if feature_stds is not None: + feature_std = feature_stds[view_name] + feature_std.to_csv(file_name + "_dataframe_stds.csv") + else: + feature_std = pd.DataFrame(data=np.zeros(feature_importance.shape), + index=feature_importance.index, + columns=feature_importance.columns) + feature_importance.to_csv(file_name + "_dataframe.csv") + hover_text = [["-Feature :" + str(feature_name) + + "<br>-Classifier : " + classifier_name + + "<br>-Importance : " + str( + feature_importance.loc[feature_name][classifier_name]) + + "<br>-STD : " + str( + feature_std.loc[feature_name][classifier_name]) + for classifier_name in list(feature_importance.columns)] + for feature_name in list(feature_importance.index)] + fig = plotly.graph_objs.Figure(data=plotly.graph_objs.Heatmap( + x=list(feature_importance.columns), + y=list(feature_importance.index), + z=feature_importance.values, + text=hover_text, + hoverinfo=["text"], + colorscale="Greys", + reversescale=False)) + fig.update_layout( + xaxis={"showgrid": False, "showticklabels": False, "ticks": ''}, + yaxis={"showgrid": False, "showticklabels": False, "ticks": ''}) + fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', + plot_bgcolor='rgba(0,0,0,0)') + plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False) + + del fig + diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis/metric_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis/metric_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..d3eb5cde7cd8da8c9bccc0efb6cadf4ba8f4e055 --- /dev/null +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis/metric_analysis.py @@ -0,0 +1,340 @@ +import matplotlib.pyplot as plt +import numpy as np +import os +import pandas as pd +import plotly +import logging + +from ..utils.organization import secure_file_path + +def get_metrics_scores(metrics, results): + r"""Used to extract metrics scores in case of classification + + Parameters + ---------- + metrics : list of lists + The metrics names with configuration metrics[i][0] = name of metric i + results : list of MonoviewResult and 
MultiviewResults objects
+        A list containing all the results for all the mono- & multi-view
+        experimentations.
+
+    Returns
+    -------
+    metrics_scores : dict of pd.DataFrame
+        For each metric, a DataFrame with one column per classifier and two
+        rows ("train" and "test") containing the corresponding scores.
+    """
+    classifier_names = []
+    for classifierResult in results:
+        if classifierResult.get_classifier_name() not in classifier_names:
+            classifier_names.append(classifierResult.get_classifier_name())
+    metrics_scores = dict((metric[0], pd.DataFrame(data=np.zeros((2,
+                                                                  len(
+                                                                      classifier_names))),
+                                                   index=["train", "test"],
+                                                   columns=classifier_names))
+                          for metric in metrics)
+
+    for metric in metrics:
+        for classifierResult in results:
+            metrics_scores[metric[0]].loc[
+                "train", classifierResult.get_classifier_name()] = \
+                classifierResult.metrics_scores[metric[0]][0]
+            metrics_scores[metric[0]].loc[
+                "test", classifierResult.get_classifier_name()] = \
+                classifierResult.metrics_scores[metric[0]][1]
+
+    return metrics_scores
+
+
+def publish_metrics_graphs(metrics_scores, directory, database_name,
+                           labels_names):
+    r"""Used to plot and save a score bar graph for each metric, with the
+    classifiers sorted in descending test score order.
+
+    Parameters
+    ----------
+    metrics_scores : dict of pd.DataFrame
+        Keys : The names of the metrics.
+        Values : The train and test scores of each classifier.
+    directory : str
+        The path to the directory where the figures will be saved.
+    database_name : str
+        The name of the database on which the experiments were conducted.
+    labels_names : list of strs
+        The name corresponding to each numerical label.
+ + Returns + ------- + results + """ + results = [] + for metric_name, metric_dataframe in metrics_scores.items(): + logging.debug( + "Start:\t Biclass score graph generation for " + metric_name) + train_scores, test_scores, classifier_names, \ + file_name, nb_results, results = init_plot(results, metric_name, + metric_dataframe, directory, + database_name, labels_names) + + plot_metric_scores(train_scores, test_scores, classifier_names, + nb_results, metric_name, file_name, + tag=" " + " vs ".join(labels_names)) + logging.debug( + "Done:\t Biclass score graph generation for " + metric_name) + return results + + +def publish_all_metrics_scores(iter_results, directory, + data_base_name, stats_iter, + min_size=10): + results = [] + secure_file_path(os.path.join(directory, "a")) + + for metric_name, scores in iter_results.items(): + train = np.array(scores["mean"].loc["train"]) + test = np.array(scores["mean"].loc["test"]) + names = np.array(scores["mean"].columns) + train_std = np.array(scores["std"].loc["train"]) + test_std = np.array(scores["std"].loc["test"]) + + file_name = os.path.join(directory, data_base_name + "-Mean_on_" + str( + stats_iter) + "_iter-" + metric_name) + nbResults = names.shape[0] + + plot_metric_scores(train, test, names, nbResults, + metric_name, file_name, tag=" averaged", + train_STDs=train_std, test_STDs=test_std) + results += [[classifier_name, metric_name, test_mean, test_std] + for classifier_name, test_mean, test_std + in zip(names, test, test_std)] + return results + + +def init_plot(results, metric_name, metric_dataframe, + directory, database_name, labels_names): + train = np.array(metric_dataframe.loc["train"]) + test = np.array(metric_dataframe.loc["test"]) + classifier_names = np.array(metric_dataframe.columns) + + nb_results = metric_dataframe.shape[1] + + file_name = os.path.join(directory, database_name + "-" + "_vs_".join( + labels_names) + "-" + metric_name) + + results += [[classifiers_name, metric_name, testMean, testSTD] + for classifiers_name, testMean, testSTD in + zip(classifier_names, test, np.zeros(len(test)))] + return train, test, classifier_names, file_name, nb_results, results + + +def plot_metric_scores(train_scores, test_scores, names, nb_results, + metric_name, + file_name, + tag="", train_STDs=None, test_STDs=None, + use_plotly=True): + r"""Used to plot and save the score barplot for a specific metric. + + Parameters + ---------- + train_scores : list or np.array of floats + The scores of each classifier on the training set. + test_scores : list or np.array of floats + The scores of each classifier on the testing set. + names : list or np.array of strs + The names of all the classifiers. + nb_results: int + The number of classifiers to plot. + metric_name : str + The plotted metric's name + file_name : str + The name of the file where the figure will be saved. + tag : str + Some text to personalize the title, must start with a whitespace. + train_STDs : np.array of floats or None + The array containing the standard deviations for the averaged scores on the training set. + test_STDs : np.array of floats or None + The array containing the standard deviations for the averaged scores on the testing set. 
+ + Returns + ------- + """ + + figKW, barWidth = get_fig_size(nb_results) + + names, train_scores, test_scores, train_STDs, test_STDs = sort_by_test_score( + train_scores, test_scores, names, + train_STDs, test_STDs) + + f, ax = plt.subplots(nrows=1, ncols=1, **figKW) + ax.set_title(metric_name + "\n" + tag + " scores for each classifier") + + rects = ax.bar(range(nb_results), test_scores, barWidth, color="0.1", + yerr=test_STDs) + rect2 = ax.bar(np.arange(nb_results) + barWidth, train_scores, barWidth, + color="0.8", yerr=train_STDs) + autolabel(rects, ax, set=1, std=test_STDs) + autolabel(rect2, ax, set=2, std=train_STDs) + ax.legend((rects[0], rect2[0]), ('Test', 'Train')) + ax.set_ylim(-0.1, 1.1) + ax.set_xticks(np.arange(nb_results) + barWidth / 2) + ax.set_xticklabels(names, rotation="vertical") + + try: + plt.tight_layout() + except: + pass + f.savefig(file_name + '.png', transparent=True) + plt.close() + import pandas as pd + if train_STDs is None: + dataframe = pd.DataFrame(np.transpose(np.concatenate(( + train_scores.reshape((train_scores.shape[0], 1)), + test_scores.reshape((train_scores.shape[0], 1))), axis=1)), + columns=names, index=["Train", "Test"]) + else: + dataframe = pd.DataFrame(np.transpose(np.concatenate(( + train_scores.reshape((train_scores.shape[0], 1)), + train_STDs.reshape((train_scores.shape[0], 1)), + test_scores.reshape((train_scores.shape[0], 1)), + test_STDs.reshape((train_scores.shape[0], 1))), axis=1)), + columns=names, index=["Train", "Train STD", "Test", "Test STD"]) + dataframe.to_csv(file_name + ".csv") + if use_plotly: + fig = plotly.graph_objs.Figure() + fig.add_trace(plotly.graph_objs.Bar( + name='Train', + x=names, y=train_scores, + error_y=dict(type='data', array=train_STDs), + marker_color="lightgrey", + )) + fig.add_trace(plotly.graph_objs.Bar( + name='Test', + x=names, y=test_scores, + error_y=dict(type='data', array=test_STDs), + marker_color="black", + )) + + fig.update_layout( + title=metric_name + "<br>" + tag + " scores for each classifier") + fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', + plot_bgcolor='rgba(0,0,0,0)') + plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False) + del fig + + +def get_fig_size(nb_results, min_size=15, multiplier=1.0, bar_width=0.35): + r"""Used to get the image size to save the figure and the bar width, depending on the number of scores to plot. + + Parameters + ---------- + nb_results : int + The number of couple of bar to plot. + min_size : int + The minimum size of the image, if there are few classifiers to plot. + multiplier : float + The ratio between the image size and the number of classifiers. + bar_width : float + The width of the bars in the figure. Mainly here to centralize bar_width. + + Returns + ------- + fig_kwargs : dict of arguments + The argument restraining the size of the figure, usable directly in the `subplots` function of + `matplotlib.pyplot`. + bar_width : float + The width of the bars in the figure. Mainly here to centralize bar_width. + """ + size = nb_results * multiplier + if size < min_size: + size = min_size + fig_kwargs = {"figsize": (size, size / 3)} + return fig_kwargs, bar_width + + +def autolabel(rects, ax, set=1, std=None): + r"""Used to print the score below the bars. + + Parameters + ---------- + rects : pyplot bar object + THe bars. + ax : pyplot ax object + The ax. + set : integer + 1 means the test scores, anything else means the train score + std: None or array + The standard deviations in the case of statsIter results. 
+ + Returns + ------- + """ + if set == 1: + text_height = -0.05 + weight = "bold" + else: + text_height = -0.07 + weight = "normal" + for rectIndex, rect in enumerate(rects): + height = rect.get_height() + if std is not None: + ax.text(rect.get_x() + rect.get_width() / 2., text_height, + "%.2f" % height + u'\u00B1' + "%.2f" % std[rectIndex], + weight=weight, + ha='center', va='bottom', size="x-small") + else: + ax.text(rect.get_x() + rect.get_width() / 2., text_height, + "%.2f" % height, weight=weight, + ha='center', va='bottom', size="small") + + +def sort_by_test_score(train_scores, test_scores, names, train_STDs=None, + test_STDs=None): + r"""Used to sort the results (names and both scores) in descending test score order. + + Parameters + ---------- + train_scores : np.array of floats + The scores of each classifier on the training set. + test_scores : np.array of floats + The scores of each classifier on the testing set. + names : np.array of strs + The names of all the classifiers. + train_STDs : np.array of floats or None + The array containing the standard deviations for the averaged scores on the training set. + test_STDs : np.array of floats or None + The array containing the standard deviations for the averaged scores on the testing set. + + Returns + ------- + sorted_names : np.array of strs + The names of all the classifiers, sorted in descending test score order. + sorted_train_scores : np.array of floats + The scores of each classifier on the training set, sorted in descending test score order. + sorted_test_scores : np.array of floats + The scores of each classifier on the testing set, sorted in descending test score order. + sorted_train_STDs : np.array of floats or None + The array containing the standard deviations for the averaged scores on the training set, + sorted in descending test score order. + sorted_test_STDs : np.array of floats or None + The array containing the standard deviations for the averaged scores on the testing set, + sorted in descending test score order. 
+ """ + sorted_indices = np.argsort(test_scores) + sorted_test_scores = test_scores[sorted_indices] + sorted_train_scores = train_scores[sorted_indices] + sorted_names = names[sorted_indices] + if train_STDs is not None and test_STDs is not None: + sorted_train_STDs = train_STDs[sorted_indices] + sorted_test_STDs = test_STDs[sorted_indices] + else: + sorted_train_STDs = None + sorted_test_STDs = None + return sorted_names, sorted_train_scores, sorted_test_scores, sorted_train_STDs, sorted_test_STDs \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis/noise_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis/noise_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..96973ba36f3858fc16eaa54179f7b6effcb90db2 --- /dev/null +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis/noise_analysis.py @@ -0,0 +1,56 @@ + +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import os +from matplotlib.patches import Patch + + +def plot_results_noise(directory, noise_results, metric_to_plot, name, + width=0.1): + avail_colors = ["tab:blue", "tab:orange", "tab:brown", "tab:gray", + "tab:olive", "tab:red", ] + colors = {} + lengend_patches = [] + noise_levels = np.array([noise_level for noise_level, _ in noise_results]) + df = pd.DataFrame( + columns=['noise_level', 'classifier_name', 'mean_score', 'score_std'], ) + if len(noise_results) > 1: + width = np.min(np.diff(noise_levels)) + for noise_level, noise_result in noise_results: + classifiers_names, meaned_metrics, metric_stds = [], [], [] + for noise_result in noise_result: + classifier_name = noise_result[0].split("-")[0] + if noise_result[1] is metric_to_plot: + classifiers_names.append(classifier_name) + meaned_metrics.append(noise_result[2]) + metric_stds.append(noise_result[3]) + if classifier_name not in colors: + try: + colors[classifier_name] = avail_colors.pop(0) + except IndexError: + colors[classifier_name] = "k" + classifiers_names, meaned_metrics, metric_stds = np.array( + classifiers_names), np.array(meaned_metrics), np.array(metric_stds) + sorted_indices = np.argsort(-meaned_metrics) + for index in sorted_indices: + row = pd.DataFrame( + {'noise_level': noise_level, + 'classifier_name': classifiers_names[index], + 'mean_score': meaned_metrics[index], + 'score_std': metric_stds[index]}, index=[0]) + df = pd.concat([df, row]) + plt.bar(noise_level, meaned_metrics[index], yerr=metric_stds[index], + width=0.5 * width, label=classifiers_names[index], + color=colors[classifiers_names[index]]) + for classifier_name, color in colors.items(): + lengend_patches.append(Patch(facecolor=color, label=classifier_name)) + plt.legend(handles=lengend_patches, loc='lower center', + bbox_to_anchor=(0.5, 1.05), ncol=2) + plt.ylabel(metric_to_plot) + plt.title(name) + plt.xticks(noise_levels) + plt.xlabel("Noise level") + plt.savefig(os.path.join(directory, name + "_noise_analysis.png")) + plt.close() + df.to_csv(os.path.join(directory, name + "_noise_analysis.csv")) diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis/tracebacks_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis/tracebacks_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..329a27f6fe98c23b94b1053847c7482165d970d4 --- /dev/null +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis/tracebacks_analysis.py @@ -0,0 +1,36 @@ +import os + + +def 
publish_tracebacks(directory, database_name, labels_names, tracebacks, + iter_index): + if tracebacks: + with open(os.path.join(directory, database_name + + "-iter" + str(iter_index) + + "-tacebacks.txt"), + "w") as traceback_file: + failed_list = save_dict_to_text(tracebacks, traceback_file) + flagged_list = [_ + "-iter" + str(iter_index) for _ in failed_list] + else: + flagged_list = {} + return flagged_list + + +def save_dict_to_text(dictionnary, output_file): + # TODO : smarter way must exist + output_file.write("Failed algorithms : \n\t" + ",\n\t".join( + dictionnary.keys()) + ".\n\n\n") + for key, value in dictionnary.items(): + output_file.write(key) + output_file.write("\n\n") + output_file.write(value) + output_file.write("\n\n\n") + return dictionnary.keys() + + +def save_failed(failed_list, directory): + with open(os.path.join(directory, "failed_algorithms.txt"), + "w") as failed_file: + failed_file.write( + "The following algorithms sent an error, the tracebacks are stored " + "in the coressponding directory :\n") + failed_file.write(", \n".join(failed_list) + ".") diff --git a/multiview_platform/tests/__init__.py b/multiview_platform/tests/__init__.py index b7887f5996bd1484d567e919608c364fe9a64c63..194018ae5ef03ba4d863b4e1497acae3b317589a 100644 --- a/multiview_platform/tests/__init__.py +++ b/multiview_platform/tests/__init__.py @@ -1,2 +1,2 @@ -from . import test_ExecClassif +from . import test_exec_classif from .utils import rm_tmp, gen_test_dataset, tmp_path \ No newline at end of file diff --git a/multiview_platform/tests/test_ResultAnalysis.py b/multiview_platform/tests/test_ResultAnalysis.py deleted file mode 100644 index 98e4cabf602f66505fc784e0bd66fd464f7656de..0000000000000000000000000000000000000000 --- a/multiview_platform/tests/test_ResultAnalysis.py +++ /dev/null @@ -1,300 +0,0 @@ -import unittest -import numpy as np -import pandas as pd -import os - -from multiview_platform.mono_multi_view_classifiers import result_analysis -from multiview_platform.mono_multi_view_classifiers.multiview.multiview_utils import MultiviewResult -from multiview_platform.mono_multi_view_classifiers.monoview.monoview_utils import MonoviewResult - - -class Test_get_arguments(unittest.TestCase): - - def setUp(self): - self.benchamrk_argument_dictionaries = [{"flag":"good_flag", "valid":True}, - {"flag":"bad_flag", "valid":False}] - - def test_benchmark_wanted(self): - argument_dict = result_analysis.get_arguments(self.benchamrk_argument_dictionaries, "good_flag") - self.assertTrue(argument_dict["valid"]) - - -class Test_get_metrics_scores_biclass(unittest.TestCase): - - - def test_simple(self): - metrics = [["accuracy_score"], ["f1_score"]] - results = [MonoviewResult(0, - "ada", - "0", - {"accuracy_score":[0.9, 0.95], - "f1_score":[0.91, 0.96]} - , "", "", "", "", "",0,0)] - metrics_scores = result_analysis.get_metrics_scores(metrics, - results) - self.assertIsInstance(metrics_scores, dict) - self.assertIsInstance(metrics_scores["accuracy_score"], pd.DataFrame) - np.testing.assert_array_equal(np.array(metrics_scores["accuracy_score"].loc["train"]), np.array([0.9])) - np.testing.assert_array_equal( - np.array(metrics_scores["accuracy_score"].loc["test"]), - np.array([0.95])) - np.testing.assert_array_equal( - np.array(metrics_scores["f1_score"].loc["train"]), - np.array([0.91])) - np.testing.assert_array_equal( - np.array(metrics_scores["f1_score"].loc["test"]), - np.array([0.96])) - np.testing.assert_array_equal(np.array(metrics_scores["f1_score"].columns), - np.array(["ada-0"])) - - 
def test_multiple_monoview_classifiers(self): - metrics = [["accuracy_score"], ["f1_score"]] - results = [MonoviewResult(view_index=0, - classifier_name="ada", - view_name="0", - metrics_scores={"accuracy_score": [0.9, 0.95], - "f1_score": [0.91, 0.96]}, - full_labels_pred="", - classifier_config="", - classifier="", - n_features="", - hps_duration=0, - fit_duration=0, - pred_duration=0), - MonoviewResult(view_index=0, - classifier_name="dt", - view_name="1", - metrics_scores={"accuracy_score": [0.8, 0.85], - "f1_score": [0.81, 0.86]}, - full_labels_pred="", - classifier_config="", - classifier="", - n_features="", - hps_duration=0, - fit_duration=0, - pred_duration=0) - ] - metrics_scores = result_analysis.get_metrics_scores(metrics, - results) - self.assertIsInstance(metrics_scores, dict) - self.assertIsInstance(metrics_scores["accuracy_score"], pd.DataFrame) - np.testing.assert_array_equal( - np.array(metrics_scores["accuracy_score"].loc["train"]), - np.array([0.9, 0.8])) - np.testing.assert_array_equal( - np.array(metrics_scores["accuracy_score"].loc["test"]), - np.array([0.95, 0.85])) - np.testing.assert_array_equal( - np.array(metrics_scores["f1_score"].loc["train"]), - np.array([0.91, 0.81])) - np.testing.assert_array_equal( - np.array(metrics_scores["f1_score"].loc["test"]), - np.array([0.96, 0.86])) - np.testing.assert_array_equal( - np.array(metrics_scores["f1_score"].columns), - np.array(["ada-0", "dt-1"])) - - def test_mutiview_result(self): - metrics = [["accuracy_score"], ["f1_score"]] - results = [MultiviewResult("mv", "", {"accuracy_score": [0.7, 0.75], - "f1_score": [0.71, 0.76]}, "",0,0,0 ), - MonoviewResult(view_index=0, - classifier_name="dt", - view_name="1", - metrics_scores={"accuracy_score": [0.8, 0.85], - "f1_score": [0.81, 0.86]}, - full_labels_pred="", - classifier_config="", - classifier="", - n_features="", - hps_duration=0, - fit_duration=0, - pred_duration=0) - ] - metrics_scores = result_analysis.get_metrics_scores(metrics, - results) - self.assertIsInstance(metrics_scores, dict) - self.assertIsInstance(metrics_scores["accuracy_score"], pd.DataFrame) - np.testing.assert_array_equal( - np.array(metrics_scores["accuracy_score"].loc["train"]), - np.array([0.7, 0.8])) - np.testing.assert_array_equal( - np.array(metrics_scores["accuracy_score"].loc["test"]), - np.array([0.75, 0.85])) - np.testing.assert_array_equal( - np.array(metrics_scores["f1_score"].loc["train"]), - np.array([0.71, 0.81])) - np.testing.assert_array_equal( - np.array(metrics_scores["f1_score"].loc["test"]), - np.array([0.76, 0.86])) - np.testing.assert_array_equal( - np.array(metrics_scores["f1_score"].columns), - np.array(["mv", "dt-1"])) - -class Test_get_example_errors_biclass(unittest.TestCase): - - def test_simple(self): - ground_truth = np.array([0,1,0,1,0,1,0,1, -100]) - results = [MultiviewResult("mv", "", {"accuracy_score": [0.7, 0.75], - "f1_score": [0.71, 0.76]}, - np.array([0,0,0,0,1,1,1,1,1]), - 0,0,0), - MonoviewResult(0, - "dt", - "1", - {"accuracy_score": [0.8, 0.85], - "f1_score": [0.81, 0.86]} - , np.array([0,0,1,1,0,0,1,1,0]), "", "", - "", "",0,0) - ] - example_errors = result_analysis.get_example_errors(ground_truth, - results) - self.assertIsInstance(example_errors, dict) - np.testing.assert_array_equal(example_errors["mv"], - np.array([1,0,1,0,0,1,0,1,-100])) - np.testing.assert_array_equal(example_errors["dt-1"], - np.array([1, 0, 0, 1, 1, 0, 0, 1,-100])) - - -class Test_init_plot(unittest.TestCase): - - def test_simple(self): - results = [] - metric_name = "acc" - data 
= np.random.RandomState(42).uniform(0,1,(2,2)) - metric_dataframe = pd.DataFrame(index=["train", "test"], - columns=["dt-1", "mv"], data=data) - directory = "dir" - database_name = 'db' - labels_names = ['lb1', "lb2"] - train, test, classifier_names, \ - file_name, nb_results, results = result_analysis.init_plot(results, - metric_name, - metric_dataframe, - directory, - database_name, - labels_names) - self.assertEqual(file_name, os.path.join("dir", "db-lb1_vs_lb2-acc")) - np.testing.assert_array_equal(train, data[0,:]) - np.testing.assert_array_equal(test, data[1, :]) - np.testing.assert_array_equal(classifier_names, np.array(["dt-1", "mv"])) - self.assertEqual(nb_results, 2) - self.assertEqual(results, [["dt-1", "acc", data[1,0], 0], - ["mv", "acc", data[1,1], 0]]) - -class Test_gen_error_data(unittest.TestCase): - - def test_simple(self): - random_state = np.random.RandomState(42) - ada_data = random_state.randint(0,2,size=7) - mv_data = random_state.randint(0, 2, size=7) - example_errors = {"ada-1": ada_data, - "mv": mv_data} - nb_classifiers, nb_examples, classifiers_names, \ - data_2d, error_on_examples = result_analysis.gen_error_data(example_errors) - self.assertEqual(nb_classifiers, 2) - self.assertEqual(nb_examples, 7) - self.assertEqual(classifiers_names, ["ada-1", "mv"]) - np.testing.assert_array_equal(data_2d, np.array([ada_data, mv_data]).transpose()) - np.testing.assert_array_equal(error_on_examples, -1*(ada_data+mv_data)/nb_classifiers) - - -class Test_format_previous_results(unittest.TestCase): - - def test_simple(self): - biclass_results = {"metrics_scores":[], "example_errors":[], "feature_importances":[], "labels":[], "durations":[]} - random_state = np.random.RandomState(42) - - # Gen metrics data - metrics_1_data = random_state.uniform(size=(2,2)) - metrics_2_data = random_state.uniform(size=(2,2)) - metric_1_df = pd.DataFrame(data=metrics_1_data, index=["train", "test"], - columns=["ada-1", "mv"]) - metric_2_df = pd.DataFrame(data=metrics_2_data, index=["train", "test"], - columns=["ada-1", "mv"]) - biclass_results["metrics_scores"].append({"acc": metric_1_df}) - biclass_results["metrics_scores"].append({"acc": metric_2_df}) - - # Gen error data - ada_error_data_1 = random_state.randint(0,2,7) - ada_error_data_2 = random_state.randint(0, 2, 7) - ada_sum = ada_error_data_1+ada_error_data_2 - mv_error_data_1 = random_state.randint(0, 2, 7) - mv_error_data_2 = random_state.randint(0, 2, 7) - mv_sum = mv_error_data_1+mv_error_data_2 - biclass_results["example_errors"].append({}) - biclass_results["example_errors"].append({}) - biclass_results["example_errors"][0]["ada-1"] = ada_error_data_1 - biclass_results["example_errors"][0]["mv"] = mv_error_data_1 - biclass_results["example_errors"][1]["ada-1"] = ada_error_data_2 - biclass_results["example_errors"][1]["mv"] = mv_error_data_2 - - biclass_results["durations"].append(pd.DataFrame(index=["ada-1", "mv"], - columns=["plif", "plaf"], - data=np.zeros((2,2)))) - biclass_results["durations"].append(pd.DataFrame(index=["ada-1", "mv"], - columns=["plif", - "plaf"], - data=np.ones((2, 2)))) - - # Running the function - metric_analysis, error_analysis, \ - feature_importances, feature_stds, \ - labels, durations_mean, duration_std = result_analysis.format_previous_results(biclass_results) - mean_df = pd.DataFrame(data=np.mean(np.array([metrics_1_data, - metrics_2_data]), - axis=0), - index=["train", "test"], - columns=["ada-1", "mvm"]) - std_df = pd.DataFrame(data=np.std(np.array([metrics_1_data, - metrics_2_data]), - axis=0), - 
index=["train", "test"], - columns=["ada-1", "mvm"]) - - # Testing - np.testing.assert_array_equal(metric_analysis["acc"]["mean"].loc["train"], - mean_df.loc["train"]) - np.testing.assert_array_equal(metric_analysis["acc"]["mean"].loc["test"], - mean_df.loc["test"]) - np.testing.assert_array_equal(metric_analysis["acc"]["std"].loc["train"], - std_df.loc["train"]) - np.testing.assert_array_equal(metric_analysis["acc"]["std"].loc["test"], - std_df.loc["test"]) - np.testing.assert_array_equal(ada_sum, error_analysis["ada-1"]) - np.testing.assert_array_equal(mv_sum, error_analysis["mv"]) - self.assertEqual(durations_mean.at["ada-1", 'plif'], 0.5) - - -class Test_gen_error_data_glob(unittest.TestCase): - - def test_simple(self): - random_state = np.random.RandomState(42) - - ada_error_data_1 = random_state.randint(0,2,7) - ada_error_data_2 = random_state.randint(0, 2, 7) - ada_sum = ada_error_data_1+ada_error_data_2 - mv_error_data_1 = random_state.randint(0, 2, 7) - mv_error_data_2 = random_state.randint(0, 2, 7) - mv_sum = mv_error_data_1+mv_error_data_2 - - combi_results = {"ada-1":ada_sum, "mv": mv_sum} - - stats_iter = 2 - - nb_examples, nb_classifiers, \ - data, error_on_examples, \ - classifier_names = result_analysis.gen_error_data_glob(combi_results, - stats_iter) - self.assertEqual(nb_examples, 7) - self.assertEqual(nb_classifiers, 2) - np.testing.assert_array_equal(data, np.array([ada_sum, mv_sum]).transpose()) - np.testing.assert_array_equal(error_on_examples, -1*np.sum(np.array([ada_sum, mv_sum]), axis=0)+(nb_classifiers*stats_iter)) - self.assertEqual(classifier_names, ["ada-1", "mv"]) - - - - - - - diff --git a/multiview_platform/tests/test_ExecClassif.py b/multiview_platform/tests/test_exec_classif.py similarity index 94% rename from multiview_platform/tests/test_ExecClassif.py rename to multiview_platform/tests/test_exec_classif.py index 3179d209d5a14c570cc0c4bf3da9dff32103d5b6..5187ad89a1e4c687eaea5550060d9ad54d588a47 100644 --- a/multiview_platform/tests/test_ExecClassif.py +++ b/multiview_platform/tests/test_exec_classif.py @@ -265,7 +265,7 @@ class Test_execBenchmark(unittest.TestCase): # exec_one_benchmark=fakeBenchmarkExec, # exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore, - get_results=fakegetResults, + analyze=fakegetResults, delete=fakeDelete, analyze_iterations=fake_analyze) cls.assertEqual(res, 3) @@ -280,11 +280,11 @@ class Test_execBenchmark(unittest.TestCase): metrics=[[[1, 2], [3, 4, 5]]], dataset_var=cls.Dataset, track_tracebacks=6, - # exec_one_benchmark=fakeBenchmarkExec, - # exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, - exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore, - get_results=fakegetResults, - delete=fakeDelete, + # exec_one_benchmark=fakeBenchmarkExec, + # exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, + exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore, + analyze=fakegetResults, + delete=fakeDelete, analyze_iterations=fake_analyze) cls.assertEqual(res, 3) @@ -300,11 +300,11 @@ class Test_execBenchmark(unittest.TestCase): metrics=[[[1, 2], [3, 4, 5]]], dataset_var=cls.Dataset, track_tracebacks=6, - # exec_one_benchmark=fakeBenchmarkExec, - # exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, - exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore, - get_results=fakegetResults, - delete=fakeDelete, + # exec_one_benchmark=fakeBenchmarkExec, + # exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, + 
exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore, + analyze=fakegetResults, + delete=fakeDelete, analyze_iterations=fake_analyze) cls.assertEqual(res, 3) @@ -316,11 +316,11 @@ class Test_execBenchmark(unittest.TestCase): metrics=[[[1, 2], [3, 4, 5]]], dataset_var=cls.Dataset, track_tracebacks=6, - # exec_one_benchmark=fakeBenchmarkExec, - # exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, - exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore, - get_results=fakegetResults, - delete=fakeDelete, + # exec_one_benchmark=fakeBenchmarkExec, + # exec_one_benchmark_multicore=fakeBenchmarkExec_mutlicore, + exec_one_benchmark_mono_core=fakeBenchmarkExec_monocore, + analyze=fakegetResults, + delete=fakeDelete, analyze_iterations=fake_analyze) cls.assertEqual(res, 3) diff --git a/multiview_platform/tests/test_result_analysis/__init__.py b/multiview_platform/tests/test_result_analysis/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/multiview_platform/tests/test_result_analysis/test_duration_analysis.py b/multiview_platform/tests/test_result_analysis/test_duration_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/multiview_platform/tests/test_result_analysis/test_error_analysis.py b/multiview_platform/tests/test_result_analysis/test_error_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..3168048ae04f61a305f7b79e0b134ef48ae5abfc --- /dev/null +++ b/multiview_platform/tests/test_result_analysis/test_error_analysis.py @@ -0,0 +1,76 @@ +import unittest +import numpy as np + +from multiview_platform.mono_multi_view_classifiers.monoview.monoview_utils import MonoviewResult +from multiview_platform.mono_multi_view_classifiers.multiview.multiview_utils import MultiviewResult + +from multiview_platform.mono_multi_view_classifiers.result_analysis.error_analysis import get_example_errors, gen_error_data, gen_error_data_glob + + +class Test_get_example_errors(unittest.TestCase): + + def test_simple(self): + ground_truth = np.array([0,1,0,1,0,1,0,1, -100]) + results = [MultiviewResult("mv", "", {"accuracy_score": [0.7, 0.75], + "f1_score": [0.71, 0.76]}, + np.array([0,0,0,0,1,1,1,1,1]), + 0,0,0), + MonoviewResult(0, + "dt", + "1", + {"accuracy_score": [0.8, 0.85], + "f1_score": [0.81, 0.86]} + , np.array([0,0,1,1,0,0,1,1,0]), "", "", + "", "",0,0) + ] + example_errors = get_example_errors(ground_truth, + results) + self.assertIsInstance(example_errors, dict) + np.testing.assert_array_equal(example_errors["mv"], + np.array([1,0,1,0,0,1,0,1,-100])) + np.testing.assert_array_equal(example_errors["dt-1"], + np.array([1, 0, 0, 1, 1, 0, 0, 1,-100])) + +class Test_gen_error_data(unittest.TestCase): + + def test_simple(self): + random_state = np.random.RandomState(42) + ada_data = random_state.randint(0,2,size=7) + mv_data = random_state.randint(0, 2, size=7) + example_errors = {"ada-1": ada_data, + "mv": mv_data} + nb_classifiers, nb_examples, classifiers_names, \ + data_2d, error_on_examples = gen_error_data(example_errors) + self.assertEqual(nb_classifiers, 2) + self.assertEqual(nb_examples, 7) + self.assertEqual(classifiers_names, ["ada-1", "mv"]) + np.testing.assert_array_equal(data_2d, np.array([ada_data, mv_data]).transpose()) + np.testing.assert_array_equal(error_on_examples, -1*(ada_data+mv_data)/nb_classifiers) + + + +class Test_gen_error_data_glob(unittest.TestCase): + + def test_simple(self): + 
random_state = np.random.RandomState(42) + + ada_error_data_1 = random_state.randint(0,2,7) + ada_error_data_2 = random_state.randint(0, 2, 7) + ada_sum = ada_error_data_1+ada_error_data_2 + mv_error_data_1 = random_state.randint(0, 2, 7) + mv_error_data_2 = random_state.randint(0, 2, 7) + mv_sum = mv_error_data_1+mv_error_data_2 + + combi_results = {"ada-1":ada_sum, "mv": mv_sum} + + stats_iter = 2 + + nb_examples, nb_classifiers, \ + data, error_on_examples, \ + classifier_names = gen_error_data_glob(combi_results, + stats_iter) + self.assertEqual(nb_examples, 7) + self.assertEqual(nb_classifiers, 2) + np.testing.assert_array_equal(data, np.array([ada_sum, mv_sum]).transpose()) + np.testing.assert_array_equal(error_on_examples, -1*np.sum(np.array([ada_sum, mv_sum]), axis=0)+(nb_classifiers*stats_iter)) + self.assertEqual(classifier_names, ["ada-1", "mv"]) \ No newline at end of file diff --git a/multiview_platform/tests/test_result_analysis/test_execution.py b/multiview_platform/tests/test_result_analysis/test_execution.py new file mode 100644 index 0000000000000000000000000000000000000000..3d11cb564d352747589066a8d23a5d0ba51bd00e --- /dev/null +++ b/multiview_platform/tests/test_result_analysis/test_execution.py @@ -0,0 +1,84 @@ +import unittest +import numpy as np +import pandas as pd + +from multiview_platform.mono_multi_view_classifiers.monoview.monoview_utils import MonoviewResult +from multiview_platform.mono_multi_view_classifiers.multiview.multiview_utils import MultiviewResult + +from multiview_platform.mono_multi_view_classifiers.result_analysis.execution import format_previous_results, get_arguments + +class Test_format_previous_results(unittest.TestCase): + + def test_simple(self): + iter_results = {"metrics_scores":[], "example_errors":[], "feature_importances":[], "labels":[], "durations":[]} + random_state = np.random.RandomState(42) + + # Gen metrics data + metrics_1_data = random_state.uniform(size=(2,2)) + metrics_2_data = random_state.uniform(size=(2,2)) + metric_1_df = pd.DataFrame(data=metrics_1_data, index=["train", "test"], + columns=["ada-1", "mv"]) + metric_2_df = pd.DataFrame(data=metrics_2_data, index=["train", "test"], + columns=["ada-1", "mv"]) + iter_results["metrics_scores"].append({"acc": metric_1_df}) + iter_results["metrics_scores"].append({"acc": metric_2_df}) + + # Gen error data + ada_error_data_1 = random_state.randint(0,2,7) + ada_error_data_2 = random_state.randint(0, 2, 7) + ada_sum = ada_error_data_1+ada_error_data_2 + mv_error_data_1 = random_state.randint(0, 2, 7) + mv_error_data_2 = random_state.randint(0, 2, 7) + mv_sum = mv_error_data_1+mv_error_data_2 + iter_results["example_errors"].append({}) + iter_results["example_errors"].append({}) + iter_results["example_errors"][0]["ada-1"] = ada_error_data_1 + iter_results["example_errors"][0]["mv"] = mv_error_data_1 + iter_results["example_errors"][1]["ada-1"] = ada_error_data_2 + iter_results["example_errors"][1]["mv"] = mv_error_data_2 + + iter_results["durations"].append(pd.DataFrame(index=["ada-1", "mv"], + columns=["plif", "plaf"], + data=np.zeros((2,2)))) + iter_results["durations"].append(pd.DataFrame(index=["ada-1", "mv"], + columns=["plif", + "plaf"], + data=np.ones((2, 2)))) + + # Running the function + metric_analysis, error_analysis, \ + feature_importances, feature_stds, \ + labels, durations_mean, duration_std = format_previous_results(iter_results) + mean_df = pd.DataFrame(data=np.mean(np.array([metrics_1_data, + metrics_2_data]), + axis=0), + index=["train", "test"], + 
columns=["ada-1", "mvm"]) + std_df = pd.DataFrame(data=np.std(np.array([metrics_1_data, + metrics_2_data]), + axis=0), + index=["train", "test"], + columns=["ada-1", "mvm"]) + + # Testing + np.testing.assert_array_equal(metric_analysis["acc"]["mean"].loc["train"], + mean_df.loc["train"]) + np.testing.assert_array_equal(metric_analysis["acc"]["mean"].loc["test"], + mean_df.loc["test"]) + np.testing.assert_array_equal(metric_analysis["acc"]["std"].loc["train"], + std_df.loc["train"]) + np.testing.assert_array_equal(metric_analysis["acc"]["std"].loc["test"], + std_df.loc["test"]) + np.testing.assert_array_equal(ada_sum, error_analysis["ada-1"]) + np.testing.assert_array_equal(mv_sum, error_analysis["mv"]) + self.assertEqual(durations_mean.at["ada-1", 'plif'], 0.5) + +class Test_get_arguments(unittest.TestCase): + + def setUp(self): + self.benchamrk_argument_dictionaries = [{"flag":"good_flag", "valid":True}, + {"flag":"bad_flag", "valid":False}] + + def test_benchmark_wanted(self): + argument_dict = get_arguments(self.benchamrk_argument_dictionaries, "good_flag") + self.assertTrue(argument_dict["valid"]) diff --git a/multiview_platform/tests/test_result_analysis/test_feature_importances.py b/multiview_platform/tests/test_result_analysis/test_feature_importances.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/multiview_platform/tests/test_result_analysis/test_metric_analysis.py b/multiview_platform/tests/test_result_analysis/test_metric_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..17f8cee856262b19222ab06965090b51b6e453df --- /dev/null +++ b/multiview_platform/tests/test_result_analysis/test_metric_analysis.py @@ -0,0 +1,148 @@ +import unittest +import numpy as np +import pandas as pd +import os + +from multiview_platform.mono_multi_view_classifiers.monoview.monoview_utils import MonoviewResult +from multiview_platform.mono_multi_view_classifiers.multiview.multiview_utils import MultiviewResult + +from multiview_platform.mono_multi_view_classifiers.result_analysis.metric_analysis import get_metrics_scores, init_plot + +class Test_get_metrics_scores(unittest.TestCase): + + + def test_simple(self): + metrics = [["accuracy_score"], ["f1_score"]] + results = [MonoviewResult(0, + "ada", + "0", + {"accuracy_score":[0.9, 0.95], + "f1_score":[0.91, 0.96]} + , "", "", "", "", "",0,0)] + metrics_scores = get_metrics_scores(metrics, + results) + self.assertIsInstance(metrics_scores, dict) + self.assertIsInstance(metrics_scores["accuracy_score"], pd.DataFrame) + np.testing.assert_array_equal(np.array(metrics_scores["accuracy_score"].loc["train"]), np.array([0.9])) + np.testing.assert_array_equal( + np.array(metrics_scores["accuracy_score"].loc["test"]), + np.array([0.95])) + np.testing.assert_array_equal( + np.array(metrics_scores["f1_score"].loc["train"]), + np.array([0.91])) + np.testing.assert_array_equal( + np.array(metrics_scores["f1_score"].loc["test"]), + np.array([0.96])) + np.testing.assert_array_equal(np.array(metrics_scores["f1_score"].columns), + np.array(["ada-0"])) + + def test_multiple_monoview_classifiers(self): + metrics = [["accuracy_score"], ["f1_score"]] + results = [MonoviewResult(view_index=0, + classifier_name="ada", + view_name="0", + metrics_scores={"accuracy_score": [0.9, 0.95], + "f1_score": [0.91, 0.96]}, + full_labels_pred="", + classifier_config="", + classifier="", + n_features="", + hps_duration=0, + fit_duration=0, + pred_duration=0), + 
+        results = [MonoviewResult(view_index=0,
+                                  classifier_name="ada",
+                                  view_name="0",
+                                  metrics_scores={"accuracy_score": [0.9, 0.95],
+                                                  "f1_score": [0.91, 0.96]},
+                                  full_labels_pred="",
+                                  classifier_config="",
+                                  classifier="",
+                                  n_features="",
+                                  hps_duration=0,
+                                  fit_duration=0,
+                                  pred_duration=0),
+                   MonoviewResult(view_index=0,
+                                  classifier_name="dt",
+                                  view_name="1",
+                                  metrics_scores={"accuracy_score": [0.8, 0.85],
+                                                  "f1_score": [0.81, 0.86]},
+                                  full_labels_pred="",
+                                  classifier_config="",
+                                  classifier="",
+                                  n_features="",
+                                  hps_duration=0,
+                                  fit_duration=0,
+                                  pred_duration=0)
+                   ]
+        metrics_scores = get_metrics_scores(metrics,
+                                            results)
+        self.assertIsInstance(metrics_scores, dict)
+        self.assertIsInstance(metrics_scores["accuracy_score"], pd.DataFrame)
+        np.testing.assert_array_equal(
+            np.array(metrics_scores["accuracy_score"].loc["train"]),
+            np.array([0.9, 0.8]))
+        np.testing.assert_array_equal(
+            np.array(metrics_scores["accuracy_score"].loc["test"]),
+            np.array([0.95, 0.85]))
+        np.testing.assert_array_equal(
+            np.array(metrics_scores["f1_score"].loc["train"]),
+            np.array([0.91, 0.81]))
+        np.testing.assert_array_equal(
+            np.array(metrics_scores["f1_score"].loc["test"]),
+            np.array([0.96, 0.86]))
+        np.testing.assert_array_equal(
+            np.array(metrics_scores["f1_score"].columns),
+            np.array(["ada-0", "dt-1"]))
+
+    def test_multiview_result(self):
+        metrics = [["accuracy_score"], ["f1_score"]]
+        results = [MultiviewResult("mv", "", {"accuracy_score": [0.7, 0.75],
+                                              "f1_score": [0.71, 0.76]}, "", 0, 0, 0),
+                   MonoviewResult(view_index=0,
+                                  classifier_name="dt",
+                                  view_name="1",
+                                  metrics_scores={"accuracy_score": [0.8, 0.85],
+                                                  "f1_score": [0.81, 0.86]},
+                                  full_labels_pred="",
+                                  classifier_config="",
+                                  classifier="",
+                                  n_features="",
+                                  hps_duration=0,
+                                  fit_duration=0,
+                                  pred_duration=0)
+                   ]
+        metrics_scores = get_metrics_scores(metrics,
+                                            results)
+        self.assertIsInstance(metrics_scores, dict)
+        self.assertIsInstance(metrics_scores["accuracy_score"], pd.DataFrame)
+        np.testing.assert_array_equal(
+            np.array(metrics_scores["accuracy_score"].loc["train"]),
+            np.array([0.7, 0.8]))
+        np.testing.assert_array_equal(
+            np.array(metrics_scores["accuracy_score"].loc["test"]),
+            np.array([0.75, 0.85]))
+        np.testing.assert_array_equal(
+            np.array(metrics_scores["f1_score"].loc["train"]),
+            np.array([0.71, 0.81]))
+        np.testing.assert_array_equal(
+            np.array(metrics_scores["f1_score"].loc["test"]),
+            np.array([0.76, 0.86]))
+        np.testing.assert_array_equal(
+            np.array(metrics_scores["f1_score"].columns),
+            np.array(["mv", "dt-1"]))
+
+
+class Test_init_plot(unittest.TestCase):
+
+    def test_simple(self):
+        results = []
+        metric_name = "acc"
+        data = np.random.RandomState(42).uniform(0,1,(2,2))
+        metric_dataframe = pd.DataFrame(index=["train", "test"],
+                                        columns=["dt-1", "mv"], data=data)
+        directory = "dir"
+        database_name = 'db'
+        labels_names = ['lb1', "lb2"]
+        train, test, classifier_names, \
+        file_name, nb_results, results = init_plot(results,
+                                                   metric_name,
+                                                   metric_dataframe,
+                                                   directory,
+                                                   database_name,
+                                                   labels_names)
+        self.assertEqual(file_name, os.path.join("dir", "db-lb1_vs_lb2-acc"))
+        np.testing.assert_array_equal(train, data[0,:])
+        np.testing.assert_array_equal(test, data[1, :])
+        np.testing.assert_array_equal(classifier_names, np.array(["dt-1", "mv"]))
+        self.assertEqual(nb_results, 2)
+        self.assertEqual(results, [["dt-1", "acc", data[1,0], 0],
+                                   ["mv", "acc", data[1,1], 0]])
\ No newline at end of file
diff --git a/multiview_platform/tests/test_result_analysis/test_noise_analysis.py b/multiview_platform/tests/test_result_analysis/test_noise_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/multiview_platform/tests/test_result_analysis/test_tracebacks_analysis.py b/multiview_platform/tests/test_result_analysis/test_tracebacks_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391