From cda7ba8b19268136eb7873ce5653256440def98a Mon Sep 17 00:00:00 2001
From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr>
Date: Thu, 27 Feb 2020 21:32:45 +0100
Subject: [PATCH] Added duration tracking for each iteration

---
 .../monoview/exec_classif_mono_view.py        | 22 ++++---
 .../monoview/monoview_utils.py                |  6 +-
 .../multiview/exec_multiview.py               | 15 +++--
 .../multiview/multiview_utils.py              |  6 +-
 .../result_analysis.py                        | 65 ++++++++++++++++++-
 5 files changed, 96 insertions(+), 18 deletions(-)

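A minimal usage sketch of the duration tracking added here, for reference: the
timing pattern and the {"hps", "fit", "pred"} dict layout consumed by the new
plot_durations() helper are taken from this patch; DecisionTreeClassifier, the
toy data and the classifier names are stand-ins for any estimator with
fit/predict, and the import path is assumed to match the files touched below.

    import time

    import numpy as np
    from sklearn.tree import DecisionTreeClassifier

    from multiview_platform.mono_multi_view_classifiers.result_analysis import \
        plot_durations

    rng = np.random.RandomState(42)
    X_train, y_train = rng.rand(100, 5), rng.randint(0, 2, 100)
    X_test = rng.rand(40, 5)

    durations = {"hps": {}, "fit": {}, "pred": {}}
    for name, clf in [("dt-depth1", DecisionTreeClassifier(max_depth=1)),
                      ("dt-depth5", DecisionTreeClassifier(max_depth=5))]:
        durations["hps"][name] = 0.0  # no hyper-parameter search in this sketch
        fit_beg = time.monotonic()
        clf.fit(X_train, y_train)
        durations["fit"][name] = time.monotonic() - fit_beg
        pred_beg = time.monotonic()
        clf.predict(X_test)
        durations["pred"][name] = time.monotonic() - pred_beg

    # Writes demo-durations.html and demo-durations_dataframe.csv in ".".
    plot_durations(durations, directory=".", database_name="demo")
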
diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py
index ef0bf719..84d45a22 100644
--- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py
+++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py
@@ -88,12 +88,14 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
     logging.debug("Start:\t Generate classifier args")
     classifier_module = getattr(monoview_classifiers, classifier_name)
     classifier_class_name = classifier_module.classifier_class_name
+    hyper_param_beg = time.monotonic()
     cl_kwargs, test_folds_preds = get_hyper_params(classifier_module, hyper_param_search,
                                                    n_iter, classifier_name,
                                                    classifier_class_name,
                                                    X_train, y_train,
                                                    random_state, output_file_name,
                                                    k_folds, nb_cores, metrics, kwargs)
+    hyper_param_duration = time.monotonic() - hyper_param_beg
     logging.debug("Done:\t Generate classifier args")
 
     logging.debug("Start:\t Training")
@@ -103,13 +105,16 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
                               (random_state, **cl_kwargs),
                               random_state,
                               y=Y)
-
+    fit_beg = time.monotonic()
     classifier.fit(X_train, y_train)  # NB_CORES=nbCores,
+    fit_duration = time.monotonic() - fit_beg
     logging.debug("Done:\t Training")
 
     logging.debug("Start:\t Predicting")
     train_pred = classifier.predict(X_train)
+    pred_beg = time.monotonic()
     test_pred = classifier.predict(X_test)
+    pred_duration = time.monotonic() - pred_beg
 
     # Filling the full prediction in the right order
     full_pred = np.zeros(Y.shape, dtype=int) - 100
@@ -120,9 +125,9 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
 
     logging.debug("Done:\t Predicting")
 
-    duration = time.time() - t_start
+    whole_duration = time.monotonic() - t_start
     logging.debug(
-        "Info:\t Time for training and predicting: " + str(duration) + "[s]")
+        "Info:\t Duration for training and predicting: " + str(whole_duration) + "[s]")
 
     logging.debug("Start:\t Getting results")
     result_analyzer = MonoviewResultAnalyzer(view_name=view_name,
@@ -141,7 +146,7 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
                                              labels=Y,
                                              database_name=database_name,
                                              nb_cores=nb_cores,
-                                             duration=duration)
+                                             duration=whole_duration)
     string_analysis, images_analysis, metrics_scores = result_analyzer.analyze()
     logging.debug("Done:\t Getting results")
 
@@ -154,10 +159,9 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
     if test_folds_preds is None:
         test_folds_preds = train_pred
     return MonoviewResult(view_index, classifier_name, view_name,
-                                         metrics_scores,
-                                         full_pred, cl_kwargs,
-                                         test_folds_preds, classifier,
-                                         X_train.shape[1])
+                          metrics_scores, full_pred, cl_kwargs,
+                          test_folds_preds, classifier, X_train.shape[1],
+                          hyper_param_duration, fit_duration, pred_duration)
 
 
 def init_constants(args, X, classification_indices, labels_names,
@@ -166,7 +170,7 @@ def init_constants(args, X, classification_indices, labels_names,
         kwargs = args["args"]
     except KeyError:
         kwargs = args
-    t_start = time.time()
+    t_start = time.monotonic()
     cl_type = kwargs["classifier_name"]
     learning_rate = float(len(classification_indices[0])) / (
             len(classification_indices[0]) + len(classification_indices[1]))
diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py
index 6912bc98..321f4195 100644
--- a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py
+++ b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py
@@ -156,7 +156,8 @@ def percent(x, pos):
 class MonoviewResult(object):
     def __init__(self, view_index, classifier_name, view_name, metrics_scores,
                  full_labels_pred, classifier_config, test_folds_preds,
-                 classifier, n_features):
+                 classifier, n_features, hps_duration, fit_duration,
+                 pred_duration):
         self.view_index = view_index
         self.classifier_name = classifier_name
         self.view_name = view_name
@@ -166,6 +167,9 @@ class MonoviewResult(object):
         self.test_folds_preds = test_folds_preds
         self.clf = classifier
         self.n_features = n_features
+        self.hps_duration = hps_duration
+        self.fit_duration = fit_duration
+        self.pred_duration = pred_duration
 
     def get_classifier_name(self):
         return self.classifier_name + "-" + self.view_name
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py
index 4fc5ca7f..7e5d5e52 100644
--- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py
+++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py
@@ -258,6 +258,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
     logging.debug("Done:\t Getting classifiers modules")
 
     logging.debug("Start:\t Optimizing hyperparameters")
+    hps_beg = time.monotonic()
     if hyper_param_search != "None":
         classifier_config = hyper_parameter_search.search_best_settings(
             dataset_var, dataset_var.get_labels(), classifier_module,
@@ -266,6 +267,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
             output_file_name, nb_cores=nb_cores, views_indices=views_indices,
             searching_tool=hyper_param_search, n_iter=n_iter,
             classifier_config=classifier_config)
+    hps_duration = time.monotonic() - hps_beg
     classifier = get_mc_estim(
         getattr(classifier_module, classifier_name)(random_state=random_state,
                                                     **classifier_config),
@@ -273,31 +275,35 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
         y=dataset_var.get_labels())
     logging.debug("Done:\t Optimizing hyperparameters")
     logging.debug("Start:\t Fitting classifier")
+    fit_beg = time.monotonic()
     classifier.fit(dataset_var, dataset_var.get_labels(),
                    train_indices=learning_indices,
                    view_indices=views_indices)
+    fit_duration = time.monotonic() - fit_beg
     logging.debug("Done:\t Fitting classifier")
 
     logging.debug("Start:\t Predicting")
     train_pred = classifier.predict(dataset_var,
                                            example_indices=learning_indices,
                                            view_indices=views_indices)
+    pred_beg = time.monotonic()
     test_pred = classifier.predict(dataset_var,
                                           example_indices=validation_indices,
                                           view_indices=views_indices)
+    pred_duration = time.monotonic() - pred_beg
     full_labels = np.zeros(dataset_var.get_labels().shape, dtype=int) - 100
     full_labels[learning_indices] = train_pred
     full_labels[validation_indices] = test_pred
     logging.info("Done:\t Pertidcting")
 
-    classification_time = time.time() - t_start
+    whole_duration = time.time() - t_start
     logging.info(
         "Info:\t Classification duration " + str(extraction_time) + "s")
 
     # TODO: get better cltype
 
     logging.info("Start:\t Result Analysis for " + cl_type)
-    times = (extraction_time, classification_time)
+    times = (extraction_time, whole_duration)
     result_analyzer = MultiviewResultAnalyzer(view_names=views,
                                               classifier=classifier,
                                               classification_indices=classification_indices,
@@ -312,7 +318,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
                                               labels=labels,
                                               database_name=dataset_var.get_name(),
                                               nb_cores=nb_cores,
-                                              duration=classification_time)
+                                              duration=whole_duration)
     string_analysis, images_analysis, metrics_scores = result_analyzer.analyze()
     logging.info("Done:\t Result Analysis for " + cl_type)
 
@@ -321,4 +327,5 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
     logging.debug("Start:\t Saving preds")
 
     return MultiviewResult(cl_type, classifier_config, metrics_scores,
-                           full_labels)
+                           full_labels, hps_duration, fit_duration,
+                           pred_duration)
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py
index 4735a646..7503bbe4 100644
--- a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py
+++ b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py
@@ -151,11 +151,15 @@ from .. import multiview_classifiers
 
 class MultiviewResult(object):
     def __init__(self, classifier_name, classifier_config,
-                 metrics_scores, full_labels):
+                 metrics_scores, full_labels, hps_duration, fit_duration,
+                 pred_duration):
         self.classifier_name = classifier_name
         self.classifier_config = classifier_config
         self.metrics_scores = metrics_scores
         self.full_labels_pred = full_labels
+        self.hps_duration = hps_duration
+        self.fit_duration = fit_duration
+        self.pred_duration = pred_duration
 
     def get_classifier_name(self):
         try:
diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis.py
index a473ffc1..bf66ff69 100644
--- a/multiview_platform/mono_multi_view_classifiers/result_analysis.py
+++ b/multiview_platform/mono_multi_view_classifiers/result_analysis.py
@@ -169,7 +169,7 @@ def plot_metric_scores(train_scores, test_scores, names, nb_results,
         ))
 
         fig.update_layout(
-            title=metric_name + "\n" + tag + " scores for each classifier")
+            title=metric_name + "<br>" + tag + " scores for each classifier")
         fig.update_layout(paper_bgcolor='rgba(0,0,0,0)',
                           plot_bgcolor='rgba(0,0,0,0)')
         plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False)
@@ -619,6 +619,48 @@ def publish_example_errors(example_errors, directory, databaseName,
     logging.debug("Done:\t Biclass Label analysis figures generation")
 
 
+def plot_durations(durations, directory, database_name, durations_stds=None):
+    file_name = os.path.join(directory, database_name + "-durations")
+
+    fig = plotly.graph_objs.Figure()
+    if durations_stds is None:
+        # No stds provided: default to zero-valued error bars, keeping the
+        # same keys as the duration dicts for the error_y arrays below.
+        durations_stds = {dur_key: {key: 0 for key in dur_val}
+                          for dur_key, dur_val in durations.items()}
+    fig.add_trace(plotly.graph_objs.Bar(name='Hyper-parameter Optimization',
+                                        x=list(durations['hps'].keys()),
+                                        y=list(durations['hps'].values()),
+                                        error_y=dict(type='data',
+                                                     array=list(durations_stds[
+                                                         "hps"].values())),
+                                        marker_color="grey"))
+    fig.add_trace(plotly.graph_objs.Bar(name='Fit (on train set)',
+                                        x=list(durations['fit'].keys()),
+                                        y=list(durations['fit'].values()),
+                                        error_y=dict(type='data',
+                                                     array=list(durations_stds[
+                                                         "fit"].values())),
+                                        marker_color="black"))
+    fig.add_trace(plotly.graph_objs.Bar(name='Prediction (on test set)',
+                                        x=list(durations['pred'].keys()),
+                                        y=list(durations['pred'].values()),
+                                        error_y=dict(type='data',
+                                                     array=list(durations_stds[
+                                                         "pred"].values())),
+                                        marker_color="lightgrey"))
+    fig.update_layout(title="Durations for each classfier")
+    fig.update_layout(paper_bgcolor='rgba(0,0,0,0)',
+                      plot_bgcolor='rgba(0,0,0,0)')
+    plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False)
+    index = durations["hps"].keys()
+    df = pd.DataFrame(index=index,
+                      columns=["hps", "fit", "pred"],)
+    for key, value in durations.items():
+        df[key] = [value[ind] for ind in index]
+    df.to_csv(file_name+"_dataframe.csv")
+
+
 def publish_feature_importances(feature_importances, directory, database_name,
                                 feature_stds=None):
     for view_name, feature_importance in feature_importances.items():
@@ -712,6 +754,19 @@ def get_feature_importances(result, feature_names=None):
     return feature_importances
 
 
+def get_duration(results):
+    # Collect, for each classifier, the hyper-parameter search, fit and
+    # prediction durations stored on the mono/multiview result objects.
+    durations = {"hps": {}, "fit": {}, "pred": {}}
+    for classifier_result in results:
+        classifier_name = classifier_result.get_classifier_name()
+        durations["hps"][classifier_name] = classifier_result.hps_duration
+        durations["fit"][classifier_name] = classifier_result.fit_duration
+        durations["pred"][classifier_name] = classifier_result.pred_duration
+    return durations
+
+
+
 def publish_tracebacks(directory, database_name, labels_names, tracebacks,
                        iter_index):
     if tracebacks:
@@ -733,7 +788,7 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
     Parameters
     ----------
     results : list
-        The result list returned by the bencmark execution function. For each executed benchmark, contains
+        The result list returned by the benchmark execution function. For each executed benchmark, contains
         a flag & a result element.
         The flag is a way to identify to which benchmark the results belong, formatted this way :
         `flag = iter_index, [classifierPositive, classifierNegative]` with
@@ -756,7 +811,8 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
     logging.debug("Srart:\t Analzing all biclass resuls")
     iter_results = {"metrics_scores": [i for i in range(stats_iter)],
                     "example_errors": [i for i in range(stats_iter)],
-                    "feature_importances": [i for i in range(stats_iter)]}
+                    "feature_importances": [i for i in range(stats_iter)],
+                    "durations":[i for i in range(stats_iter)]}
     flagged_tracebacks_list = []
     fig_errors = []
     for iter_index, result, tracebacks in results:
@@ -765,6 +821,7 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
         metrics_scores = get_metrics_scores_biclass(metrics, result)
         example_errors = get_example_errors_biclass(labels, result)
         feature_importances = get_feature_importances(result)
+        durations = get_duration(result)
         directory = arguments["directory"]
 
         database_name = arguments["args"]["name"]
@@ -780,11 +837,13 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
                                labels_names, example_ids, labels)
         publish_feature_importances(feature_importances, directory,
                                     database_name)
+        plot_durations(durations, directory, database_name)
 
         iter_results["metrics_scores"][iter_index] = metrics_scores
         iter_results["example_errors"][iter_index] = example_errors
         iter_results["feature_importances"][iter_index] = feature_importances
         iter_results["labels"] = labels
+        iter_results["durations"][iter_index] = durations
 
     logging.debug("Done:\t Analzing all biclass resuls")
 
-- 
GitLab