From cda7ba8b19268136eb7873ce5653256440def98a Mon Sep 17 00:00:00 2001
From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr>
Date: Thu, 27 Feb 2020 21:32:45 +0100
Subject: [PATCH] Added duration tracking for each iteration

---
 .../monoview/exec_classif_mono_view.py | 22 ++++---
 .../monoview/monoview_utils.py         |  6 +-
 .../multiview/exec_multiview.py        | 15 +++--
 .../multiview/multiview_utils.py       |  6 +-
 .../result_analysis.py                 | 65 ++++++++++++++++++-
 5 files changed, 96 insertions(+), 18 deletions(-)

diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py
index ef0bf719..84d45a22 100644
--- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py
+++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py
@@ -88,12 +88,14 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
     logging.debug("Start:\t Generate classifier args")
     classifier_module = getattr(monoview_classifiers, classifier_name)
     classifier_class_name = classifier_module.classifier_class_name
+    hyper_param_beg = time.monotonic()
     cl_kwargs, test_folds_preds = get_hyper_params(classifier_module,
                                                    hyper_param_search, n_iter,
                                                    classifier_name,
                                                    classifier_class_name,
                                                    X_train, y_train, random_state,
                                                    output_file_name, k_folds,
                                                    nb_cores, metrics, kwargs)
+    hyper_param_duration = time.monotonic() - hyper_param_beg
     logging.debug("Done:\t Generate classifier args")
 
     logging.debug("Start:\t Training")
@@ -103,13 +105,16 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
                               (random_state, **cl_kwargs),
                               random_state,
                               y=Y)
-
+    fit_beg = time.monotonic()
     classifier.fit(X_train, y_train)  # NB_CORES=nbCores,
+    fit_duration = time.monotonic() - fit_beg
     logging.debug("Done:\t Training")
 
     logging.debug("Start:\t Predicting")
     train_pred = classifier.predict(X_train)
+    pred_beg = time.monotonic()
     test_pred = classifier.predict(X_test)
+    pred_duration = time.monotonic() - pred_beg
 
     # Filling the full prediction in the right order
     full_pred = np.zeros(Y.shape, dtype=int) - 100
@@ -120,9 +125,9 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
 
     logging.debug("Done:\t Predicting")
 
-    duration = time.time() - t_start
+    whole_duration = time.monotonic() - t_start
     logging.debug(
-        "Info:\t Time for training and predicting: " + str(duration) + "[s]")
+        "Info:\t Duration for training and predicting: " + str(whole_duration) + "[s]")
 
     logging.debug("Start:\t Getting results")
     result_analyzer = MonoviewResultAnalyzer(view_name=view_name,
@@ -141,7 +146,7 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
                                              labels=Y,
                                              database_name=database_name,
                                              nb_cores=nb_cores,
-                                             duration=duration)
+                                             duration=whole_duration)
     string_analysis, images_analysis, metrics_scores = result_analyzer.analyze()
     logging.debug("Done:\t Getting results")
 
@@ -154,10 +159,9 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
     if test_folds_preds is None:
         test_folds_preds = train_pred
     return MonoviewResult(view_index, classifier_name, view_name,
-                          metrics_scores,
-                          full_pred, cl_kwargs,
-                          test_folds_preds, classifier,
-                          X_train.shape[1])
+                          metrics_scores, full_pred, cl_kwargs,
+                          test_folds_preds, classifier, X_train.shape[1],
+                          hyper_param_duration, fit_duration, pred_duration)
 
 
 def init_constants(args, X, classification_indices, labels_names,
@@ -166,7 +170,7 @@ def init_constants(args, X, classification_indices, labels_names,
         kwargs = args["args"]
     except KeyError:
         kwargs = args
-    t_start = time.time()
+    t_start = time.monotonic()
     cl_type = kwargs["classifier_name"]
     learning_rate = float(len(classification_indices[0])) / (
             len(classification_indices[0]) + len(classification_indices[1]))
diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py
index 6912bc98..321f4195 100644
--- a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py
+++ b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py
@@ -156,7 +156,8 @@ def percent(x, pos):
 class MonoviewResult(object):
     def __init__(self, view_index, classifier_name, view_name, metrics_scores,
                  full_labels_pred, classifier_config, test_folds_preds,
-                 classifier, n_features):
+                 classifier, n_features, hps_duration, fit_duration,
+                 pred_duration):
         self.view_index = view_index
         self.classifier_name = classifier_name
         self.view_name = view_name
@@ -166,6 +167,9 @@ class MonoviewResult(object):
         self.test_folds_preds = test_folds_preds
         self.clf = classifier
         self.n_features = n_features
+        self.hps_duration = hps_duration
+        self.fit_duration = fit_duration
+        self.pred_duration = pred_duration
 
     def get_classifier_name(self):
         return self.classifier_name + "-" + self.view_name
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py
index 4fc5ca7f..7e5d5e52 100644
--- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py
+++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py
@@ -258,6 +258,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
     logging.debug("Done:\t Getting classifiers modules")
 
     logging.debug("Start:\t Optimizing hyperparameters")
+    hps_beg = time.monotonic()
    if hyper_param_search != "None":
        classifier_config = hyper_parameter_search.search_best_settings(
            dataset_var, dataset_var.get_labels(), classifier_module,
@@ -266,6 +267,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
            output_file_name, nb_cores=nb_cores, views_indices=views_indices,
            searching_tool=hyper_param_search, n_iter=n_iter,
            classifier_config=classifier_config)
+    hps_duration = time.monotonic() - hps_beg
     classifier = get_mc_estim(
         getattr(classifier_module, classifier_name)(random_state=random_state,
                                                     **classifier_config),
@@ -273,31 +275,35 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
         y=dataset_var.get_labels())
     logging.debug("Done:\t Optimizing hyperparameters")
 
     logging.debug("Start:\t Fitting classifier")
+    fit_beg = time.monotonic()
     classifier.fit(dataset_var, dataset_var.get_labels(),
                    train_indices=learning_indices,
                    view_indices=views_indices)
+    fit_duration = time.monotonic() - fit_beg
     logging.debug("Done:\t Fitting classifier")
 
     logging.debug("Start:\t Predicting")
     train_pred = classifier.predict(dataset_var,
                                     example_indices=learning_indices,
                                     view_indices=views_indices)
+    pred_beg = time.monotonic()
     test_pred = classifier.predict(dataset_var,
                                    example_indices=validation_indices,
                                    view_indices=views_indices)
+    pred_duration = time.monotonic() - pred_beg
     full_labels = np.zeros(dataset_var.get_labels().shape, dtype=int) - 100
     full_labels[learning_indices] = train_pred
     full_labels[validation_indices] = test_pred
     logging.info("Done:\t Pertidcting")
 
-    classification_time = time.time() - t_start
+    whole_duration = time.time() - t_start
     logging.info(
         "Info:\t Classification duration " + str(extraction_time) + "s")
 
     # TODO: get better cltype
 
     logging.info("Start:\t Result Analysis for " + cl_type)
-    times = (extraction_time, classification_time)
+    times = (extraction_time, whole_duration)
     result_analyzer = MultiviewResultAnalyzer(view_names=views,
                                               classifier=classifier,
                                               classification_indices=classification_indices,
@@ -312,7 +318,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
                                               labels=labels,
                                               database_name=dataset_var.get_name(),
                                               nb_cores=nb_cores,
-                                              duration=classification_time)
+                                              duration=whole_duration)
     string_analysis, images_analysis, metrics_scores = result_analyzer.analyze()
     logging.info("Done:\t Result Analysis for " + cl_type)
 
@@ -321,4 +327,5 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
     logging.debug("Start:\t Saving preds")
 
     return MultiviewResult(cl_type, classifier_config, metrics_scores,
-                           full_labels)
+                           full_labels, hps_duration, fit_duration,
+                           pred_duration)
diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py
index 4735a646..7503bbe4 100644
--- a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py
+++ b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py
@@ -151,11 +151,15 @@ from .. import multiview_classifiers
 
 class MultiviewResult(object):
     def __init__(self, classifier_name, classifier_config,
-                 metrics_scores, full_labels):
+                 metrics_scores, full_labels, hps_duration, fit_duration,
+                 pred_duration):
         self.classifier_name = classifier_name
         self.classifier_config = classifier_config
         self.metrics_scores = metrics_scores
         self.full_labels_pred = full_labels
+        self.hps_duration = hps_duration
+        self.fit_duration = fit_duration
+        self.pred_duration = pred_duration
 
     def get_classifier_name(self):
         try:
diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis.py
index a473ffc1..bf66ff69 100644
--- a/multiview_platform/mono_multi_view_classifiers/result_analysis.py
+++ b/multiview_platform/mono_multi_view_classifiers/result_analysis.py
@@ -169,7 +169,7 @@ def plot_metric_scores(train_scores, test_scores, names, nb_results,
         ))
 
     fig.update_layout(
-        title=metric_name + "\n" + tag + " scores for each classifier")
+        title=metric_name + "<br>" + tag + " scores for each classifier")
     fig.update_layout(paper_bgcolor='rgba(0,0,0,0)',
                       plot_bgcolor='rgba(0,0,0,0)')
     plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False)
@@ -619,6 +619,48 @@ def publish_example_errors(example_errors, directory, databaseName,
     logging.debug("Done:\t Biclass Label analysis figures generation")
 
 
+def plot_durations(durations, directory, database_name, durations_stds=None):
+    file_name = os.path.join(directory, database_name + "-durations")
+
+    fig = plotly.graph_objs.Figure()
+    if durations_stds is None:
+        durations_stds = {}
+        for dur_key, dur_val in durations.items():
+            durations_stds[dur_key] = dict((key, 0)
+                                           for key, val in durations[dur_key].items())
+    fig.add_trace(plotly.graph_objs.Bar(name='Hyper-parameter Optimization',
+                                        x=list(durations['hps'].keys()),
+                                        y=list(durations['hps'].values()),
+                                        error_y=dict(type='data',
+                                                     array=list(durations_stds[
+                                                         "hps"].values())),
+                                        marker_color="grey"))
+    fig.add_trace(plotly.graph_objs.Bar(name='Fit (on train set)',
+                                        x=list(durations['fit'].keys()),
+                                        y=list(durations['fit'].values()),
+                                        error_y=dict(type='data',
+                                                     array=list(durations_stds[
+                                                         "fit"].values())),
+                                        marker_color="black"))
+    fig.add_trace(plotly.graph_objs.Bar(name='Prediction (on test set)',
+                                        x=list(durations['pred'].keys()),
+                                        y=list(durations['pred'].values()),
+                                        error_y=dict(type='data',
+                                                     array=list(durations_stds[
+                                                         "pred"].values())),
+                                        marker_color="lightgrey"))
+    fig.update_layout(title="Durations for each classifier")
+    fig.update_layout(paper_bgcolor='rgba(0,0,0,0)',
+                      plot_bgcolor='rgba(0,0,0,0)')
+    plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False)
+
+    index = durations["hps"].keys()
+    df = pd.DataFrame(index=index,
+                      columns=["hps", "fit", "pred"],)
+    for key, value in durations.items():
+        df[key] = [value[ind] for ind in index]
+    df.to_csv(file_name + "_dataframe.csv")
+
+
 def publish_feature_importances(feature_importances, directory, database_name,
                                 feature_stds=None):
     for view_name, feature_importance in feature_importances.items():
@@ -712,6 +754,19 @@ def get_feature_importances(result, feature_names=None):
     return feature_importances
 
 
+def get_duration(results):
+    durations = {"hps": {}, "fit": {}, "pred": {}}
+    for classifier_result in results:
+        durations["hps"][
+            classifier_result.get_classifier_name()] = classifier_result.hps_duration
+        durations["fit"][
+            classifier_result.get_classifier_name()] = classifier_result.fit_duration
+        durations["pred"][
+            classifier_result.get_classifier_name()] = classifier_result.pred_duration
+    return durations
+
+
+
 def publish_tracebacks(directory, database_name, labels_names, tracebacks,
                        iter_index):
     if tracebacks:
@@ -733,7 +788,7 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
     Parameters
     ----------
     results : list
-        The result list returned by the bencmark execution function. For each executed benchmark, contains
+        The result list returned by the benchmark execution function. For each executed benchmark, contains
         a flag & a result element.
         The flag is a way to identify to which benchmark the results belong, formatted this way :
         `flag = iter_index, [classifierPositive, classifierNegative]` with
@@ -756,7 +811,8 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
     logging.debug("Srart:\t Analzing all biclass resuls")
     iter_results = {"metrics_scores": [i for i in range(stats_iter)],
                     "example_errors": [i for i in range(stats_iter)],
-                    "feature_importances": [i for i in range(stats_iter)]}
+                    "feature_importances": [i for i in range(stats_iter)],
+                    "durations": [i for i in range(stats_iter)]}
     flagged_tracebacks_list = []
     fig_errors = []
     for iter_index, result, tracebacks in results:
@@ -765,6 +821,7 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
         metrics_scores = get_metrics_scores_biclass(metrics, result)
         example_errors = get_example_errors_biclass(labels, result)
         feature_importances = get_feature_importances(result)
+        durations = get_duration(result)
         directory = arguments["directory"]
 
         database_name = arguments["args"]["name"]
@@ -780,11 +837,13 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
                                labels_names, example_ids, labels)
         publish_feature_importances(feature_importances, directory,
                                     database_name)
+        plot_durations(durations, directory, database_name)
 
         iter_results["metrics_scores"][iter_index] = metrics_scores
         iter_results["example_errors"][iter_index] = example_errors
         iter_results["feature_importances"][iter_index] = feature_importances
         iter_results["labels"] = labels
+        iter_results["durations"][iter_index] = durations
 
     logging.debug("Done:\t Analzing all biclass resuls")
-- 
GitLab
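
For context, the timing pattern this patch introduces can be reproduced in isolation as follows. This is an illustrative sketch, not platform code: the scikit-learn estimator and dataset are stand-ins chosen for the example, and only the time.monotonic() bracketing of the hyper-parameter search, fit, and prediction phases plus the {"hps", "fit", "pred"} dictionary shape (the structure built by get_duration() and consumed by plot_durations()) are taken from the patch.

import time

from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier

# Stand-in data and classifier (hypothetical, for illustration only).
X, y = make_classification(n_samples=500, n_features=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

durations = {"hps": {}, "fit": {}, "pred": {}}
name = "decision_tree"

# Hyper-parameter search duration (mirrors hyper_param_beg / hps_beg in the patch).
hps_beg = time.monotonic()
search = GridSearchCV(DecisionTreeClassifier(random_state=42),
                      {"max_depth": [1, 3, 5]}, cv=3).fit(X_train, y_train)
durations["hps"][name] = time.monotonic() - hps_beg

# Fit duration on the train set (mirrors fit_beg / fit_duration).
fit_beg = time.monotonic()
clf = DecisionTreeClassifier(random_state=42,
                             **search.best_params_).fit(X_train, y_train)
durations["fit"][name] = time.monotonic() - fit_beg

# Prediction duration on the test set (mirrors pred_beg / pred_duration).
pred_beg = time.monotonic()
clf.predict(X_test)
durations["pred"][name] = time.monotonic() - pred_beg

# Same nested shape as the dict returned by get_duration(), one entry per classifier.
print(durations)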