Commit cda7ba8b authored by Baptiste Bauvin's avatar Baptiste Bauvin

Added duration tracking for each iteration

parent 98e14bb6
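The change applies the same pattern at each costly step (hyper-parameter search, fit, prediction): read time.monotonic() before and after the call and keep the elapsed seconds on the result object. Below is a minimal, self-contained sketch of that pattern; the timed() helper and the lambda-based usage are illustrative only and do not appear in the commit.

import time

def timed(step):
    # Run a zero-argument callable and return (result, duration in seconds).
    # time.monotonic() is used instead of time.time() so the measurement is
    # immune to system clock adjustments, matching the switch made in this commit.
    beg = time.monotonic()
    result = step()
    return result, time.monotonic() - beg

# Usage sketch (classifier, X_train, y_train, X_test assumed to exist):
# _, fit_duration = timed(lambda: classifier.fit(X_train, y_train))
# test_pred, pred_duration = timed(lambda: classifier.predict(X_test))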
@@ -88,12 +88,14 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
logging.debug("Start:\t Generate classifier args")
classifier_module = getattr(monoview_classifiers, classifier_name)
classifier_class_name = classifier_module.classifier_class_name
hyper_param_beg = time.monotonic()
cl_kwargs, test_folds_preds = get_hyper_params(classifier_module, hyper_param_search,
n_iter, classifier_name,
classifier_class_name,
X_train, y_train,
random_state, output_file_name,
k_folds, nb_cores, metrics, kwargs)
hyper_param_duration = time.monotonic() - hyper_param_beg
logging.debug("Done:\t Generate classifier args")
logging.debug("Start:\t Training")
@@ -103,13 +105,16 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
(random_state, **cl_kwargs),
random_state,
y=Y)
fit_beg = time.monotonic()
classifier.fit(X_train, y_train) # NB_CORES=nbCores,
fit_duration = time.monotonic() - fit_beg
logging.debug("Done:\t Training")
logging.debug("Start:\t Predicting")
train_pred = classifier.predict(X_train)
pred_beg = time.monotonic()
test_pred = classifier.predict(X_test)
pred_duration = time.monotonic() - pred_beg
# Filling the full prediction in the right order
full_pred = np.zeros(Y.shape, dtype=int) - 100
@@ -120,9 +125,9 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
logging.debug("Done:\t Predicting")
duration = time.time() - t_start
whole_duration = time.monotonic() - t_start
logging.debug(
"Info:\t Time for training and predicting: " + str(duration) + "[s]")
"Info:\t Duration for training and predicting: " + str(whole_duration) + "[s]")
logging.debug("Start:\t Getting results")
result_analyzer = MonoviewResultAnalyzer(view_name=view_name,
@@ -141,7 +146,7 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
labels=Y,
database_name=database_name,
nb_cores=nb_cores,
duration=duration)
duration=whole_duration)
string_analysis, images_analysis, metrics_scores = result_analyzer.analyze()
logging.debug("Done:\t Getting results")
@@ -154,10 +159,9 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
if test_folds_preds is None:
test_folds_preds = train_pred
return MonoviewResult(view_index, classifier_name, view_name,
metrics_scores,
full_pred, cl_kwargs,
test_folds_preds, classifier,
X_train.shape[1])
metrics_scores, full_pred, cl_kwargs,
test_folds_preds, classifier, X_train.shape[1],
hyper_param_duration, fit_duration, pred_duration)
def init_constants(args, X, classification_indices, labels_names,
@@ -166,7 +170,7 @@ def init_constants(args, X, classification_indices, labels_names,
kwargs = args["args"]
except KeyError:
kwargs = args
t_start = time.time()
t_start = time.monotonic()
cl_type = kwargs["classifier_name"]
learning_rate = float(len(classification_indices[0])) / (
len(classification_indices[0]) + len(classification_indices[1]))
@@ -156,7 +156,8 @@ def percent(x, pos):
class MonoviewResult(object):
def __init__(self, view_index, classifier_name, view_name, metrics_scores,
full_labels_pred, classifier_config, test_folds_preds,
classifier, n_features):
classifier, n_features, hps_duration, fit_duration,
pred_duration):
self.view_index = view_index
self.classifier_name = classifier_name
self.view_name = view_name
@@ -166,6 +167,9 @@ class MonoviewResult(object):
self.test_folds_preds = test_folds_preds
self.clf = classifier
self.n_features = n_features
self.hps_duration = hps_duration
self.fit_duration = fit_duration
self.pred_duration = pred_duration
def get_classifier_name(self):
return self.classifier_name + "-" + self.view_name
@@ -258,6 +258,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
logging.debug("Done:\t Getting classifiers modules")
logging.debug("Start:\t Optimizing hyperparameters")
hps_beg = time.monotonic()
if hyper_param_search != "None":
classifier_config = hyper_parameter_search.search_best_settings(
dataset_var, dataset_var.get_labels(), classifier_module,
@@ -266,6 +267,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
output_file_name, nb_cores=nb_cores, views_indices=views_indices,
searching_tool=hyper_param_search, n_iter=n_iter,
classifier_config=classifier_config)
hps_duration = time.monotonic() - hps_beg
classifier = get_mc_estim(
getattr(classifier_module, classifier_name)(random_state=random_state,
**classifier_config),
@@ -273,31 +275,35 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
y=dataset_var.get_labels())
logging.debug("Done:\t Optimizing hyperparameters")
logging.debug("Start:\t Fitting classifier")
fit_beg = time.monotonic()
classifier.fit(dataset_var, dataset_var.get_labels(),
train_indices=learning_indices,
view_indices=views_indices)
fit_duration = time.monotonic() - fit_beg
logging.debug("Done:\t Fitting classifier")
logging.debug("Start:\t Predicting")
train_pred = classifier.predict(dataset_var,
example_indices=learning_indices,
view_indices=views_indices)
pred_beg = time.monotonic()
test_pred = classifier.predict(dataset_var,
example_indices=validation_indices,
view_indices=views_indices)
pred_duration = time.monotonic() - pred_beg
full_labels = np.zeros(dataset_var.get_labels().shape, dtype=int) - 100
full_labels[learning_indices] = train_pred
full_labels[validation_indices] = test_pred
logging.info("Done:\t Pertidcting")
classification_time = time.time() - t_start
whole_duration = time.time() - t_start
logging.info(
"Info:\t Classification duration " + str(extraction_time) + "s")
# TODO: get better cltype
logging.info("Start:\t Result Analysis for " + cl_type)
times = (extraction_time, classification_time)
times = (extraction_time, whole_duration)
result_analyzer = MultiviewResultAnalyzer(view_names=views,
classifier=classifier,
classification_indices=classification_indices,
@@ -312,7 +318,7 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
labels=labels,
database_name=dataset_var.get_name(),
nb_cores=nb_cores,
duration=classification_time)
duration=whole_duration)
string_analysis, images_analysis, metrics_scores = result_analyzer.analyze()
logging.info("Done:\t Result Analysis for " + cl_type)
@@ -321,4 +327,5 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
logging.debug("Start:\t Saving preds")
return MultiviewResult(cl_type, classifier_config, metrics_scores,
full_labels)
full_labels, hps_duration, fit_duration,
pred_duration)
@@ -151,11 +151,15 @@ from .. import multiview_classifiers
class MultiviewResult(object):
def __init__(self, classifier_name, classifier_config,
metrics_scores, full_labels):
metrics_scores, full_labels, hps_duration, fit_duration,
pred_duration):
self.classifier_name = classifier_name
self.classifier_config = classifier_config
self.metrics_scores = metrics_scores
self.full_labels_pred = full_labels
self.hps_duration = hps_duration
self.fit_duration = fit_duration
self.pred_duration = pred_duration
def get_classifier_name(self):
try:
@@ -169,7 +169,7 @@ def plot_metric_scores(train_scores, test_scores, names, nb_results,
))
fig.update_layout(
title=metric_name + "\n" + tag + " scores for each classifier")
title=metric_name + "<br>" + tag + " scores for each classifier")
fig.update_layout(paper_bgcolor='rgba(0,0,0,0)',
plot_bgcolor='rgba(0,0,0,0)')
plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False)
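The "\n" to "<br>" change above reflects that plotly renders title text as HTML, so a literal newline is ignored while a <br> tag produces the intended line break. A minimal standalone sketch (figure contents and file name are arbitrary):

import plotly
import plotly.graph_objs as go

fig = go.Figure(go.Bar(x=["a", "b"], y=[1, 2]))
# "\n" would not be rendered in the title; "<br>" forces a line break.
fig.update_layout(title="accuracy_score" + "<br>" + "Train scores for each classifier")
plotly.offline.plot(fig, filename="example.html", auto_open=False)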
@@ -619,6 +619,48 @@ def publish_example_errors(example_errors, directory, databaseName,
logging.debug("Done:\t Biclass Label analysis figures generation")
def plot_durations(durations, directory, database_name, durations_stds=None):
    file_name = os.path.join(directory, database_name + "-durations")
    fig = plotly.graph_objs.Figure()
    if durations_stds is None:
        durations_stds = {}
        for dur_key, dur_val in durations.items():
            durations_stds[dur_key] = dict((key, 0)
                                           for key, val in durations[dur_key].items())
    fig.add_trace(plotly.graph_objs.Bar(name='Hyper-parameter Optimization',
                                        x=list(durations['hps'].keys()),
                                        y=list(durations['hps'].values()),
                                        error_y=dict(type='data',
                                                     array=list(durations_stds["hps"].values())),
                                        marker_color="grey"))
    fig.add_trace(plotly.graph_objs.Bar(name='Fit (on train set)',
                                        x=list(durations['fit'].keys()),
                                        y=list(durations['fit'].values()),
                                        error_y=dict(type='data',
                                                     array=list(durations_stds["fit"].values())),
                                        marker_color="black"))
    fig.add_trace(plotly.graph_objs.Bar(name='Prediction (on test set)',
                                        x=list(durations['pred'].keys()),
                                        y=list(durations['pred'].values()),
                                        error_y=dict(type='data',
                                                     array=list(durations_stds["pred"].values())),
                                        marker_color="lightgrey"))
    fig.update_layout(title="Durations for each classifier")
    fig.update_layout(paper_bgcolor='rgba(0,0,0,0)',
                      plot_bgcolor='rgba(0,0,0,0)')
    plotly.offline.plot(fig, filename=file_name + ".html", auto_open=False)
    index = durations["hps"].keys()
    df = pd.DataFrame(index=index, columns=["hps", "fit", "pred"])
    for key, value in durations.items():
        df[key] = [value[ind] for ind in index]
    df.to_csv(file_name + "_dataframe.csv")
def publish_feature_importances(feature_importances, directory, database_name,
feature_stds=None):
for view_name, feature_importance in feature_importances.items():
@@ -712,6 +754,19 @@ def get_feature_importances(result, feature_names=None):
return feature_importances
def get_duration(results):
    durations = {"hps": {}, "fit": {}, "pred": {}}
    for classifier_result in results:
        durations["hps"][
            classifier_result.get_classifier_name()] = classifier_result.hps_duration
        durations["fit"][
            classifier_result.get_classifier_name()] = classifier_result.fit_duration
        durations["pred"][
            classifier_result.get_classifier_name()] = classifier_result.pred_duration
    return durations
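For reference, get_duration returns a dict keyed first by step ("hps", "fit", "pred") and then by classifier name, which is the shape plot_durations consumes. A hedged sketch with invented classifier names and timings:

# Illustrative shape only; names and values are made up.
durations = {
    "hps": {"decision_tree-ViewNumber0": 1.20, "adaboost-ViewNumber0": 3.40},
    "fit": {"decision_tree-ViewNumber0": 0.10, "adaboost-ViewNumber0": 0.85},
    "pred": {"decision_tree-ViewNumber0": 0.01, "adaboost-ViewNumber0": 0.05},
}
# plot_durations(durations, directory, database_name) would then write
# <database_name>-durations.html and <database_name>-durations_dataframe.csv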
def publish_tracebacks(directory, database_name, labels_names, tracebacks,
iter_index):
if tracebacks:
@@ -733,7 +788,7 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
Parameters
----------
results : list
The result list returned by the bencmark execution function. For each executed benchmark, contains
The result list returned by the benchmark execution function. For each executed benchmark, contains
a flag & a result element.
The flag is a way to identify to which benchmark the results belong, formatted this way :
`flag = iter_index, [classifierPositive, classifierNegative]` with
@@ -756,7 +811,8 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
logging.debug("Srart:\t Analzing all biclass resuls")
iter_results = {"metrics_scores": [i for i in range(stats_iter)],
"example_errors": [i for i in range(stats_iter)],
"feature_importances": [i for i in range(stats_iter)]}
"feature_importances": [i for i in range(stats_iter)],
"durations":[i for i in range(stats_iter)]}
flagged_tracebacks_list = []
fig_errors = []
for iter_index, result, tracebacks in results:
@@ -765,6 +821,7 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
metrics_scores = get_metrics_scores_biclass(metrics, result)
example_errors = get_example_errors_biclass(labels, result)
feature_importances = get_feature_importances(result)
durations = get_duration(result)
directory = arguments["directory"]
database_name = arguments["args"]["name"]
@@ -780,11 +837,13 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
labels_names, example_ids, labels)
publish_feature_importances(feature_importances, directory,
database_name)
plot_durations(durations, directory, database_name)
iter_results["metrics_scores"][iter_index] = metrics_scores
iter_results["example_errors"][iter_index] = example_errors
iter_results["feature_importances"][iter_index] = feature_importances
iter_results["labels"] = labels
iter_results["durations"][iter_index] = durations
logging.debug("Done:\t Analzing all biclass resuls")