Skip to content
Snippets Groups Projects
Commit c521a670 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

HPS report updated and result analysis

parent c6a47f40
Branches
Tags
No related merge requests found
......@@ -13,7 +13,7 @@ debug: True
add_noise: False
noise_std: 0.0
res_dir: "../results/"
track_tracebacks: True
track_tracebacks: False
# All the classification-related configuration options
multiclass_method: "oneVersusOne"
......@@ -21,13 +21,13 @@ split: 0.49
nb_folds: 2
nb_class: 2
classes:
type: ["multiview", "monoview"]
type: ["multiview"]
algos_monoview: ["decision_tree" ]
algos_multiview: ["weighted_linear_early_fusion",]
stats_iter: 2
metrics: ["accuracy_score", "f1_score"]
metric_princ: "accuracy_score"
hps_type: "Random"
hps_type: "None"
hps_args:
n_iter: 4
equivalent_draws: False
......@@ -48,6 +48,34 @@ hps_args:
######################################
## The Monoview Classifier arguments #
######################################
weighted_linear_early_fusion:
monoview_classifier_config:
init: CustomDecisionTree
init__ccp_alpha: 0.0
init__class_weight: null
init__criterion: gini
init__max_depth: 1
init__max_features: null
init__max_leaf_nodes: null
init__min_impurity_decrease: 0.0
init__min_impurity_split: null
init__min_samples_leaf: 1
init__min_samples_split: 2
init__min_weight_fraction_leaf: 0.0
init__presort: deprecated
init__random_state: null
init__splitter: best
loss: exponential
max_depth: 5
n_estimators: 412
monoview_classifier_name: gradient_boosting
view_weights:
- 0.16666666666666669
- 0.16666666666666669
- 0.16666666666666669
- 0.16666666666666669
- 0.16666666666666669
- 0.16666666666666669
#
#random_forest:
# n_estimators: [25]
......
......@@ -199,7 +199,7 @@ def init_monoview_exps(classifier_names,
def gen_single_monoview_arg_dictionary(classifier_name, arguments, nb_class,
view_index, view_name, hps_kwargs):
if classifier_name in arguments:
classifier_config = dict((key, value[0]) for key, value in arguments[
classifier_config = dict((key, value) for key, value in arguments[
classifier_name].items())
else:
classifier_config = {}
......@@ -227,9 +227,6 @@ def extract_dict(classifier_config):
"""Reverse function of get_path_dict"""
extracted_dict = {}
for key, value in classifier_config.items():
if isinstance(value, list):
extracted_dict = set_element(extracted_dict, key, value[0])
else:
extracted_dict = set_element(extracted_dict, key, value)
return extracted_dict
......
......@@ -16,7 +16,7 @@ __status__ = "Prototype" # Production, Development, Prototype
classifier_class_name = "GradientBoosting"
class CustomDecisionTree(DecisionTreeClassifier):
class CustomDecisionTreeGB(DecisionTreeClassifier):
def predict(self, X, check_input=True):
y_pred = DecisionTreeClassifier.predict(self, X,
check_input=check_input)
......@@ -27,7 +27,7 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier):
def __init__(self, random_state=None, loss="exponential", max_depth=1.0,
n_estimators=100,
init=CustomDecisionTree(max_depth=1),
init=CustomDecisionTreeGB(max_depth=1),
**kwargs):
GradientBoostingClassifier.__init__(self,
loss=loss,
......
......@@ -60,21 +60,22 @@ def publish_example_errors(example_errors, directory, databaseName,
np.savetxt(base_file_name + "bar_plot_data.csv", error_on_examples,
delimiter=",")
plot_2d(data_2d, classifiers_names, nb_classifiers, nb_examples,
base_file_name, example_ids=example_ids, labels=labels)
plot_2d(data_2d, classifiers_names, nb_classifiers, base_file_name,
example_ids=example_ids, labels=labels)
plot_errors_bar(error_on_examples, nb_classifiers, nb_examples,
base_file_name)
plot_errors_bar(error_on_examples, nb_examples,
base_file_name, example_ids=example_ids)
logging.debug("Done:\t Biclass Label analysis figures generation")
def publish_all_example_errors(iter_results, directory,
stats_iter,
example_ids, labels):
logging.debug(
"Start:\t Global label analysis figure generation")
nbExamples, nbClassifiers, data, \
nb_examples, nb_classifiers, data, \
error_on_examples, classifier_names = gen_error_data_glob(iter_results,
stats_iter)
......@@ -82,17 +83,16 @@ def publish_all_example_errors(iter_results, directory,
np.savetxt(os.path.join(directory, "example_errors.csv"), error_on_examples,
delimiter=",")
plot_2d(data, classifier_names, nbClassifiers, nbExamples,
plot_2d(data, classifier_names, nb_classifiers,
os.path.join(directory, ""), stats_iter=stats_iter,
example_ids=example_ids, labels=labels)
plot_errors_bar(error_on_examples, nbClassifiers * stats_iter,
nbExamples, os.path.join(directory, ""))
plot_errors_bar(error_on_examples, nb_examples, os.path.join(directory, ""),
example_ids=example_ids)
logging.debug(
"Done:\t Global label analysis figures generation")
def gen_error_data(example_errors):
r"""Used to format the error data in order to plot it efficiently. The
data is saved in a `.csv` file.
......@@ -106,14 +106,6 @@ def gen_error_data(example_errors):
- 1 if the classifier `<classifier_name>` classified the example correctly,
- 0 if it failed to classify the example,
- -100 if it did not classify the example (multiclass one versus one).
base_file_name : list of str
The name of the file in which the figure will be saved
("2D_plot_data.csv" and "bar_plot_data.csv" will
be added at the end).
nbCopies : int, optinal, default: 2
The number of times the data is copied (classifier wise) in order for
the figure to be more readable.
Returns
-------
......@@ -146,9 +138,10 @@ def gen_error_data(example_errors):
except:
import pdb;
pdb.set_trace()
error_on_examples = -1 * np.sum(data_2d, axis=1) / nb_classifiers
error_on_examples = np.sum(data_2d, axis=1) / nb_classifiers
return nb_classifiers, nb_examples, classifiers_names, data_2d, error_on_examples
def gen_error_data_glob(iter_results, stats_iter):
nb_examples = next(iter(iter_results.values())).shape[0]
nb_classifiers = len(iter_results)
......@@ -158,16 +151,14 @@ def gen_error_data_glob(iter_results, stats_iter):
iter_results.items()):
data[:, clf_index] = error_data
classifier_names.append(classifier_name)
error_on_examples = -1 * np.sum(data, axis=1) + (
error_on_examples = np.sum(data, axis=1) / (
nb_classifiers * stats_iter)
return nb_examples, nb_classifiers, data, error_on_examples, \
classifier_names
def plot_2d(data, classifiers_names, nbClassifiers, nbExamples,
file_name, minSize=10, labels=None,
width_denominator=2.0, height_denominator=20.0, stats_iter=1,
use_plotly=True, example_ids=None):
def plot_2d(data, classifiers_names, nb_classifiers, file_name, labels=None,
stats_iter=1, use_plotly=True, example_ids=None):
r"""Used to generate a 2D plot of the errors.
Parameters
......@@ -177,12 +168,8 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples,
and -100 if the example was not classified.
classifiers_names : list of str
The names of the classifiers.
nbClassifiers : int
nb_classifiers : int
The number of classifiers.
nbExamples : int
The number of examples.
nbCopies : int
The number of times the data is copied (classifier wise) in order for the figure to be more readable
file_name : str
The name of the file in which the figure will be saved ("error_analysis_2D.png" will be added at the end)
minSize : int, optinal, default: 10
......@@ -198,13 +185,18 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples,
-------
"""
fig, ax = plt.subplots(nrows=1, ncols=1, )
label_index_list = np.concatenate([np.where(labels == i)[0] for i in
np.unique(
labels)])
cmap, norm = iter_cmap(stats_iter)
cax = plt.imshow(data, cmap=cmap, norm=norm,
cax = plt.imshow(data[np.flip(label_index_list), :], cmap=cmap, norm=norm,
aspect='auto')
plt.title('Errors depending on the classifier')
ticks = np.arange(0, nbClassifiers, 1)
ticks = np.arange(0, nb_classifiers, 1)
tick_labels = classifiers_names
plt.xticks(ticks, tick_labels, rotation="vertical")
plt.yticks([], [])
plt.ylabel("Examples")
cbar = fig.colorbar(cax, ticks=[-100 * stats_iter / 2, 0, stats_iter])
cbar.ax.set_yticklabels(['Unseen', 'Always Wrong', 'Always Right'])
......@@ -213,9 +205,7 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples,
plt.close()
### The following part is used to generate an interactive graph.
if use_plotly:
label_index_list = np.concatenate([np.where(labels == i)[0] for i in
np.unique(
labels)]) # [np.where(labels==i)[0] for i in np.unique(labels)]
# [np.where(labels==i)[0] for i in np.unique(labels)]
hover_text = [[example_ids[example_index] + " failed " + str(
stats_iter - data[
example_index, classifier_index]) + " time(s), labelled " + str(
......@@ -223,7 +213,6 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples,
for classifier_index in range(data.shape[1])]
for example_index in range(data.shape[0])]
fig = plotly.graph_objs.Figure()
# for row_index, label_index in enumerate(label_index_list):
fig.add_trace(plotly.graph_objs.Heatmap(
x=list(classifiers_names),
y=[example_ids[label_ind] for label_ind in label_index_list],
......@@ -234,8 +223,7 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples,
colorbar=dict(tickvals=[0, stats_iter],
ticktext=["Always Wrong", "Always Right"]),
reversescale=True), )
fig.update_yaxes(title_text="Examples", showticklabels=False, ticks='')
fig.update_xaxes(showticklabels=False, )
fig.update_yaxes(title_text="Examples", showticklabels=True)
fig.update_layout(paper_bgcolor='rgba(0,0,0,0)',
plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showticklabels=True, )
......@@ -244,7 +232,8 @@ def plot_2d(data, classifiers_names, nbClassifiers, nbExamples,
del fig
def plot_errors_bar(error_on_examples, nbClassifiers, nbExamples, fileName):
def plot_errors_bar(error_on_examples, nb_examples, file_name,
use_plotly=True, example_ids=None):
r"""Used to generate a barplot of the number of classifiers that failed to classify each example
Parameters
......@@ -253,23 +242,28 @@ def plot_errors_bar(error_on_examples, nbClassifiers, nbExamples, fileName):
An array counting how many classifiers failed to classify each example.
classifiers_names : list of str
The names of the classifiers.
nbClassifiers : int
nb_classifiers : int
The number of classifiers.
nbExamples : int
nb_examples : int
The number of examples.
fileName : str
file_name : str
The name of the file in which the figure will be saved ("error_analysis_2D.png" will be added at the end)
Returns
-------
"""
fig, ax = plt.subplots()
x = np.arange(nbExamples)
x = np.arange(nb_examples)
plt.bar(x, error_on_examples)
plt.ylim([0, nbClassifiers])
plt.title("Number of classifiers that failed to classify each example")
fig.savefig(fileName + "error_analysis_bar.png", transparent=True)
fig.savefig(file_name + "error_analysis_bar.png", transparent=True)
plt.close()
if use_plotly:
fig = plotly.graph_objs.Figure([plotly.graph_objs.Bar(x=example_ids, y=error_on_examples)])
plotly.offline.plot(fig, filename=file_name + "error_analysis_bar.html",
auto_open=False)
def iter_cmap(statsIter):
......
import itertools
import sys
import traceback
import yaml
from abc import abstractmethod
import matplotlib.pyplot as plt
......@@ -8,7 +9,7 @@ import numpy as np
from scipy.stats import randint, uniform
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, \
ParameterGrid, ParameterSampler
from sklearn.base import clone
from sklearn.base import clone, BaseEstimator
from .multiclass import MultiClassWrapper
from .organization import secure_file_path
......@@ -132,9 +133,8 @@ class HPSearch:
scores_array = scores_array[sorted_indices]
output_string = ""
for parameters, score in zip(tested_params, scores_array):
if "random_state" in parameters:
parameters.pop("random_state")
output_string += "\n{}\t\t{}".format(parameters, score)
formatted_params = format_params(parameters)
output_string += "\n{}\n\t\t{}".format(yaml.dump(formatted_params), score)
if self.tracebacks:
output_string += "Failed : \n\n\n"
for traceback, params in zip(self.tracebacks, self.tracebacks_params):
......@@ -417,6 +417,20 @@ class CustomUniform:
return unif
def format_params(params):
    """Recursively convert hyper-parameters into plain, YAML-friendly values.

    The result is meant to be passed to ``yaml.dump``, so every NumPy
    object is replaced by its built-in Python equivalent.

    Parameters
    ----------
    params : object
        A hyper-parameter value or a (possibly nested) container of them.

    Returns
    -------
    object
        - dict : a new dict without any ``"random_state"`` key, whose values
          are formatted recursively,
        - list / tuple : a container of the same kind with formatted elements,
        - numpy.ndarray : a (nested) list of Python floats,
        - numpy scalar : the matching Python scalar (int, float, bool, ...),
        - BaseEstimator : the name of the estimator's class,
        - anything else : returned unchanged.
    """
    if isinstance(params, dict):
        return {key: format_params(value)
                for key, value in params.items()
                if key != "random_state"}
    if isinstance(params, list):
        return [format_params(value) for value in params]
    if isinstance(params, tuple):
        return tuple(format_params(value) for value in params)
    if isinstance(params, np.ndarray):
        # Cast to float as before, but let NumPy handle any dimensionality.
        return params.astype(float).tolist()
    if isinstance(params, np.generic):
        # Covers np.float64 as well as np.int64, np.float32, np.bool_, ...
        # which would otherwise be dumped as opaque python/object tags.
        return params.item()
    if isinstance(params, BaseEstimator):
        return params.__class__.__name__
    return params
# def randomized_search_(dataset_var, labels, classifier_package, classifier_name,
# metrics_list, learning_indices, k_folds, random_state,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment