Commit 98e14bb6 authored by Baptiste Bauvin

Added tests on result analysis, added equiv support and missing whitespace

parent 65b27dd5
from datetime import timedelta as hms

from .. import metrics
from ..utils.base import get_metric


def get_db_config_string(name, feat, classification_indices, shape,
                         class_labels_names, k_folds):
    """
    Build a string describing the database configuration.

    Parameters
    ----------
    name : name of the database
    feat : name of the view
    classification_indices : tuple of (train indices, test indices)
    shape : shape of the view
    class_labels_names : names of the class labels
    k_folds : cross-validation folds object (exposing n_splits)

    Returns
    -------
    db_config_string : string describing the database configuration
    """
    learning_rate = float(len(classification_indices[0])) / (
        len(classification_indices[0]) + len(classification_indices[1]))
    db_config_string = "Database configuration : \n"
    db_config_string += "\t- Database name : " + name + "\n"
    db_config_string += "\t- View name : " + feat + "\t View shape : " + str(
        shape) + "\n"
    db_config_string += "\t- Learning Rate : " + str(learning_rate) + "\n"
    db_config_string += "\t- Labels used : " + ", ".join(
        class_labels_names) + "\n"
    db_config_string += "\t- Number of cross validation folds : " + str(
        k_folds.n_splits) + "\n\n"
    return db_config_string
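# Illustrative usage (a sketch, not part of the original file; the argument
# values are made up, and `k_folds` only needs to expose an `n_splits`
# attribute, e.g. a scikit-learn StratifiedKFold):
#
#     from sklearn.model_selection import StratifiedKFold
#     db_string = get_db_config_string("awa", "view0", (train_idx, test_idx),
#                                      (500, 40), ["cat", "dog"],
#                                      StratifiedKFold(n_splits=5))
#     # db_string then lists the database name, view shape, train ratio,
#     # label names and fold count, one "\t- " entry per line.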


def get_classifier_config_string(grid_search, nb_cores, n_iter, cl_kwargs,
                                 classifier,
                                 output_file_name, y_test):
    classifier_config_string = "Classifier configuration : \n"
classifier_config_string += "\t- " + classifier.get_config()[5:] + "\n" # classifier_config_string += "\t- " + classifier.get_config()[5:] + "\n"
classifier_config_string += "\t- Executed on " + str( # classifier_config_string += "\t- Executed on " + str(
nb_cores) + " core(s) \n" # nb_cores) + " core(s) \n"
if grid_search: # if grid_search:
classifier_config_string += "\t- Got configuration using randomized search with " + str( # classifier_config_string += "\t- Got configuration using randomized search with " + str(
n_iter) + " iterations \n" # n_iter) + " iterations \n"
classifier_config_string += "\n\n" # classifier_config_string += "\n\n"
classifier_interpret_string = classifier.get_interpretation( # classifier_interpret_string = classifier.get_interpretation(
output_file_name, # output_file_name,
y_test) # y_test)
return classifier_config_string, classifier_interpret_string # return classifier_config_string, classifier_interpret_string


def get_metric_score(metric, y_train, y_train_pred, y_test, y_test_pred):
    metric_module = getattr(metrics, metric[0])
    if metric[1] is not None:
        metric_kwargs = dict((index, metricConfig) for index, metricConfig in
                             enumerate(metric[1]))
    else:
        metric_kwargs = {}
    metric_score_train = metric_module.score(y_train, y_train_pred)
    metric_score_test = metric_module.score(y_test, y_test_pred)
    metric_score_string = "\tFor " + metric_module.get_config(
        **metric_kwargs) + " : "
    metric_score_string += "\n\t\t- Score on train : " + str(metric_score_train)
    metric_score_string += "\n\t\t- Score on test : " + str(metric_score_test)
    metric_score_string += "\n"
    return metric_score_string, [metric_score_train, metric_score_test]
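# get_metric_score returns both a human-readable block and the raw
# [train_score, test_score] pair; execute() below appends the block to the
# report and stores the pair in metrics_scores, keyed by the metric name.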


def execute(name, learning_rate, k_folds, nb_cores, grid_search, metrics_list,
            n_iter,
            feat, cl_type, cl_kwargs, class_labels_names,
            shape, y_train, y_train_pred, y_test, y_test_pred, time,
            random_state, classifier, output_file_name):
    metric_module, metric_kwargs = get_metric(metrics_list)
    train_score = metric_module.score(y_train, y_train_pred)
    test_score = metric_module.score(y_test, y_test_pred)
    string_analysis = "Classification on " + name + " database for " + feat + " with " + cl_type + ".\n\n"
    string_analysis += metrics_list[0][0] + " on train : " + str(
        train_score) + "\n" + \
                       metrics_list[0][0] + " on test : " + str(
        test_score) + "\n\n"
    # NOTE: `learning_rate` here actually carries the (train, test) indices
    # that get_db_config_string expects as `classification_indices`.
    string_analysis += get_db_config_string(name, feat, learning_rate, shape,
                                            class_labels_names, k_folds)
    classifier_config_string, classifier_interpret_string = get_classifier_config_string(
        grid_search, nb_cores, n_iter, cl_kwargs, classifier, output_file_name,
        y_test)
    string_analysis += classifier_config_string
    metrics_scores = {}
    for metric in metrics_list:
        metric_string, metric_score = get_metric_score(metric, y_train,
                                                       y_train_pred, y_test,
                                                       y_test_pred)
        string_analysis += metric_string
        metrics_scores[metric[0]] = metric_score
    string_analysis += "\n\n Classification took " + str(hms(seconds=int(time)))
    string_analysis += "\n\n Classifier Interpretation : \n"
    string_analysis += classifier_interpret_string

    image_analysis = {}
    return string_analysis, image_analysis, metrics_scores
from .. import metrics

from ..utils.base import get_metric

# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype"  # Production, Development, Prototype


def print_metric_score(metric_scores, metric_list):
    """
    Build a string reporting the train and test scores of each metric.

    Parameters
    ----------
    metric_scores : dict of [train_score, test_score] pairs, indexed by metric name

    metric_list : list of metrics, as (name, configuration) tuples

    Returns
    -------
    metric_score_string : string containing all metric results
    """
    metric_score_string = "\n\n"
    for metric in metric_list:
        metric_module = getattr(metrics, metric[0])
        if metric[1] is not None:
            metric_kwargs = dict(
                (index, metricConfig) for index, metricConfig in
                enumerate(metric[1]))
        else:
            metric_kwargs = {}
        metric_score_string += "\tFor " + metric_module.get_config(
            **metric_kwargs) + " : "
        metric_score_string += "\n\t\t- Score on train : " + str(
            metric_scores[metric[0]][0])
        metric_score_string += "\n\t\t- Score on test : " + str(
            metric_scores[metric[0]][1])
        metric_score_string += "\n\n"
    return metric_score_string
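# Illustrative output (values made up): for a dict such as
# {"accuracy_score": [0.9, 0.8]}, the returned string contains one
# "\tFor <metric config> :" block per metric, followed by indented
# "- Score on train" and "- Score on test" lines.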


def get_total_metric_scores(metric, train_labels, test_labels,
                            validation_indices,
                            learning_indices, labels):
    """
    Parameters
    ----------
    metric : tuple of (metric name, metric configuration)

    train_labels : predicted labels on the train set

    test_labels : predicted labels on the test set

    validation_indices : indices of the test (validation) examples

    learning_indices : indices of the train examples

    labels : ground-truth labels of all examples

    Returns
    -------
    list of [train_score, test_score]
    """
    metric_module = getattr(metrics, metric[0])
    if metric[1] is not None:
        metric_kwargs = dict((index, metricConfig) for index, metricConfig in
                             enumerate(metric[1]))
    else:
        metric_kwargs = {}
    train_score = metric_module.score(labels[learning_indices], train_labels,
                                      **metric_kwargs)
    test_score = metric_module.score(labels[validation_indices], test_labels,
                                     **metric_kwargs)
    return [train_score, test_score]


def get_metrics_scores(metrics, train_labels, test_labels,
                       validation_indices, learning_indices, labels):
    metrics_scores = {}
    for metric in metrics:
        metrics_scores[metric[0]] = get_total_metric_scores(metric,
                                                            train_labels,
                                                            test_labels,
                                                            validation_indices,
                                                            learning_indices,
                                                            labels)
    return metrics_scores
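# The returned dict maps each metric name to its [train_score, test_score]
# pair. Note that the `metrics` parameter (the list of metric tuples) shadows
# the imported `metrics` module inside this function only.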


def execute(classifier, pred_train_labels, pred_test_labels,
            classification_indices, labels_dictionary, views, name, k_folds,
            metrics_list, labels, directory):
    """
    Parameters
    ----------
    classifier : the multiview classifier that was used

    pred_train_labels : predicted labels on the train set

    pred_test_labels : predicted labels on the test set

    classification_indices : tuple of (learning indices, validation indices)

    labels_dictionary : dict whose values are the label names

    views : names of the views used for classification

    name : name of the database

    k_folds : cross-validation folds object (exposing n_splits)

    metrics_list : list of metrics, as (name, configuration) tuples

    labels : ground-truth labels of all examples

    directory : output directory passed to the classifier's interpretation method

    Returns
    -------
    tuple of (string_analysis, images_analysis, metrics_scores)
    """
    classifier_name = classifier.short_name
    learning_indices, validation_indices = classification_indices
    metric_module, metric_kwargs = get_metric(metrics_list)
    score_on_train = metric_module.score(labels[learning_indices],
                                         pred_train_labels,
                                         **metric_kwargs)
    score_on_test = metric_module.score(labels[validation_indices],
                                        pred_test_labels, **metric_kwargs)

    string_analysis = "\t\tResult for multiview classification with " + classifier_name + \
                      "\n\n" + metrics_list[0][0] + " :\n\t-On Train : " + str(
        score_on_train) + "\n\t-On Test : " + str(
        score_on_test) + \
                      "\n\nDataset info :\n\t-Database name : " + name + "\n\t-Labels : " + \
                      ', '.join(
                          labels_dictionary.values()) + "\n\t-Views : " + ', '.join(
        views) + "\n\t-" + str(
        k_folds.n_splits) + \
                      " folds\n\nClassification configuration : \n\t-Algorithm used : " + classifier_name + " with : " + classifier.get_config()

    metrics_scores = get_metrics_scores(metrics_list, pred_train_labels,
                                        pred_test_labels,
                                        validation_indices, learning_indices,
                                        labels)
    string_analysis += print_metric_score(metrics_scores, metrics_list)
    string_analysis += "\n\n Interpretation : \n\n" + classifier.get_interpretation(
        directory, labels)
    images_analysis = {}
    return string_analysis, images_analysis, metrics_scores
@@ -175,6 +175,8 @@ class MultiviewResultAnalyzer(ResultAnalyser):
                 hps_method, metrics_list, n_iter, class_label_names,
                 train_pred, test_pred, output_file_name, labels, database_name,
                 nb_cores, duration):
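        # With an "equiv" hyper-parameter search method, the iteration budget
        # is multiplied by the number of views so that each view receives an
        # equivalent number of search iterations.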
        if hps_method.endswith("equiv"):
            n_iter = n_iter*len(view_names)
        ResultAnalyser.__init__(self, classifier, classification_indices, k_folds,
                                hps_method, metrics_list, n_iter, class_label_names,
                                train_pred, test_pred, output_file_name, labels, database_name,
...
@@ -3,15 +3,100 @@ import unittest
import yaml
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score
from multiview_platform.tests.utils import rm_tmp, tmp_path
from multiview_platform.mono_multi_view_classifiers.utils import base


class FakeClassifier():
    pass


class Test_ResultAnalyzer(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.rs = np.random.RandomState(42)
        cls.classifier = FakeClassifier()
        cls.n_examples = 50
        cls.n_classes = 3
        cls.train_length = 24
        cls.train_indices = cls.rs.choice(np.arange(cls.n_examples),
                                          size=cls.train_length,
                                          replace=False)
        cls.test_indices = np.array([i for i in range(cls.n_examples)
                                     if i not in cls.train_indices])
        cls.test_length = cls.test_indices.shape[0]
        cls.classification_indices = [cls.train_indices, cls.test_indices]
        cls.n_splits = 5
        cls.k_folds = StratifiedKFold(n_splits=cls.n_splits)
        cls.hps_method = "randomized_search"
        cls.metrics_list = [("accuracy_score", {}), ("f1_score", {})]
        cls.n_iter = 6
        cls.class_label_names = ["class{}".format(ind+1)
                                 for ind in range(cls.n_classes)]
        cls.train_pred = np.random.randint(0, cls.n_classes,
                                           size=cls.train_length)
        cls.test_pred = np.random.randint(0, cls.n_classes,
                                          size=cls.test_length)
        cls.directory = "fake_directory"
        cls.labels = np.random.randint(0, cls.n_classes,
                                       size=cls.n_examples)
        cls.database_name = "test_database"
        cls.nb_cores = 0.5
        cls.duration = -4
        cls.train_accuracy = accuracy_score(cls.labels[cls.train_indices],
                                            cls.train_pred)
        cls.test_accuracy = accuracy_score(cls.labels[cls.test_indices],
                                           cls.test_pred)
        cls.train_f1 = f1_score(cls.labels[cls.train_indices],
                                cls.train_pred, average='micro')
        cls.test_f1 = f1_score(cls.labels[cls.test_indices],
                               cls.test_pred, average='micro')
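
    # The expected accuracies and F1 scores above are computed directly with
    # scikit-learn, so the values produced by base.ResultAnalyser can be
    # checked against an independent reference in the tests below.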

    def test_simple(self):
        RA = base.ResultAnalyser(self.classifier, self.classification_indices,
                                 self.k_folds, self.hps_method, self.metrics_list,
                                 self.n_iter, self.class_label_names,
                                 self.train_pred, self.test_pred, self.directory,
                                 self.labels, self.database_name,
                                 self.nb_cores, self.duration)

    def test_get_metric_scores(self):
        RA = base.ResultAnalyser(self.classifier, self.classification_indices,
                                 self.k_folds, self.hps_method,
                                 self.metrics_list,
                                 self.n_iter, self.class_label_names,
                                 self.train_pred, self.test_pred,
                                 self.directory,
                                 self.labels, self.database_name,
                                 self.nb_cores, self.duration)
        train_score, test_score = RA.get_metric_scores("accuracy_score", {})
        self.assertEqual(train_score, self.train_accuracy)
        self.assertEqual(test_score, self.test_accuracy)

    def test_get_all_metrics_scores(self):
        RA = base.ResultAnalyser(self.classifier, self.classification_indices,
                                 self.k_folds, self.hps_method,
                                 self.metrics_list,
                                 self.n_iter, self.class_label_names,
                                 self.train_pred, self.test_pred,
                                 self.directory,
                                 self.labels, self.database_name,
                                 self.nb_cores, self.duration)
        RA.get_all_metrics_scores()
        self.assertEqual(RA.metric_scores["accuracy_score"][0],
                         self.train_accuracy)
        self.assertEqual(RA.metric_scores["accuracy_score"][1],
                         self.test_accuracy)
        self.assertEqual(RA.metric_scores["f1_score"][0],
                         self.train_f1)
        self.assertEqual(RA.metric_scores["f1_score"][1],
                         self.test_f1)

class Test_BaseClassifier(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
...