Commit 98e14bb6 authored by Baptiste Bauvin

Added tests on result analysis, added equiv support and missing whitespace

parent 65b27dd5
from datetime import timedelta as hms
from .. import metrics
from ..utils.base import get_metric
def get_db_config_string(name, feat, classification_indices, shape,
class_labels_names, k_folds):
"""
Parameters
----------
name
feat
classification_indices
shape
class_labels_names
k_folds
Returns
-------
"""
learning_rate = float(len(classification_indices[0])) / (
len(classification_indices[0]) + len(classification_indices[1]))
db_config_string = "Database configuration : \n"
db_config_string += "\t- Database name : " + name + "\n"
db_config_string += "\t- View name : " + feat + "\t View shape : " + str(
shape) + "\n"
db_config_string += "\t- Learning Rate : " + str(learning_rate) + "\n"
db_config_string += "\t- Labels used : " + ", ".join(
class_labels_names) + "\n"
db_config_string += "\t- Number of cross validation folds : " + str(
k_folds.n_splits) + "\n\n"
return db_config_string
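# Illustrative sketch (not part of the original code, hypothetical values):
# with name="awa", feat="view0", a 30/20 train/test split, shape=(50, 100),
# two labels and 5 CV folds, the string built above would read roughly:
#
#   Database configuration :
#       - Database name : awa
#       - View name : view0     View shape : (50, 100)
#       - Learning Rate : 0.6
#       - Labels used : cat, dog
#       - Number of cross validation folds : 5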
def get_classifier_config_string(grid_search, nb_cores, n_iter, cl_kwargs,
classifier,
output_file_name, y_test):
classifier_config_string = "Classifier configuration : \n"
classifier_config_string += "\t- " + classifier.get_config()[5:] + "\n"
classifier_config_string += "\t- Executed on " + str(
nb_cores) + " core(s) \n"
if grid_search:
classifier_config_string += "\t- Got configuration using randomized search with " + str(
n_iter) + " iterations \n"
classifier_config_string += "\n\n"
classifier_interpret_string = classifier.get_interpretation(
output_file_name,
y_test)
return classifier_config_string, classifier_interpret_string
def get_metric_score(metric, y_train, y_train_pred, y_test, y_test_pred):
metric_module = getattr(metrics, metric[0])
if metric[1] is not None:
metric_kwargs = dict((index, metricConfig) for index, metricConfig in
enumerate(metric[1]))
else:
metric_kwargs = {}
metric_score_train = metric_module.score(y_train, y_train_pred)
metric_score_test = metric_module.score(y_test, y_test_pred)
metric_score_string = "\tFor " + metric_module.get_config(
**metric_kwargs) + " : "
metric_score_string += "\n\t\t- Score on train : " + str(metric_score_train)
metric_score_string += "\n\t\t- Score on test : " + str(metric_score_test)
metric_score_string += "\n"
return metric_score_string, [metric_score_train, metric_score_test]
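# Usage sketch (hypothetical arguments, for illustration only):
#   metric_string, scores = get_metric_score(("accuracy_score", None),
#                                            y_train, y_train_pred,
#                                            y_test, y_test_pred)
#   # scores == [train accuracy, test accuracy]
# Note that metric_kwargs is only forwarded to get_config() above; the
# score() calls themselves run with the metric module's default parameters.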
def execute(name, learning_rate, k_folds, nb_cores, grid_search, metrics_list,
n_iter,
feat, cl_type, cl_kwargs, class_labels_names,
shape, y_train, y_train_pred, y_test, y_test_pred, time,
random_state, classifier, output_file_name):
metric_module, metric_kwargs = get_metric(metrics_list)
train_score = metric_module.score(y_train, y_train_pred)
test_score = metric_module.score(y_test, y_test_pred)
string_analysis = "Classification on " + name + " database for " + feat + " with " + cl_type + ".\n\n"
string_analysis += metrics_list[0][0] + " on train : " + str(
train_score) + "\n" + \
metrics_list[0][0] + " on test : " + str(
test_score) + "\n\n"
string_analysis += get_db_config_string(name, feat, learning_rate, shape,
class_labels_names, k_folds)
    classifier_config_string, classifier_interpret_string = get_classifier_config_string(
        grid_search, nb_cores, n_iter, cl_kwargs, classifier, output_file_name,
        y_test)
string_analysis += classifier_config_string
metrics_scores = {}
for metric in metrics_list:
metric_string, metric_score = get_metric_score(metric, y_train,
y_train_pred, y_test,
y_test_pred)
string_analysis += metric_string
metrics_scores[metric[0]] = metric_score
string_analysis += "\n\n Classification took " + str(hms(seconds=int(time)))
string_analysis += "\n\n Classifier Interpretation : \n"
    string_analysis += classifier_interpret_string
image_analysis = {}
return string_analysis, image_analysis, metrics_scores
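# The monoview report is assembled from three parts: the database
# configuration, the classifier configuration and one block per metric,
# followed by the duration and the classifier's interpretation string.
# image_analysis is returned empty here, so this analysis produces no figures.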
from .. import metrics
from ..utils.base import get_metric
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def print_metric_score(metric_scores, metric_list):
"""
this function print the metrics scores
Parameters
----------
metric_scores : the score of metrics
metric_list : list of metrics
Returns
-------
metric_score_string string constaining all metric results
"""
metric_score_string = "\n\n"
for metric in metric_list:
metric_module = getattr(metrics, metric[0])
if metric[1] is not None:
metric_kwargs = dict(
(index, metricConfig) for index, metricConfig in
enumerate(metric[1]))
else:
metric_kwargs = {}
metric_score_string += "\tFor " + metric_module.get_config(
**metric_kwargs) + " : "
metric_score_string += "\n\t\t- Score on train : " + str(
metric_scores[metric[0]][0])
metric_score_string += "\n\t\t- Score on test : " + str(
metric_scores[metric[0]][1])
metric_score_string += "\n\n"
return metric_score_string
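# Illustrative call (hypothetical dictionary, mirroring the structure built
# by get_metrics_scores below):
#   print_metric_score({"accuracy_score": [0.9, 0.7], "f1_score": [0.88, 0.65]},
#                      [("accuracy_score", {}), ("f1_score", {})])
# returns a string with one indented train/test block per metric.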
def get_total_metric_scores(metric, train_labels, test_labels,
validation_indices,
learning_indices, labels):
"""
Parameters
----------
metric :
train_labels : labels of train
test_labels : labels of test
validation_indices :
learning_indices :
labels :
Returns
-------
list of [train_score, test_score]
"""
metric_module = getattr(metrics, metric[0])
if metric[1] is not None:
metric_kwargs = dict((index, metricConfig) for index, metricConfig in
enumerate(metric[1]))
else:
metric_kwargs = {}
train_score = metric_module.score(labels[learning_indices], train_labels,
**metric_kwargs)
test_score = metric_module.score(labels[validation_indices], test_labels,
**metric_kwargs)
return [train_score, test_score]
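# Sketch of a call (hypothetical arrays): reusing the true labels as
# "predictions" yields perfect scores, e.g.
#   labels = np.array([0, 1, 1, 0])
#   get_total_metric_scores(("accuracy_score", None), labels[[0, 1]],
#                           labels[[2, 3]], [2, 3], [0, 1], labels)
#   # -> [1.0, 1.0]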
def get_metrics_scores(metrics, train_labels, test_labels,
validation_indices, learning_indices, labels):
metrics_scores = {}
for metric in metrics:
metrics_scores[metric[0]] = get_total_metric_scores(metric,
train_labels,
test_labels,
validation_indices,
learning_indices,
labels)
return metrics_scores
def execute(classifier, pred_train_labels, pred_test_labels,
classification_indices, labels_dictionary, views, name, k_folds,
metrics_list, labels, directory):
"""
Parameters
----------
classifier : classifier used
pred_train_labels : labels of train
pred_test_labels : labels of test
classification_indices
labels_dictionary
views
name
k_folds
metrics_list
labels
Returns
-------
return tuple of (string_analysis, images_analysis, metricsScore)
"""
classifier_name = classifier.short_name
learning_indices, validation_indices = classification_indices
metric_module, metric_kwargs = get_metric(metrics_list)
score_on_train = metric_module.score(labels[learning_indices],
pred_train_labels,
**metric_kwargs)
score_on_test = metric_module.score(labels[validation_indices],
pred_test_labels, **metric_kwargs)
string_analysis = "\t\tResult for multiview classification with " + classifier_name + \
"\n\n" + metrics_list[0][0] + " :\n\t-On Train : " + str(
score_on_train) + "\n\t-On Test : " + str(
score_on_test) + \
"\n\nDataset info :\n\t-Database name : " + name + "\n\t-Labels : " + \
', '.join(
labels_dictionary.values()) + "\n\t-Views : " + ', '.join(
views) + "\n\t-" + str(
k_folds.n_splits) + \
" folds\n\nClassification configuration : \n\t-Algorithm used : " + classifier_name + " with : " + classifier.get_config()
metrics_scores = get_metrics_scores(metrics_list, pred_train_labels,
pred_test_labels,
validation_indices, learning_indices,
labels)
string_analysis += print_metric_score(metrics_scores, metrics_list)
string_analysis += "\n\n Interpretation : \n\n" + classifier.get_interpretation(
directory, labels)
images_analysis = {}
return string_analysis, images_analysis, metrics_scores
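# As in the monoview module, images_analysis is returned empty; only the
# textual report and the per-metric scores are produced here.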
@@ -175,6 +175,8 @@ class MultiviewResultAnalyzer(ResultAnalyser):
hps_method, metrics_list, n_iter, class_label_names,
train_pred, test_pred, output_file_name, labels, database_name,
nb_cores, duration):
if hps_method.endswith("equiv"):
n_iter = n_iter*len(view_names)
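        # Sketch of the intent (assumption, not stated in the diff): with,
        # e.g., 4 views and n_iter=6, an HPS method ending in "equiv" raises
        # the multiview search budget to 6 * 4 = 24 draws, roughly matching
        # the total budget spent across the corresponding monoview searches.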
ResultAnalyser.__init__(self, classifier, classification_indices, k_folds,
hps_method, metrics_list, n_iter, class_label_names,
train_pred, test_pred, output_file_name, labels, database_name,
......
@@ -3,15 +3,100 @@ import unittest
import yaml
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score
from multiview_platform.tests.utils import rm_tmp, tmp_path
from multiview_platform.mono_multi_view_classifiers.utils import base
class FakeClassifier():
    pass


class Test_ResultAnalyzer(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.rs = np.random.RandomState(42)
cls.classifier = FakeClassifier()
cls.n_examples = 50
cls.n_classes = 3
cls.train_length = 24
cls.train_indices = cls.rs.choice(np.arange(cls.n_examples),
size=cls.train_length,
replace=False)
cls.test_indices = np.array([i for i in range(cls.n_examples)
if i not in cls.train_indices])
cls.test_length = cls.test_indices.shape[0]
cls.classification_indices = [cls.train_indices, cls.test_indices]
cls.n_splits = 5
cls.k_folds = StratifiedKFold(n_splits=cls.n_splits, )
cls.hps_method = "randomized_search"
cls.metrics_list = [("accuracy_score", {}), ("f1_score", {})]
cls.n_iter = 6
cls.class_label_names = ["class{}".format(ind+1)
for ind in range(cls.n_classes)]
        # Draw the fake predictions and labels from the seeded RandomState so
        # the test data stays reproducible.
        cls.train_pred = cls.rs.randint(0, cls.n_classes,
                                        size=cls.train_length)
        cls.test_pred = cls.rs.randint(0, cls.n_classes,
                                       size=cls.test_length)
        cls.directory = "fake_directory"
        cls.labels = cls.rs.randint(0, cls.n_classes,
                                    size=cls.n_examples)
cls.database_name = "test_database"
cls.nb_cores = 0.5
cls.duration = -4
cls.train_accuracy = accuracy_score(cls.labels[cls.train_indices],
cls.train_pred)
cls.test_accuracy = accuracy_score(cls.labels[cls.test_indices],
cls.test_pred)
cls.train_f1 = f1_score(cls.labels[cls.train_indices],
cls.train_pred, average='micro')
cls.test_f1 = f1_score(cls.labels[cls.test_indices],
cls.test_pred, average='micro')
def test_simple(self):
RA = base.ResultAnalyser(self.classifier, self.classification_indices,
self.k_folds, self.hps_method, self.metrics_list,
self.n_iter, self.class_label_names,
self.train_pred, self.test_pred, self.directory,
self.labels, self.database_name,
self.nb_cores, self.duration)
def test_get_metric_scores(self):
RA = base.ResultAnalyser(self.classifier, self.classification_indices,
self.k_folds, self.hps_method,
self.metrics_list,
self.n_iter, self.class_label_names,
self.train_pred, self.test_pred,
self.directory,
self.labels, self.database_name,
self.nb_cores, self.duration)
train_score, test_score = RA.get_metric_scores("accuracy_score", {})
self.assertEqual(train_score, self.train_accuracy)
self.assertEqual(test_score, self.test_accuracy)
def test_get_all_metrics_scores(self):
RA = base.ResultAnalyser(self.classifier, self.classification_indices,
self.k_folds, self.hps_method,
self.metrics_list,
self.n_iter, self.class_label_names,
self.train_pred, self.test_pred,
self.directory,
self.labels, self.database_name,
self.nb_cores, self.duration)
RA.get_all_metrics_scores()
self.assertEqual(RA.metric_scores["accuracy_score"][0],
self.train_accuracy)
self.assertEqual(RA.metric_scores["accuracy_score"][1],
self.test_accuracy)
self.assertEqual(RA.metric_scores["f1_score"][0],
self.train_f1)
self.assertEqual(RA.metric_scores["f1_score"][1],
self.test_f1)
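# A natural follow-up test (sketch only, not part of this commit) would cover
# the "equiv" behaviour added above: build a MultiviewResultAnalyzer with an
# hps_method name ending in "equiv" and assert that its n_iter has been
# multiplied by the number of views.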
class Test_BaseClassifier(unittest.TestCase):
@classmethod
def setUpClass(cls):
......