diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py
index 2d6a1532f649598eb8dbfa8d791e8e68d088993e..177de995976643f9b45f878b04b1948b5e484084 100644
--- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py
+++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py
@@ -910,7 +910,7 @@ def exec_classif(arguments):
                 noise_std)
             args["name"] = datasetname
 
-            splits = execution.gen_splits(dataset_var.get_labels(),
+            splits = execution.gen_splits(dataset_var,
                                           args["split"],
                                           stats_iter_random_states)
 
diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis.py
index fdbe28178b653a5929bddd13d3aca9ec8b90ea66..aa057156d9a821d35079718a65bd6adb6c9a0177 100644
--- a/multiview_platform/mono_multi_view_classifiers/result_analysis.py
+++ b/multiview_platform/mono_multi_view_classifiers/result_analysis.py
@@ -376,7 +376,7 @@ def get_fig_size(nb_results, min_size=15, multiplier=1.0, bar_width=0.35):
 
 
 def get_metrics_scores(metrics, results):
-    r"""Used to extract metrics scores in case of biclass classification
+    r"""Used to extract metrics scores in case of classification
 
     Parameters
     ----------
@@ -695,8 +695,7 @@ def publish_feature_importances(feature_importances, directory, database_name,
 
 
 def get_arguments(benchmark_argument_dictionaries, iter_index):
-    r"""Used to get the arguments passed to the benchmark executing function corresponding to the flag of a
-    biclass experimentation.
+    r"""Used to get the arguments passed to the benchmark executing function corresponding to the flag of an experimentation.
 
     Parameters
     ----------
@@ -773,7 +772,7 @@ def publish_tracebacks(directory, database_name, labels_names, tracebacks,
 
 def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
                        metrics, example_ids, labels):
-    r"""Used to extract and format the results of the different biclass experimentations performed.
+    r"""Used to extract and format the results of the different experimentations performed.
 
     Parameters
     ----------
@@ -798,7 +797,7 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
         The list contains a dictionary for each statistical iteration. This dictionary contains a dictionary for each
         label combination, regrouping the scores for each metrics and the information useful to plot errors on examples.
     """
-    logging.debug("Srart:\t Analzing all biclass resuls")
+    logging.debug("Start:\t Analyzing all results")
     iter_results = {"metrics_scores": [i for i in range(stats_iter)],
                     "example_errors": [i for i in range(stats_iter)],
                     "feature_importances": [i for i in range(stats_iter)],
@@ -835,7 +834,7 @@ def analyze_iterations(results, benchmark_argument_dictionaries, stats_iter,
         iter_results["labels"] = labels
         iter_results["durations"][iter_index] = durations
 
-    logging.debug("Done:\t Analzing all biclass resuls")
+    logging.debug("Done:\t Analyzing all results")
 
     return res, iter_results, flagged_tracebacks_list
 
@@ -888,7 +887,7 @@ def publish_all_example_errors(iter_results, directory,
                                stats_iter,
                                example_ids, labels):
     logging.debug(
-        "Start:\t Global biclass label analysis figure generation")
+        "Start:\t Global label analysis figure generation")
 
     nbExamples, nbClassifiers, data, \
     error_on_examples, classifier_names = gen_error_data_glob(iter_results,
@@ -905,7 +904,7 @@ def publish_all_example_errors(iter_results, directory,
                     nbExamples, os.path.join(directory, ""))
 
     logging.debug(
-        "Done:\t Global biclass label analysis figures generation")
+        "Done:\t Global label analysis figures generation")
 
 
 
@@ -931,18 +930,18 @@ def add_new_labels_combination(iterBiclassResults, labelsComination,
     return iterBiclassResults
 
 
-def add_new_metric(iter_biclass_results, metric, labels_combination,
+def add_new_metric(iter_results, metric, labels_combination,
                    nb_classifiers,
                    stats_iter):
-    if metric[0] not in iter_biclass_results[labels_combination][
+    if metric[0] not in iter_results[labels_combination][
         "metrics_scores"]:
-        iter_biclass_results[labels_combination]["metrics_scores"][
+        iter_results[labels_combination]["metrics_scores"][
             metric[0]] = {
             "train_scores":
                 np.zeros((nb_classifiers, stats_iter)),
             "test_scores":
                 np.zeros((nb_classifiers, stats_iter))}
-    return iter_biclass_results
+    return iter_results
 
 
 def format_previous_results(iter_results_lists):
@@ -969,8 +968,6 @@ def format_previous_results(iter_results_lists):
     metrics_analysis = {}
     feature_importances_analysis = {}
     feature_importances_stds = {}
-    # labels = dict((key,"") for key in biclass_results.keys())
-    # for biclass_result in biclass_results.items():
 
     metric_concat_dict = {}
     for iter_index, metrics_score in enumerate(
@@ -1027,13 +1024,13 @@ def format_previous_results(iter_results_lists):
            iter_results_lists["labels"], duration_means, duration_stds
 
 
-def analyze_all(biclass_results, stats_iter, directory, data_base_name,
+def analyze_all(iters_results, stats_iter, directory, data_base_name,
                 example_ids):
     """Used to format the results in order to plot the mean results on the iterations"""
     metrics_analysis, error_analysis, \
     feature_importances, feature_importances_stds, \
     labels, duration_means, \
-    duration_stds = format_previous_results(biclass_results)
+    duration_stds = format_previous_results(iters_results)
 
     results = publish_all_metrics_scores(metrics_analysis,
                                          directory,
@@ -1059,7 +1056,7 @@ def get_results(results, stats_iter, benchmark_argument_dictionaries,
     """Used to analyze the results of the previous benchmarks"""
     data_base_name = benchmark_argument_dictionaries[0]["args"]["name"]
 
-    results_means_std, biclass_results, flagged_failed = analyze_iterations(
+    results_means_std, iters_results, flagged_failed = analyze_iterations(
         results, benchmark_argument_dictionaries,
         stats_iter, metrics, example_ids, labels)
     if flagged_failed:
@@ -1067,7 +1064,7 @@ def get_results(results, stats_iter, benchmark_argument_dictionaries,
 
     if stats_iter > 1:
         results_means_std = analyze_all(
-            biclass_results, stats_iter, directory,
+            iters_results, stats_iter, directory,
             data_base_name, example_ids)
     return results_means_std
 
diff --git a/multiview_platform/mono_multi_view_classifiers/utils/execution.py b/multiview_platform/mono_multi_view_classifiers/utils/execution.py
index 9bf1b72d8728ce71eadb6dffc4a51141a846f9fa..ec783b662b7aa608f1b7391f00ee059f75056e68 100644
--- a/multiview_platform/mono_multi_view_classifiers/utils/execution.py
+++ b/multiview_platform/mono_multi_view_classifiers/utils/execution.py
@@ -180,7 +180,7 @@ def init_log_file(name, views, cl_type, log, debug, label,
     return result_directory
 
 
-def gen_splits(labels, split_ratio, stats_iter_random_states):
+def gen_splits(dataset_var, split_ratio, stats_iter_random_states):
     r"""Used to _gen the train/test splits using one or multiple random states.
 
     Parameters
@@ -198,6 +198,8 @@ def gen_splits(labels, split_ratio, stats_iter_random_states):
         For each statistical iteration a couple of numpy.ndarrays is stored with the indices for the training set and
         the ones of the testing set.
     """
+    labels = dataset_var.get_labels()
+    example_ids = dataset_var.example_ids
     indices = np.arange(len(labels))
     splits = []
     for random_state in stats_iter_random_states:
@@ -208,7 +210,11 @@ def gen_splits(labels, split_ratio, stats_iter_random_states):
         for fold in folds:
             train_fold, test_fold = fold
         train_indices = indices[train_fold]
-        test_indices = indices[test_fold]
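+        # exclude examples whose ids start with "new_" from the test set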
+        test_indices = []
+        for ind in test_fold:
+            if not example_ids[ind].startswith("new_"):
+                test_indices.append(indices[ind])
         splits.append([train_indices, test_indices])
 
     return splits
diff --git a/multiview_platform/tests/__init__.py b/multiview_platform/tests/__init__.py
index b7887f5996bd1484d567e919608c364fe9a64c63..194018ae5ef03ba4d863b4e1497acae3b317589a 100644
--- a/multiview_platform/tests/__init__.py
+++ b/multiview_platform/tests/__init__.py
@@ -1,2 +1,2 @@
-from . import test_ExecClassif
+from . import test_exec_classif
 from .utils import rm_tmp, gen_test_dataset, tmp_path
\ No newline at end of file
diff --git a/multiview_platform/tests/test_ExecClassif.py b/multiview_platform/tests/test_exec_classif.py
similarity index 100%
rename from multiview_platform/tests/test_ExecClassif.py
rename to multiview_platform/tests/test_exec_classif.py
diff --git a/multiview_platform/tests/test_ResultAnalysis.py b/multiview_platform/tests/test_result_analysis.py
similarity index 99%
rename from multiview_platform/tests/test_ResultAnalysis.py
rename to multiview_platform/tests/test_result_analysis.py
index 98e4cabf602f66505fc784e0bd66fd464f7656de..4a825dc71b8b43bd9b57c311ad7bf15392775e09 100644
--- a/multiview_platform/tests/test_ResultAnalysis.py
+++ b/multiview_platform/tests/test_result_analysis.py
@@ -19,7 +19,7 @@ class Test_get_arguments(unittest.TestCase):
         self.assertTrue(argument_dict["valid"])
 
 
-class Test_get_metrics_scores_biclass(unittest.TestCase):
+class Test_get_metrics_scores(unittest.TestCase):
 
 
     def test_simple(self):
@@ -131,7 +131,7 @@ class Test_get_metrics_scores_biclass(unittest.TestCase):
             np.array(metrics_scores["f1_score"].columns),
             np.array(["mv", "dt-1"]))
 
-class Test_get_example_errors_biclass(unittest.TestCase):
+class Test_get_example_errors(unittest.TestCase):
 
     def test_simple(self):
         ground_truth = np.array([0,1,0,1,0,1,0,1, -100])