Commit 7a4dc498 authored by bbauvin

Adding multiclass

parent 3d2a2052
language: python
python:
- 2.7
- 3.6
addons:
apt:
packages:
- libblas-dev
- liblapack-dev
- gfortran
install:
- pip install -U pip pip-tools
- pip install numpy scipy scikit-learn
- git clone https://github.com/aldro61/pyscm.git && cd pyscm && python setup.py install && cd ..
script:
- python setup.py test
notifications:
email:
on_success: change
on_failure: change
\ No newline at end of file
......@@ -19,7 +19,7 @@ from .Multiview.ExecMultiview import ExecMultiview, ExecMultiview_multicore
from .Monoview.ExecClassifMonoView import ExecMonoview, ExecMonoview_multicore
from .utils import GetMultiviewDb as DB
from .ResultAnalysis import resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes
from .utils import execution, Dataset
from .utils import execution, Dataset, Multiclass
# Author-Info
__author__ = "Baptiste Bauvin"
......@@ -66,15 +66,23 @@ def initBenchmark(args):
return benchmark
def initMonoviewExps(benchmark, argumentDictionaries, views, allViews, NB_CLASS, kwargsInit):
def genViewsDictionnary(DATASET):
datasetsNames = DATASET.keys()
viewsDictionary = dict((DATASET.get(datasetName).attrs["name"], int(datasetName[4:]))
for datasetName in datasetsNames
if datasetName[:4]=="View")
return viewsDictionary
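For reference, a minimal sketch of the HDF5 layout genViewsDictionnary expects, where each view is stored as a dataset named "View0", "View1", ... carrying a "name" attribute (the in-memory h5py file and the view names below are illustrative only, not part of this commit):
import h5py
import numpy as np

with h5py.File("example.hdf5", "w", driver="core", backing_store=False) as DATASET:
    for viewIndex, viewName in enumerate(["RGB", "HOG"]):
        view = DATASET.create_dataset("View" + str(viewIndex), data=np.zeros((5, 3)))
        view.attrs["name"] = viewName  # human-readable view name
    DATASET.create_group("Metadata")   # skipped by the "View" prefix filter below
    viewsDictionary = dict((DATASET.get(datasetName).attrs["name"], int(datasetName[4:]))
                           for datasetName in DATASET.keys()
                           if datasetName[:4] == "View")
    # viewsDictionary == {"RGB": 0, "HOG": 1}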
def initMonoviewExps(benchmark, argumentDictionaries, viewsDictionary, NB_CLASS, kwargsInit):
"""Used to add each monoview exeperience args to the list of monoview experiences args"""
if benchmark["Monoview"]:
argumentDictionaries["Monoview"] = []
for view in views:
for viewName, viewIndex in viewsDictionary.items():
for classifier in benchmark["Monoview"]:
arguments = {
"args": {classifier + "KWARGS": kwargsInit[classifier + "KWARGSInit"], "feat": view,
"CL_type": classifier, "nbClass": NB_CLASS}, "viewIndex": allViews.index(view)}
"args": {classifier + "KWARGS": kwargsInit[classifier + "KWARGSInit"], "feat": viewName,
"CL_type": classifier, "nbClass": NB_CLASS}, "viewIndex": viewIndex}
argumentDictionaries["Monoview"].append(arguments)
return argumentDictionaries
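For illustration, a standalone sketch of the structure initMonoviewExps builds; the classifier name, kwargs and view names below are placeholders, not values from this commit:
benchmark = {"Monoview": ["DecisionTree"], "Multiview": []}
kwargsInit = {"DecisionTreeKWARGSInit": {"max_depth": 3}}
viewsDictionary = {"RGB": 0, "HOG": 1}
NB_CLASS = 2

argumentDictionaries = {"Monoview": [], "Multiview": []}
for viewName, viewIndex in viewsDictionary.items():
    for classifier in benchmark["Monoview"]:
        argumentDictionaries["Monoview"].append({
            "args": {classifier + "KWARGS": kwargsInit[classifier + "KWARGSInit"],
                     "feat": viewName, "CL_type": classifier, "nbClass": NB_CLASS},
            "viewIndex": viewIndex})
# one entry per (view, classifier) pair, each tagged with the matching view index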
......@@ -272,18 +280,20 @@ def execClassif(arguments):
DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args.CL_nbClass,
args.CL_classes)
datasetLength = DATASET.get("Metadata").attrs["datasetLength"]
classificationIndices = execution.genSplits(statsIter, datasetLength, DATASET, args.CL_split, statsIterRandomStates)
multiclassLabels, labelsIndices, oldIndicesMulticlass = Multiclass.genMulticlassLabels(DATASET.get("Labels").value, args.CL_multiclassMethod)
classificationIndices = execution.genSplits(statsIter, oldIndicesMulticlass, multiclassLabels, args.CL_split, statsIterRandomStates, args.CL_multiclassMethod)
kFolds = execution.genKFolds(statsIter, args.CL_nbFolds, statsIterRandomStates)
datasetFiles = Dataset.initMultipleDatasets(args, nbCores)
views, viewsIndices, allViews = execution.initViews(DATASET, args)
if not views:
raise ValueError("Empty views list, modify selected views to match dataset " + args.views)
# views, viewsIndices, allViews = execution.initViews(DATASET, args)
# if not views:
# raise ValueError("Empty views list, modify selected views to match dataset " + args.views)
viewsDictionary = genViewsDictionnary(DATASET)
NB_VIEW = len(views)
# NB_VIEW = DATASET.get("Metadata").attrs["nbViews"]
NB_CLASS = DATASET.get("Metadata").attrs["nbClass"]
metrics = [metric.split(":") for metric in args.CL_metrics]
......@@ -296,7 +306,7 @@ def execClassif(arguments):
if len(metric) == 1:
metrics[metricIndex] = [metric[0], None]
logging.info("Start:\t Finding all available mono- & multiview algorithms")
logging.debug("Start:\t Finding all available mono- & multiview algorithms")
benchmark = initBenchmark(args)
......@@ -305,7 +315,7 @@ def execClassif(arguments):
dataBaseTime = time.time() - start
argumentDictionaries = {"Monoview": [], "Multiview": []}
argumentDictionaries = initMonoviewExps(benchmark, argumentDictionaries, views, allViews, NB_CLASS,
argumentDictionaries = initMonoviewExps(benchmark, argumentDictionaries, viewsDictionary, NB_CLASS,
initKWARGS)
directories = execution.genDirecortiesNames(directory, statsIter)
......@@ -323,7 +333,7 @@ def execClassif(arguments):
np.savetxt(directories[statIterIndex] + "train_labels.csv", trainLabels, delimiter=",")
if nbCores > 1:
iterResults = []
nbExperiments = statsIter
nbExperiments = statsIter*len(multiclassLabels)
for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))):
iterResults += (Parallel(n_jobs=nbCores)(
delayed(classifyOneIter_multicore)(LABELS_DICTIONARY, argumentDictionaries, 1,
......
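For clarity, the batching arithmetic behind the parallel loop above: nbExperiments now counts one job per statistical iteration and per binary problem, and jobs are dispatched in chunks of nbCores (a sketch with placeholder values, not code from this commit):
import math

statsIter, nbBinaryProblems, nbCores = 5, 10, 4   # e.g. 10 one-vs-one label pairs
nbExperiments = statsIter * nbBinaryProblems
for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))):
    batch = range(nbCores * stepIndex, min(nbCores * (stepIndex + 1), nbExperiments))
    # each index in `batch` would be handed to classifyOneIter_multicore via joblib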
import numpy as np
import itertools
def genMulticlassLabels(labels, multiclassMethod):
if multiclassMethod == "oneVersusOne":
nbLabels = len(set(list(labels)))
if nbLabels == 2:
return [labels], [(0,1)], [np.arange(len(labels))]
else:
combinations = itertools.combinations(np.arange(nbLabels), 2)
multiclassLabels = []
labelsIndices = []
oldIndicesMulticlass = []
for combination in combinations:
labelsIndices.append(combination)
oldIndices = [exampleIndex
for exampleIndex, exampleLabel in enumerate(labels)
if exampleLabel in combination]
multiclassLabels.append(np.array([1 if exampleLabel==combination[0]
else 0
for exampleLabel in labels[oldIndices]]))
oldIndicesMulticlass.append(oldIndices)
elif multiclassMethod == "oneVersusRest":
# TODO : Implement one versus rest if probas are not a problem anymore
pass
return multiclassLabels, labelsIndices, oldIndicesMulticlass
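A short worked example of the one-versus-one decomposition genMulticlassLabels performs, on a toy label vector (the labels below are made up for illustration):
import itertools
import numpy as np

labels = np.array([0, 1, 2, 1, 0, 2])
nbLabels = len(set(list(labels)))
multiclassLabels, labelsIndices, oldIndicesMulticlass = [], [], []
for combination in itertools.combinations(range(nbLabels), 2):
    labelsIndices.append(combination)
    oldIndices = [exampleIndex for exampleIndex, exampleLabel in enumerate(labels)
                  if exampleLabel in combination]
    oldIndicesMulticlass.append(oldIndices)
    multiclassLabels.append(np.array([1 if exampleLabel == combination[0] else 0
                                      for exampleLabel in labels[oldIndices]]))
# labelsIndices        == [(0, 1), (0, 2), (1, 2)]
# oldIndicesMulticlass == [[0, 1, 3, 4], [0, 2, 4, 5], [1, 2, 3, 5]]
# multiclassLabels     == [array([1, 0, 0, 1]), array([1, 0, 1, 0]), array([1, 0, 1, 0])]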
......@@ -42,7 +42,7 @@ def parseTheArgs(arguments):
groupClass = parser.add_argument_group('Classification arguments')
groupClass.add_argument('--CL_multiclassMethod', metavar='STRING', action='store',
help='Determine which multiclass method to use if the dataset is multiclass',
default="biclass")
default="oneVersusOne")
groupClass.add_argument('--CL_split', metavar='FLOAT', action='store',
help='Determine the split ratio between learning and validation sets', type=float,
default=0.2)
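A quick check of the new default, using a minimal parser that mirrors the argument group above (a sketch, not the full argument list):
import argparse

parser = argparse.ArgumentParser()
groupClass = parser.add_argument_group('Classification arguments')
groupClass.add_argument('--CL_multiclassMethod', metavar='STRING', action='store',
                        help='Determine which multiclass method to use if the dataset is multiclass',
                        default="oneVersusOne")
args = parser.parse_args([])
# args.CL_multiclassMethod == "oneVersusOne" when the flag is omitted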
......@@ -238,30 +238,33 @@ def initLogFile(args):
return resultDirectory
def genSplits(statsIter, datasetlength, DATASET, splitRatio, statsIterRandomStates):
def genSplits(statsIter, oldIndicesMulticlass, multiclasslabels, splitRatio, statsIterRandomStates, multiclassMethod):
"""Used to gen the train/test splits using one or multiple random states"""
indices = np.arange(datasetlength)
for oldIndices, labels in zip(oldIndicesMulticlass, multiclasslabels):
indices = oldIndices
splitsMulticlass = []
if statsIter > 1:
splits = []
for randomState in statsIterRandomStates:
foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
random_state=randomState,
test_size=splitRatio)
folds = foldsObj.split(indices, DATASET.get("Labels").value)
folds = foldsObj.split(indices, labels)
for fold in folds:
train_fold, test_fold = fold
trainIndices = indices[train_fold]
testIndices = indices[test_fold]
splits.append([trainIndices, testIndices])
return splits
splitsMulticlass.append(splits)
else:
foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1, random_state=statsIterRandomStates, test_size=splitRatio)
folds = foldsObj.split(indices, DATASET.get("Labels").value)
folds = foldsObj.split(indices, labels)
for fold in folds:
train_fold, test_fold = fold
trainIndices = indices[train_fold]
testIndices = indices[test_fold]
return trainIndices, testIndices
splitsMulticlass.append((trainIndices, testIndices))
return splitsMulticlass
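A minimal sketch of the stratified split performed for one binary problem above, using toy indices and labels (assumes scikit-learn is available; the values are placeholders):
import numpy as np
import sklearn.model_selection

oldIndices = np.array([0, 2, 4, 5, 7, 9])   # examples kept for one label pair
labels = np.array([1, 0, 1, 0, 1, 0])       # their relabelled binary classes
foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
                                                          random_state=42,
                                                          test_size=0.2)
for train_fold, test_fold in foldsObj.split(oldIndices, labels):
    trainIndices = oldIndices[train_fold]   # indices back into the full dataset
    testIndices = oldIndices[test_fold]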
def genKFolds(statsIter, nbFolds, statsIterRandomStates):
......@@ -293,12 +296,29 @@ def initViews(DATASET, args):
return views, viewsIndices, allViews
def genDirecortiesNames(directory, statsIter):
def genDirecortiesNames(directory, statsIter, labelsIndices, multiclassMethod, labelDictionary):
"""Used to generate the different directories of each iteration if needed"""
if statsIter > 1:
directories = []
for i in range(statsIter):
directories.append(directory + "iter_" + str(i + 1) + "/")
return directories
if multiclassMethod == "oneVersusOne":
for labelIndex1, labelIndex2 in labelsIndices:
labelName1 = labelDictionary[labelIndex1]
labelName2 = labelDictionary[labelIndex2]
directories.append(directory + "iter_" + str(i + 1) + "/"+labelName1+"_vs_"+labelName2+"/")
elif multiclassMethod == "oneVersusRest":
for labelIndex in labelsIndices:
labelName = labelDictionary[labelIndex]
directories.append(directory + "iter_" + str(i + 1) + "/"+labelName+"_vs_Rest/")
else:
return directory
directories = []
if multiclassMethod == "oneVersusOne":
for labelIndex1, labelIndex2 in labelsIndices:
labelName1 = labelDictionary[labelIndex1]
labelName2 = labelDictionary[labelIndex2]
directories.append(directory +labelName1+"_vs_"+labelName2+"/")
elif multiclassMethod == "oneVersusRest":
for labelIndex in labelsIndices:
labelName = labelDictionary[labelIndex]
directories.append(directory +labelName+"_vs_Rest/")
return directories
......@@ -88,3 +88,49 @@ class Test_genKFolds(unittest.TestCase):
def test_genKFolds_iter(self):
pass
class Test_genDirecortiesNames(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.directory = "../chicken_is_heaven/"
cls.stats_iter = 5
cls.labels_indices = [(0,1), (0,2), (0,3), (1,2), (1,3), (2,3)]
cls.multiclass_method = "oneVersusOne"
cls.labels_dictionary = {0:"test1", 1:"test2", 2:"test3", 3:"test4"}
pass
def test_simple_ovo(cls):
directories = execution.genDirecortiesNames(cls.directory, cls.stats_iter, cls.labels_indices,
cls.multiclass_method, cls.labels_dictionary)
cls.assertEqual(len(directories), 30)
cls.assertEqual(directories[0], "../chicken_is_heaven/iter_1/test1_vs_test2/")
cls.assertEqual(directories[-1], "../chicken_is_heaven/iter_5/test3_vs_test4/")
def test_simple_ovr(cls):
cls.multiclass_method = "oneVersusRest"
cls.labels_indices = [0,1,2,3]
directories = execution.genDirecortiesNames(cls.directory, cls.stats_iter, cls.labels_indices,
cls.multiclass_method, cls.labels_dictionary)
cls.assertEqual(len(directories), 20)
cls.assertEqual(directories[-1], "../chicken_is_heaven/iter_5/test4_vs_Rest/")
cls.assertEqual(directories[0], "../chicken_is_heaven/iter_1/test1_vs_Rest/")
def test_ovo_no_iter(cls):
cls.stats_iter = 1
directories = execution.genDirecortiesNames(cls.directory, cls.stats_iter, cls.labels_indices,
cls.multiclass_method, cls.labels_dictionary)
cls.assertEqual(len(directories), 6)
cls.assertEqual(directories[0], "../chicken_is_heaven/test1_vs_test2/")
cls.assertEqual(directories[-1], "../chicken_is_heaven/test3_vs_test4/")
def test_ovr_no_iter(cls):
cls.stats_iter = 1
cls.multiclass_method = "oneVersusRest"
cls.labels_indices = [0,1,2,3]
directories = execution.genDirecortiesNames(cls.directory, cls.stats_iter, cls.labels_indices,
cls.multiclass_method, cls.labels_dictionary)
cls.assertEqual(len(directories), 4)
cls.assertEqual(directories[-1], "../chicken_is_heaven/test4_vs_Rest/")
cls.assertEqual(directories[0], "../chicken_is_heaven/test1_vs_Rest/")
\ No newline at end of file
import unittest
import numpy as np
from ...MonoMultiViewClassifiers.utils import Multiclass
class Test_genMulticlassLabels(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.random_state = np.random.RandomState(42)
cls.labels = cls.random_state.randint(0,5,50)
def test_one_versus_one(cls):
multiclassLabels, labelsIndices, oldIndicesMulticlass = Multiclass.genMulticlassLabels(cls.labels, "oneVersusOne")
cls.assertEqual(len(multiclassLabels), 10)
cls.assertEqual(labelsIndices, [(0,1), (0,2), (0,3), (0,4), (1,2), (1,3), (1,4), (2,3), (2,4), (3,4)])
np.testing.assert_array_equal(oldIndicesMulticlass[0],
np.array([5, 13, 15, 18, 20, 23, 24, 27, 33, 38, 39, 41, 43, 44, 45, 46, 48]))
np.testing.assert_array_equal(multiclassLabels[0],
np.array([0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0]))
......@@ -23,12 +23,12 @@ class Test_initMonoviewArguments(unittest.TestCase):
def test_initMonoviewArguments_no_monoview(self):
benchmark = {"Monoview":{}, "Multiview":{}}
arguments = ExecClassif.initMonoviewExps(benchmark, {}, [], None, 0, {})
arguments = ExecClassif.initMonoviewExps(benchmark, {}, {}, 0, {})
self.assertEqual(arguments, {})
def test_initMonoviewArguments_empty(self):
benchmark = {"Monoview":{}, "Multiview":{}}
arguments = ExecClassif.initMonoviewExps(benchmark, {}, [], None, 0, {})
arguments = ExecClassif.initMonoviewExps(benchmark, {}, {}, 0, {})
class Essai(unittest.TestCase):
......