Commit 7a4dc498 authored by bbauvin

Adding multiclass

parent 3d2a2052
language: python
python:
- 2.7
- 3.6
addons:
apt:
packages:
- libblas-dev
- liblapack-dev
- gfortran
install:
- pip install -U pip pip-tools
- pip install numpy scipy scikit-learn
- git clone https://github.com/aldro61/pyscm.git && cd pyscm && python setup.py install && cd ..
script:
- python setup.py test
notifications:
email:
on_success: change
on_failure: change
\ No newline at end of file
......@@ -19,7 +19,7 @@ from .Multiview.ExecMultiview import ExecMultiview, ExecMultiview_multicore
from .Monoview.ExecClassifMonoView import ExecMonoview, ExecMonoview_multicore
from .utils import GetMultiviewDb as DB
from .ResultAnalysis import resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes
from .utils import execution, Dataset
from .utils import execution, Dataset, Multiclass
# Author-Info
__author__ = "Baptiste Bauvin"
......@@ -66,15 +66,23 @@ def initBenchmark(args):
return benchmark
def initMonoviewExps(benchmark, argumentDictionaries, views, allViews, NB_CLASS, kwargsInit):
def genViewsDictionnary(DATASET):
datasetsNames = DATASET.keys()
viewsDictionary = dict((DATASET.get(datasetName).attrs["name"], int(datasetName[4:]))
for datasetName in datasetsNames
if datasetName[:4]=="View")
return viewsDictionary
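For reference, a minimal sketch of the HDF5 layout genViewsDictionnary expects, where each view is stored as a dataset named "View0", "View1", ... carrying a "name" attribute (the in-memory h5py file and the view names below are illustrative only, not part of this commit):
import h5py
import numpy as np

with h5py.File("example.hdf5", "w", driver="core", backing_store=False) as DATASET:
    for viewIndex, viewName in enumerate(["RGB", "HOG"]):
        view = DATASET.create_dataset("View" + str(viewIndex), data=np.zeros((5, 3)))
        view.attrs["name"] = viewName  # human-readable view name
    DATASET.create_group("Metadata")   # skipped by the "View" prefix filter below
    viewsDictionary = dict((DATASET.get(datasetName).attrs["name"], int(datasetName[4:]))
                           for datasetName in DATASET.keys()
                           if datasetName[:4] == "View")
    # viewsDictionary == {"RGB": 0, "HOG": 1}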
def initMonoviewExps(benchmark, argumentDictionaries, viewsDictionary, NB_CLASS, kwargsInit):
"""Used to add each monoview exeperience args to the list of monoview experiences args"""
if benchmark["Monoview"]:
argumentDictionaries["Monoview"] = []
for view in views:
for viewName, viewIndex in viewsDictionary.items():
for classifier in benchmark["Monoview"]:
arguments = {
"args": {classifier + "KWARGS": kwargsInit[classifier + "KWARGSInit"], "feat": view,
"CL_type": classifier, "nbClass": NB_CLASS}, "viewIndex": allViews.index(view)}
"args": {classifier + "KWARGS": kwargsInit[classifier + "KWARGSInit"], "feat": viewName,
"CL_type": classifier, "nbClass": NB_CLASS}, "viewIndex": viewIndex}
argumentDictionaries["Monoview"].append(arguments)
return argumentDictionaries
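For illustration, a standalone sketch of the structure initMonoviewExps builds; the classifier name, kwargs and view names below are placeholders, not values from this commit:
benchmark = {"Monoview": ["DecisionTree"], "Multiview": []}
kwargsInit = {"DecisionTreeKWARGSInit": {"max_depth": 3}}
viewsDictionary = {"RGB": 0, "HOG": 1}
NB_CLASS = 2

argumentDictionaries = {"Monoview": [], "Multiview": []}
for viewName, viewIndex in viewsDictionary.items():
    for classifier in benchmark["Monoview"]:
        argumentDictionaries["Monoview"].append({
            "args": {classifier + "KWARGS": kwargsInit[classifier + "KWARGSInit"],
                     "feat": viewName, "CL_type": classifier, "nbClass": NB_CLASS},
            "viewIndex": viewIndex})
# one entry per (view, classifier) pair, each tagged with the matching view index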
......@@ -272,18 +280,20 @@ def execClassif(arguments):
DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args.CL_nbClass,
args.CL_classes)
datasetLength = DATASET.get("Metadata").attrs["datasetLength"]
classificationIndices = execution.genSplits(statsIter, datasetLength, DATASET, args.CL_split, statsIterRandomStates)
multiclassLabels, labelsIndices, oldIndicesMulticlass = Multiclass.genMulticlassLabels(DATASET.get("Labels").value, args.CL_multiclassMethod)
classificationIndices = execution.genSplits(statsIter, oldIndicesMulticlass, multiclassLabels, args.CL_split, statsIterRandomStates, args.CL_multiclassMethod)
kFolds = execution.genKFolds(statsIter, args.CL_nbFolds, statsIterRandomStates)
datasetFiles = Dataset.initMultipleDatasets(args, nbCores)
views, viewsIndices, allViews = execution.initViews(DATASET, args)
if not views:
raise ValueError("Empty views list, modify selected views to match dataset " + args.views)
# views, viewsIndices, allViews = execution.initViews(DATASET, args)
# if not views:
# raise ValueError("Empty views list, modify selected views to match dataset " + args.views)
viewsDictionary = genViewsDictionnary(DATASET)
NB_VIEW = len(views)
# NB_VIEW = DATASET.get("Metadata").attrs["nbViews"]
NB_CLASS = DATASET.get("Metadata").attrs["nbClass"]
metrics = [metric.split(":") for metric in args.CL_metrics]
......@@ -296,7 +306,7 @@ def execClassif(arguments):
if len(metric) == 1:
metrics[metricIndex] = [metric[0], None]
logging.info("Start:\t Finding all available mono- & multiview algorithms")
logging.debug("Start:\t Finding all available mono- & multiview algorithms")
benchmark = initBenchmark(args)
......@@ -305,7 +315,7 @@ def execClassif(arguments):
dataBaseTime = time.time() - start
argumentDictionaries = {"Monoview": [], "Multiview": []}
argumentDictionaries = initMonoviewExps(benchmark, argumentDictionaries, views, allViews, NB_CLASS,
argumentDictionaries = initMonoviewExps(benchmark, argumentDictionaries, viewsDictionary, NB_CLASS,
initKWARGS)
directories = execution.genDirecortiesNames(directory, statsIter)
......@@ -323,7 +333,7 @@ def execClassif(arguments):
np.savetxt(directories[statIterIndex] + "train_labels.csv", trainLabels, delimiter=",")
if nbCores > 1:
iterResults = []
nbExperiments = statsIter
nbExperiments = statsIter*len(multiclassLabels)
for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))):
iterResults += (Parallel(n_jobs=nbCores)(
delayed(classifyOneIter_multicore)(LABELS_DICTIONARY, argumentDictionaries, 1,
......
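For clarity, the batching arithmetic behind the parallel loop above: nbExperiments now counts one job per statistical iteration and per binary problem, and jobs are dispatched in chunks of nbCores (a sketch with placeholder values, not code from this commit):
import math

statsIter, nbBinaryProblems, nbCores = 5, 10, 4   # e.g. 10 one-vs-one label pairs
nbExperiments = statsIter * nbBinaryProblems
for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))):
    batch = range(nbCores * stepIndex, min(nbCores * (stepIndex + 1), nbExperiments))
    # each index in `batch` would be handed to classifyOneIter_multicore via joblib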
import numpy as np
import itertools
def genMulticlassLabels(labels, multiclassMethod):
if multiclassMethod == "oneVersusOne":
nbLabels = len(set(list(labels)))
if nbLabels == 2:
return [labels], [(0,1)], [np.arange(len(labels))]
else:
combinations = itertools.combinations(np.arange(nbLabels), 2)
multiclassLabels = []
labelsIndices = []
oldIndicesMulticlass = []
for combination in combinations:
labelsIndices.append(combination)
oldIndices = [exampleIndex
for exampleIndex, exampleLabel in enumerate(labels)
if exampleLabel in combination]
multiclassLabels.append(np.array([1 if exampleLabel==combination[0]
else 0
for exampleLabel in labels[oldIndices]]))
oldIndicesMulticlass.append(oldIndices)
elif multiclassMethod == "oneVersusRest":
# TODO : Implement one versus rest if probas are not a problem anymore
pass
return multiclassLabels, labelsIndices, oldIndicesMulticlass
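A short worked example of the one-versus-one decomposition genMulticlassLabels performs, on a toy label vector (the labels below are made up for illustration):
import itertools
import numpy as np

labels = np.array([0, 1, 2, 1, 0, 2])
nbLabels = len(set(list(labels)))
multiclassLabels, labelsIndices, oldIndicesMulticlass = [], [], []
for combination in itertools.combinations(range(nbLabels), 2):
    labelsIndices.append(combination)
    oldIndices = [exampleIndex for exampleIndex, exampleLabel in enumerate(labels)
                  if exampleLabel in combination]
    oldIndicesMulticlass.append(oldIndices)
    multiclassLabels.append(np.array([1 if exampleLabel == combination[0] else 0
                                      for exampleLabel in labels[oldIndices]]))
# labelsIndices        == [(0, 1), (0, 2), (1, 2)]
# oldIndicesMulticlass == [[0, 1, 3, 4], [0, 2, 4, 5], [1, 2, 3, 5]]
# multiclassLabels     == [array([1, 0, 0, 1]), array([1, 0, 1, 0]), array([1, 0, 1, 0])]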
......@@ -42,7 +42,7 @@ def parseTheArgs(arguments):
groupClass = parser.add_argument_group('Classification arguments')
groupClass.add_argument('--CL_multiclassMethod', metavar='STRING', action='store',
help='Determine which multiclass method to use if the dataset is multiclass',
default="biclass")
default="oneVersusOne")
groupClass.add_argument('--CL_split', metavar='FLOAT', action='store',
help='Determine the split ratio between learning and validation sets', type=float,
default=0.2)
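A quick check of the new default, using a minimal parser that mirrors the argument group above (a sketch, not the full argument list):
import argparse

parser = argparse.ArgumentParser()
groupClass = parser.add_argument_group('Classification arguments')
groupClass.add_argument('--CL_multiclassMethod', metavar='STRING', action='store',
                        help='Determine which multiclass method to use if the dataset is multiclass',
                        default="oneVersusOne")
args = parser.parse_args([])
# args.CL_multiclassMethod == "oneVersusOne" when the flag is omitted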
......@@ -238,30 +238,33 @@ def initLogFile(args):
return resultDirectory
def genSplits(statsIter, datasetlength, DATASET, splitRatio, statsIterRandomStates):
def genSplits(statsIter, oldIndicesMulticlass, multiclasslabels, splitRatio, statsIterRandomStates, multiclassMethod):
"""Used to gen the train/test splits using one or multiple random states"""
indices = np.arange(datasetlength)
for oldIndices, labels in zip(oldIndicesMulticlass, multiclasslabels):
indices = oldIndices
splitsMulticlass = []
if statsIter > 1:
splits = []
for randomState in statsIterRandomStates:
foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
random_state=randomState,
test_size=splitRatio)
folds = foldsObj.split(indices, DATASET.get("Labels").value)
folds = foldsObj.split(indices, labels)
for fold in folds:
train_fold, test_fold = fold
trainIndices = indices[train_fold]
testIndices = indices[test_fold]
splits.append([trainIndices, testIndices])
return splits
splitsMulticlass.append(splits)
else:
foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1, random_state=statsIterRandomStates, test_size=splitRatio)
folds = foldsObj.split(indices, DATASET.get("Labels").value)
folds = foldsObj.split(indices, labels)
for fold in folds:
train_fold, test_fold = fold
trainIndices = indices[train_fold]
testIndices = indices[test_fold]
return trainIndices, testIndices
splitsMulticlass.append((trainIndices, testIndices))
return splitsMulticlass
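A minimal sketch of the stratified split performed for one binary problem above, using toy indices and labels (assumes scikit-learn is available; the values are placeholders):
import numpy as np
import sklearn.model_selection

oldIndices = np.array([0, 2, 4, 5, 7, 9])   # examples kept for one label pair
labels = np.array([1, 0, 1, 0, 1, 0])       # their relabelled binary classes
foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,
                                                          random_state=42,
                                                          test_size=0.2)
for train_fold, test_fold in foldsObj.split(oldIndices, labels):
    trainIndices = oldIndices[train_fold]   # indices back into the full dataset
    testIndices = oldIndices[test_fold]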
def genKFolds(statsIter, nbFolds, statsIterRandomStates):
......@@ -293,12 +296,29 @@ def initViews(DATASET, args):
return views, viewsIndices, allViews
def genDirecortiesNames(directory, statsIter):
def genDirecortiesNames(directory, statsIter, labelsIndices, multiclassMethod, labelDictionary):
"""Used to generate the different directories of each iteration if needed"""
if statsIter > 1:
directories = []
for i in range(statsIter):
directories.append(directory + "iter_" + str(i + 1) + "/")
return directories
if multiclassMethod == "oneVersusOne":
for labelIndex1, labelIndex2 in labelsIndices:
labelName1 = labelDictionary[labelIndex1]
labelName2 = labelDictionary[labelIndex2]
directories.append(directory + "iter_" + str(i + 1) + "/"+labelName1+"_vs_"+labelName2+"/")
elif multiclassMethod == "oneVersusRest":
for labelIndex in labelsIndices:
labelName = labelDictionary[labelIndex]
directories.append(directory + "iter_" + str(i + 1) + "/"+labelName+"_vs_Rest/")
else:
return directory
directories = []
if multiclassMethod == "oneVersusOne":
for labelIndex1, labelIndex2 in labelsIndices:
labelName1 = labelDictionary[labelIndex1]
labelName2 = labelDictionary[labelIndex2]
directories.append(directory +labelName1+"_vs_"+labelName2+"/")
elif multiclassMethod == "oneVersusRest":
for labelIndex in labelsIndices:
labelName = labelDictionary[labelIndex]
directories.append(directory +labelName+"_vs_Rest/")
return directories
......@@ -88,3 +88,49 @@ class Test_genKFolds(unittest.TestCase):
def test_genKFolds_iter(self):
pass
class Test_genDirecortiesNames(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.directory = "../chicken_is_heaven/"
cls.stats_iter = 5
cls.labels_indices = [(0,1), (0,2), (0,3), (1,2), (1,3), (2,3)]
cls.multiclass_method = "oneVersusOne"
cls.labels_dictionary = {0:"test1", 1:"test2", 2:"test3", 3:"test4"}
pass
def test_simple_ovo(cls):
directories = execution.genDirecortiesNames(cls.directory, cls.stats_iter, cls.labels_indices,
cls.multiclass_method, cls.labels_dictionary)
cls.assertEqual(len(directories), 30)
cls.assertEqual(directories[0], "../chicken_is_heaven/iter_1/test1_vs_test2/")
cls.assertEqual(directories[-1], "../chicken_is_heaven/iter_5/test3_vs_test4/")
def test_simple_ovr(cls):
cls.multiclass_method = "oneVersusRest"
cls.labels_indices = [0,1,2,3]
directories = execution.genDirecortiesNames(cls.directory, cls.stats_iter, cls.labels_indices,
cls.multiclass_method, cls.labels_dictionary)
cls.assertEqual(len(directories), 20)
cls.assertEqual(directories[-1], "../chicken_is_heaven/iter_5/test4_vs_Rest/")
cls.assertEqual(directories[0], "../chicken_is_heaven/iter_1/test1_vs_Rest/")
def test_ovo_no_iter(cls):
cls.stats_iter = 1
directories = execution.genDirecortiesNames(cls.directory, cls.stats_iter, cls.labels_indices,
cls.multiclass_method, cls.labels_dictionary)
cls.assertEqual(len(directories), 6)
cls.assertEqual(directories[0], "../chicken_is_heaven/test1_vs_test2/")
cls.assertEqual(directories[-1], "../chicken_is_heaven/test3_vs_test4/")
def test_ovr_no_iter(cls):
cls.stats_iter = 1
cls.multiclass_method = "oneVersusRest"
cls.labels_indices = [0,1,2,3]
directories = execution.genDirecortiesNames(cls.directory, cls.stats_iter, cls.labels_indices,
cls.multiclass_method, cls.labels_dictionary)
cls.assertEqual(len(directories), 4)
cls.assertEqual(directories[-1], "../chicken_is_heaven/test4_vs_Rest/")
cls.assertEqual(directories[0], "../chicken_is_heaven/test1_vs_Rest/")
\ No newline at end of file
import unittest
import numpy as np
from ...MonoMultiViewClassifiers.utils import Multiclass
class Test_genMulticlassLabels(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.random_state = np.random.RandomState(42)
cls.labels = cls.random_state.randint(0,5,50)
def test_one_versus_one(cls):
multiclassLabels, labelsIndices, oldIndicesMulticlass = Multiclass.genMulticlassLabels(cls.labels, "oneVersusOne")
cls.assertEqual(len(multiclassLabels), 10)
cls.assertEqual(labelsIndices, [(0,1), (0,2), (0,3), (0,4), (1,2), (1,3), (1,4), (2,3), (2,4), (3,4)])
np.testing.assert_array_equal(oldIndicesMulticlass[0],
np.array([5, 13, 15, 18, 20, 23, 24, 27, 33, 38, 39, 41, 43, 44, 45, 46, 48]))
np.testing.assert_array_equal(multiclassLabels[0],
np.array([0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0]))
......@@ -23,12 +23,12 @@ class Test_initMonoviewArguments(unittest.TestCase):
def test_initMonoviewArguments_no_monoview(self):
benchmark = {"Monoview":{}, "Multiview":{}}
arguments = ExecClassif.initMonoviewExps(benchmark, {}, [], None, 0, {})
arguments = ExecClassif.initMonoviewExps(benchmark, {}, {}, 0, {})
self.assertEqual(arguments, {})
def test_initMonoviewArguments_empty(self):
benchmark = {"Monoview":{}, "Multiview":{}}
arguments = ExecClassif.initMonoviewExps(benchmark, {}, [], None, 0, {})
arguments = ExecClassif.initMonoviewExps(benchmark, {}, {}, 0, {})
class Essai(unittest.TestCase):
......