Commit c4216e85 authored by bbauvin

Corrected multiple errors on multiclass classification

parent f9c5fcf4
Showing 279 additions and 197 deletions
@@ -339,7 +339,7 @@ def publishMulticlassResults(multiclassResults, metrics, statsIter, argumentDict
     pass
-def analyzeMulticlass(results, statsIter, argumentDictionaries, nbExamples, nbLabels, multiclassLabels, metrics):
+def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamples, nbLabels, multiclassLabels, metrics):
     """Used to tranform one versus one results in multiclass results and to publish it"""
     multiclassResults = [{} for _ in range(statsIter)]
     for iterIndex in range(statsIter):
@@ -357,9 +357,9 @@ def analyzeMulticlass(results, statsIter, argumentDictionaries, nbExamples, nbLa
     for iterIndex, multiclassiterResult in enumerate(multiclassResults):
         for key, value in multiclassiterResult.items():
             multiclassResults[iterIndex][key] = {"labels": np.argmax(value, axis=1)}
-    multiclassResults = genMetricsScores(multiclassResults, multiclassLabels, metrics, argumentDictionaries)
+    multiclassResults = genMetricsScores(multiclassResults, multiclassLabels, metrics, benchmarkArgumentDictionaries)
     multiclassResults = getErrorOnLabels(multiclassResults, multiclassLabels)
-    publishMulticlassResults(multiclassResults, metrics, statsIter, argumentDictionaries)
+    publishMulticlassResults(multiclassResults, metrics, statsIter, benchmarkArgumentDictionaries)
     return multiclassResults
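For reference, a minimal sketch (not part of this commit) of how one-versus-one decisions can be reduced to multiclass predictions by vote counting, which is what the accumulated `value` matrices and the `np.argmax(value, axis=1)` call above rely on; all names below are illustrative:

    import numpy as np

    def one_vs_one_votes(pairwise_preds, combinations, nb_labels):
        """Accumulate one-vs-one decisions into a vote matrix and take the argmax.

        pairwise_preds: list of 0/1 arrays, one per label pair in `combinations`
                        (1 means the first label of the pair was predicted).
        """
        nb_examples = len(pairwise_preds[0])
        votes = np.zeros((nb_examples, nb_labels), dtype=int)
        for (label_a, label_b), preds in zip(combinations, pairwise_preds):
            votes[preds == 1, label_a] += 1
            votes[preds == 0, label_b] += 1
        return np.argmax(votes, axis=1)

    # Example: 3 classes, pairs (0,1), (0,2), (1,2)
    preds = [np.array([1, 0]), np.array([1, 0]), np.array([1, 0])]
    print(one_vs_one_votes(preds, [(0, 1), (0, 2), (1, 2)], 3))  # -> [0 2]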
@@ -373,11 +373,11 @@ def analyzeIter(results):
     pass
-def getResults(results, statsIter, nbMulticlass, argumentDictionaries, multiclassLabels, metrics):
+def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, multiclassLabels, metrics):
     if statsIter > 1:
         if nbMulticlass > 1:
             analyzeBiclass(results)
-            multiclassResults = analyzeMulticlass(results, statsIter, argumentDictionaries, multiclassLabels, metrics)
+            multiclassResults = analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, multiclassLabels, metrics)
             analyzeIter(multiclassResults)
         else:
             biclassResults = analyzeBiclass(results)
@@ -391,15 +391,25 @@ def getResults(results, statsIter, nbMulticlass, argumentDictionaries, multiclas
 def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
                      kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
-                     benchmark=None, views=None, viewsIndices=None, flag=None, ExecMonoview_multicore=ExecMonoview_multicore,
-                     ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments):
+                     benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
+                     ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore,
+                     initMultiviewArguments=initMultiviewArguments):
     """Used to run a benchmark using one core. ExecMonoview_multicore, initMultiviewArguments and
     ExecMultiview_multicore args are only used for tests"""
+    if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
+        try:
+            os.makedirs(os.path.dirname(directory + "train_labels.csv"))
+        except OSError as exc:
+            if exc.errno != errno.EEXIST:
+                raise
+    trainIndices, testIndices = classificationIndices
+    trainLabels = labels[trainIndices]
+    np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
     resultsMonoview = []
     labelsNames = list(LABELS_DICTIONARY.values())
     np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
     resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds,
-                                               coreIndex, args.type, args.pathF, randomState,
+                                               coreIndex, args.type, args.pathF, randomState, labels,
                                                hyperParamSearch=hyperParamSearch, metrics=metrics,
                                                nIter=args.CL_GS_iter, **argument)
                        for argument in argumentDictionaries["Monoview"]]
@@ -410,7 +420,7 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
     resultsMultiview = []
     resultsMultiview += [
         ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type,
-                                args.pathF, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
+                                args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch,
                                 metrics=metrics, nIter=args.CL_GS_iter, **arguments)
         for arguments in argumentDictionaries["Multiview"]]
     return [flag, resultsMonoview, resultsMultiview]
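The `os.makedirs` / `errno.EEXIST` guard added above is the usual race-safe way to create an output directory on Python 2; a standalone sketch of the same idiom (the path is hypothetical):

    import errno
    import os

    def ensure_parent_dir(file_path):
        """Create the parent directory of file_path, tolerating a concurrent creation."""
        parent = os.path.dirname(file_path)
        if not os.path.exists(parent):
            try:
                os.makedirs(parent)
            except OSError as exc:
                # Another worker may have created it between the check and makedirs.
                if exc.errno != errno.EEXIST:
                    raise

    ensure_parent_dir("results/iter_1/train_labels.csv")

On Python 3.2+, `os.makedirs(parent, exist_ok=True)` achieves the same thing in one call.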
@@ -418,9 +428,21 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
 def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
                                kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
-                               benchmark=None, views=None, viewsIndices=None, flag=None, ExecMonoview_multicore=ExecMonoview_multicore,
-                               ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments):
+                               benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
+                               ExecMonoview_multicore=ExecMonoview_multicore,
+                               ExecMultiview_multicore=ExecMultiview_multicore,
+                               initMultiviewArguments=initMultiviewArguments):
+    """Used to run a benchmark using multiple cores. ExecMonoview_multicore, initMultiviewArguments and
+    ExecMultiview_multicore args are only used for tests"""
+    if not os.path.exists(os.path.dirname(directory + "train_labels.csv")):
+        try:
+            os.makedirs(os.path.dirname(directory + "train_labels.csv"))
+        except OSError as exc:
+            if exc.errno != errno.EEXIST:
+                raise
+    trainIndices, testIndices = classificationIndices
+    trainLabels = labels[trainIndices]
+    np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",")
     np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
     resultsMonoview = []
     labelsNames = list(LABELS_DICTIONARY.values())
@@ -430,7 +452,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
     for stepIndex in range(nbMulticoreToDo):
         resultsMonoview += (Parallel(n_jobs=nbCores)(
             delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, classificationIndices, kFolds,
-                                            coreIndex, args.type, args.pathF, randomState,
+                                            coreIndex, args.type, args.pathF, randomState, labels,
                                             hyperParamSearch=hyperParamSearch,
                                             metrics=metrics, nIter=args.CL_GS_iter,
                                             **argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores])
@@ -445,7 +467,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
     for stepIndex in range(nbMulticoreToDo):
         resultsMultiview += Parallel(n_jobs=nbCores)(
             delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds,
-                                             args.type, args.pathF, LABELS_DICTIONARY, randomState,
+                                             args.type, args.pathF, LABELS_DICTIONARY, randomState, labels,
                                              hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter,
                                              **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
             for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
@@ -453,8 +475,17 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
     return [flag, resultsMonoview, resultsMultiview]
-def execBenchmark(nbCores, statsIter, nbMulticlass, argumentsDictionaries, multiclassLabels,
-                  execOneBenchmark=execOneBenchmark, execOneBenchmark_multicore=execOneBenchmark_multicore):
+def execOneBenchmarkMonoCore(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None,
+                             kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None,
+                             benchmark=None, views=None, viewsIndices=None, flag=None, labels=None,
+                             ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore,
+                             initMultiviewArguments=initMultiviewArguments):
+    pass
+def execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentsDictionaries,
+                  execOneBenchmark=execOneBenchmark, execOneBenchmark_multicore=execOneBenchmark_multicore,
+                  execOneBenchmarkMonoCore=execOneBenchmarkMonoCore):
     """Used to execute the needed benchmark(s) on multicore or mono-core functions
     The execOneBenchmark and execOneBenchmark_multicore keywords args are only used in the tests"""
     # TODO : find a way to flag
@@ -463,24 +494,24 @@ def execBenchmark(nbCores, statsIter, nbMulticlass, argumentsDictionaries, multi
     results = []
     if nbCores > 1:
         if statsIter > 1 or nbMulticlass > 1:
-            nbExpsToDo = nbMulticlass*statsIter
+            nbExpsToDo = len(benchmarkArgumentsDictionaries)
             nbMulticoreToDo = range(int(math.ceil(float(nbExpsToDo) / nbCores)))
             for stepIndex in nbMulticoreToDo:
                 results += (Parallel(n_jobs=nbCores)(delayed(execOneBenchmark)
                                                      (coreIndex=coreIndex,
-                                                      **argumentsDictionaries[coreIndex + stepIndex * nbCores])
+                                                      **benchmarkArgumentsDictionaries[coreIndex + stepIndex * nbCores])
                                                      for coreIndex in range(min(nbCores, nbExpsToDo - stepIndex * nbCores))))
         else:
-            results += [execOneBenchmark_multicore(nbCores=nbCores, **argumentsDictionaries[0])]
+            results += [execOneBenchmark_multicore(nbCores=nbCores, **benchmarkArgumentsDictionaries[0])]
     else:
-        for arguments in argumentsDictionaries:
-            results += [execOneBenchmark(**arguments)]
+        for arguments in benchmarkArgumentsDictionaries:
+            results += [execOneBenchmarkMonoCore(**arguments)]
     logging.debug("Done:\t Executing all the needed biclass benchmarks")
     # Do everything with flagging
     logging.debug("Start:\t Analyzing preds")
-    # getResults(results, statsIter, nbMulticlass, argumentsDictionaries, multiclassLabels, metrics)
+    # getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries, multiclassLabels, metrics)
     logging.debug("Done:\t Analyzing preds")
     return results
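A minimal sketch of the chunked joblib dispatch used throughout this file: the benchmark argument dictionaries are processed in groups of at most `nbCores`, each group being run through `Parallel`/`delayed`. The `run_one` function below is a stand-in for `execOneBenchmark`, not the repository's code:

    import math
    from joblib import Parallel, delayed

    def run_one(coreIndex=-1, flag=None, **kwargs):
        # Stand-in for execOneBenchmark: one call per benchmark argument dictionary.
        return flag

    def run_all(benchmarkArguments, nbCores):
        results = []
        nbExps = len(benchmarkArguments)
        for stepIndex in range(int(math.ceil(float(nbExps) / nbCores))):
            results += Parallel(n_jobs=nbCores)(
                delayed(run_one)(coreIndex=coreIndex, **benchmarkArguments[coreIndex + stepIndex * nbCores])
                for coreIndex in range(min(nbCores, nbExps - stepIndex * nbCores)))
        return results

    print(run_all([{"flag": i} for i in range(5)], nbCores=2))  # -> [0, 1, 2, 3, 4]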
@@ -552,7 +583,33 @@ def execClassif(arguments):
     directories = execution.genDirecortiesNames(directory, statsIter, labelsCombinations,
                                                 multiclassMethod, LABELS_DICTIONARY)
     # TODO : Gen arguments dictionaries
-    benchmarkArgumentDictionaries = execution.genArgumentDictionaries(LABELS_DICTIONARY, directories, multiclassLabels, labelsCombinations, oldIndicesMulticlass, hyperParamSearch, args, kFolds, statsIterRandomStates, metrics, argumentDictionaries, benchmark)
+    benchmarkArgumentDictionaries = execution.genArgumentDictionaries(LABELS_DICTIONARY, directories, multiclassLabels,
+                                                                      labelsCombinations, oldIndicesMulticlass,
+                                                                      hyperParamSearch, args, kFolds,
+                                                                      statsIterRandomStates, metrics,
+                                                                      argumentDictionaries, benchmark)
+    nbMulticlass = len(labelsCombinations)
+    execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentDictionaries)
     if statsIter > 1:
         logging.debug("Start:\t Benchmark classification")
...
@@ -89,7 +89,7 @@ def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred,
 def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, KFolds, datasetFileIndex, databaseType,
-                           path, randomState, hyperParamSearch="randomizedSearch",
+                           path, randomState, labels, hyperParamSearch="randomizedSearch",
                            metrics=[["accuracy_score", None]], nIter=30, **args):
     DATASET = h5py.File(path + name + str(datasetFileIndex) + ".hdf5", "r")
     kwargs = args["args"]
@@ -97,7 +97,7 @@ def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices,
              range(DATASET.get("Metadata").attrs["nbView"])]
     neededViewIndex = views.index(kwargs["feat"])
     X = DATASET.get("View" + str(neededViewIndex))
-    Y = DATASET.get("Labels").value
+    Y = labels
     return ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, 1, databaseType, path,
                         randomState, hyperParamSearch=hyperParamSearch,
                         metrics=metrics, nIter=nIter, **args)
@@ -143,8 +143,8 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol
     logging.debug("Start:\t Predicting")
     full_labels_pred = cl_res.predict(X)
-    y_train_pred = full_labels_pred[classificationIndices[0]]
-    y_test_pred = full_labels_pred[classificationIndices[1]]
+    y_train_pred = cl_res.predict(X[classificationIndices[0]])
+    y_test_pred = cl_res.predict(X[classificationIndices[1]])
     logging.debug("Done:\t Predicting")
     t_end = time.time() - t_start
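The change above computes the train and test predictions directly on the corresponding index subsets rather than slicing one full-dataset prediction. A small scikit-learn sketch of the same pattern on synthetic data (all names and data here are illustrative):

    import numpy as np
    from sklearn.tree import DecisionTreeClassifier

    X = np.random.RandomState(42).rand(50, 3)
    y = (X[:, 0] > 0.5).astype(int)
    train_idx, test_idx = np.arange(40), np.arange(40, 50)

    clf = DecisionTreeClassifier(random_state=42).fit(X[train_idx], y[train_idx])
    y_train_pred = clf.predict(X[train_idx])  # predictions restricted to the train split
    y_test_pred = clf.predict(X[test_idx])    # predictions restricted to the test split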
...
@@ -68,17 +68,17 @@ def saveResults(LABELS_DICTIONARY, stringAnalysis, views, classifierModule, clas
 def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, databaseType, path, LABELS_DICTIONARY,
-                            randomState,
+                            randomState, labels,
                             hyperParamSearch=False, nbCores=1, metrics=None, nIter=30, **arguments):
     """Used to load an HDF5 dataset for each parallel job and execute multiview classification"""
     DATASET = h5py.File(path + name + str(coreIndex) + ".hdf5", "r")
     return ExecMultiview(directory, DATASET, name, learningRate, nbFolds, 1, databaseType, path, LABELS_DICTIONARY,
-                         randomState,
+                         randomState, labels,
                          hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=nIter, **arguments)
 def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCores, databaseType, path,
-                  LABELS_DICTIONARY, randomState,
+                  LABELS_DICTIONARY, randomState, labels,
                   hyperParamSearch=False, metrics=None, nIter=30, **kwargs):
     """Used to execute multiview classification and result analysis"""
     logging.debug("Start:\t Initialize constants")
@@ -106,7 +106,7 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCor
     logging.debug("Start:\t Optimizing hyperparameters")
     if hyperParamSearch != "None":
-        classifier = HyperParameterSearch.searchBestSettings(DATASET, classifierPackage,
+        classifier = HyperParameterSearch.searchBestSettings(DATASET, labels, classifierPackage,
                                                              CL_type, metrics, learningIndices,
                                                              KFolds, randomState,
                                                              viewsIndices=viewsIndices,
@@ -117,7 +117,7 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCor
     logging.debug("Done:\t Optimizing hyperparameters")
     logging.debug("Start:\t Fitting classifier")
-    classifier.fit_hdf5(DATASET, trainIndices=learningIndices, viewsIndices=viewsIndices, metric=metrics[0])
+    classifier.fit_hdf5(DATASET, labels, trainIndices=learningIndices, viewsIndices=viewsIndices, metric=metrics[0])
     logging.debug("Done:\t Fitting classifier")
     logging.debug("Start:\t Predicting")
...
@@ -167,8 +167,8 @@ class FusionClass:
     def setParams(self, paramsSet):
         self.classifier.setParams(paramsSet)
-    def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None, metric=["f1_score", None]):
-        self.classifier.fit_hdf5(DATASET, trainIndices=trainIndices, viewsIndices=viewsIndices)
+    def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None, metric=["f1_score", None]):
+        self.classifier.fit_hdf5(DATASET, labels, trainIndices=trainIndices, viewsIndices=viewsIndices)
     def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):
         if usedIndices is None:
...
@@ -71,7 +71,7 @@ class WeightedLinear(EarlyFusionClassifier):
         else:
             self.weights = np.array(map(float, kwargs['fusionMethodConfig']))
-    def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None):
+    def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None):
         if type(viewsIndices) == type(None):
             viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
         if trainIndices is None:
@@ -80,7 +80,7 @@ class WeightedLinear(EarlyFusionClassifier):
         self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=trainIndices, viewsIndices=viewsIndices)
         monoviewClassifierModule = getattr(MonoviewClassifiers, self.monoviewClassifierName)
         self.monoviewClassifier = monoviewClassifierModule.fit(self.monoviewData,
-                                                               DATASET.get("Labels").value[trainIndices],
+                                                               labels[trainIndices],
                                                                self.randomState,
                                                                NB_CORES=self.nbCores,
                                                                **self.monoviewClassifiersConfig)
...
@@ -133,7 +133,7 @@ class LateFusionClassifier(object):
         self.monoviewSelection = monoviewSelection
         self.randomState = randomState
-    def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None):
+    def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None):
         if type(viewsIndices) == type(None):
             viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
         if trainIndices is None:
@@ -142,6 +142,6 @@ class LateFusionClassifier(object):
         self.monoviewClassifiers = Parallel(n_jobs=self.nbCores)(
             delayed(fitMonoviewClassifier)(self.monoviewClassifiersNames[index],
                                            getV(DATASET, viewIndex, trainIndices),
-                                           DATASET.get("Labels").value[trainIndices],
+                                           labels[trainIndices],
                                            self.monoviewClassifiersConfigs[index], self.needProbas, self.randomState)
             for index, viewIndex in enumerate(viewsIndices))
@@ -114,7 +114,7 @@ class SCMForLinear(LateFusionClassifier):
         self.order = paramsSet[3]
         self.modelType = paramsSet[2]
-    def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None):
+    def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None):
         if viewsIndices is None:
             viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
         if trainIndices is None:
@@ -123,7 +123,7 @@ class SCMForLinear(LateFusionClassifier):
             monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[index])
             self.monoviewClassifiers.append(
                 monoviewClassifier.fit(getV(DATASET, viewIndex, trainIndices),
-                                       DATASET.get("Labels").value[trainIndices], self.randomState,
+                                       labels[trainIndices], self.randomState,
                                        NB_CORES=self.nbCores,
                                        **self.monoviewClassifiersConfigs[index]))
         self.SCMForLinearFusionFit(DATASET, usedIndices=trainIndices, viewsIndices=viewsIndices)
...
@@ -57,7 +57,7 @@ class SVMForLinear(LateFusionClassifier):
                                       NB_CORES=NB_CORES)
         self.SVMClassifier = None
-    def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None):
+    def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None):
         if viewsIndices is None:
             viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
         if trainIndices is None:
@@ -72,7 +72,7 @@ class SVMForLinear(LateFusionClassifier):
                                   for configIndex, config in enumerate(self.monoviewClassifiersConfigs[index]))
             self.monoviewClassifiers.append(
                 monoviewClassifier.fit(getV(DATASET, viewIndex, trainIndices),
-                                       DATASET.get("Labels").value[trainIndices], self.randomState,
+                                       labels[trainIndices], self.randomState,
                                        NB_CORES=self.nbCores,
                                        **self.monoviewClassifiersConfigs[index]))
         else:
...
@@ -199,7 +199,7 @@ class MumboClass:
         self.predictions = np.zeros((self.maxIter, nbView, trainLength))
         self.generalFs = np.zeros((self.maxIter, trainLength, nbClass))
-    def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None, metric=["f1_score", None]):
+    def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None, metric=["f1_score", None]):
         # Initialization
         if self.classifiersConfigs is None:
@@ -212,7 +212,7 @@ class MumboClass:
         NB_CLASS = DATASET.get("Metadata").attrs["nbClass"]
         NB_VIEW = len(viewsIndices)
         trainLength = len(trainIndices)
-        LABELS = DATASET.get("Labels").value[trainIndices]
+        LABELS = labels[trainIndices]
         self.initDataDependant(trainLength, NB_VIEW, NB_CLASS, LABELS)
         # Learning
         isStabilized = False
...
@@ -6,14 +6,14 @@ import itertools
 from .. import Metrics
-def searchBestSettings(dataset, classifierPackage, classifierName, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=None,
+def searchBestSettings(dataset, labels, classifierPackage, classifierName, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=None,
                        searchingTool="hyperParamSearch", nIter=1, **kwargs):
     """Used to select the right hyperparam optimization function to optimize hyper parameters"""
     if viewsIndices is None:
         viewsIndices = range(dataset.get("Metadata").attrs["nbView"])
     thismodule = sys.modules[__name__]
     searchingToolMethod = getattr(thismodule, searchingTool)
-    bestSettings = searchingToolMethod(dataset, classifierPackage, classifierName, metrics, iLearningIndices, iKFolds, randomState,
+    bestSettings = searchingToolMethod(dataset, labels, classifierPackage, classifierName, metrics, iLearningIndices, iKFolds, randomState,
                                        viewsIndices=viewsIndices, nIter=nIter, **kwargs)
     return bestSettings  # or well set clasifier ?
@@ -23,7 +23,7 @@ def gridSearch(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1,
     pass
-def randomizedSearch(dataset, classifierPackage, classifierName, metrics, learningIndices, KFolds, randomState, viewsIndices=None, nIter=1,
+def randomizedSearch(dataset, labels, classifierPackage, classifierName, metrics, learningIndices, KFolds, randomState, viewsIndices=None, nIter=1,
                      nbCores=1, **classificationKWARGS):
     """Used to perform a random search on the classifiers to optimize hyper parameters"""
     if viewsIndices is None:
@@ -45,7 +45,7 @@ def randomizedSearch(dataset, classifierPackage, classifierName, metrics, learni
     baseScore = 1000.0
     isBetter = "lower"
     bestSettings = None
-    kFolds = KFolds.split(learningIndices, dataset.get("Labels").value[learningIndices])
+    kFolds = KFolds.split(learningIndices, labels[learningIndices])
     for paramsSet in paramsSets:
         scores = []
         for trainIndices, testIndices in kFolds:
@@ -54,7 +54,7 @@ def randomizedSearch(dataset, classifierPackage, classifierName, metrics, learni
             classifier.fit_hdf5(dataset, trainIndices=learningIndices[trainIndices], viewsIndices=viewsIndices)
             testLabels = classifier.predict_hdf5(dataset, usedIndices=learningIndices[testIndices],
                                                  viewsIndices=viewsIndices)
-            testScore = metricModule.score(dataset.get("Labels").value[learningIndices[testIndices]], testLabels)
+            testScore = metricModule.score(labels[learningIndices[testIndices]], testLabels)
             scores.append(testScore)
         crossValScore = np.mean(np.array(scores))
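As a reference for the flow of `randomizedSearch`, here is a minimal sketch (not the repository's implementation, using scikit-learn's SVC as a stand-in classifier): draw random parameter sets, cross-validate each on the learning indices, and keep the best mean score. Note the real function can also treat lower scores as better (`isBetter = "lower"`); this sketch only handles higher-is-better metrics:

    import numpy as np
    from sklearn.model_selection import StratifiedKFold
    from sklearn.svm import SVC

    def randomized_search(X, y, learning_indices, random_state, n_iter=10, n_splits=3):
        best_score, best_params = -np.inf, None
        folds = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
        for _ in range(n_iter):
            params = {"C": 10 ** random_state.uniform(-2, 2)}  # one random parameter set
            scores = []
            for train, test in folds.split(learning_indices, y[learning_indices]):
                clf = SVC(**params).fit(X[learning_indices[train]], y[learning_indices[train]])
                scores.append(clf.score(X[learning_indices[test]], y[learning_indices[test]]))
            if np.mean(scores) > best_score:
                best_score, best_params = np.mean(scores), params
        return best_params, best_score

    rs = np.random.RandomState(42)
    X = rs.rand(60, 4)
    y = (X[:, 0] > 0.5).astype(int)
    print(randomized_search(X, y, np.arange(50), rs, n_iter=5))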
...
@@ -11,22 +11,28 @@ def genMulticlassLabels(labels, multiclassMethod, classificationIndices):
         combinations = itertools.combinations(np.arange(nbLabels), 2)
         multiclassLabels = []
         labelsIndices = []
-        oldIndicesMulticlass = []
+        indicesMulticlass = []
         for combination in combinations:
             labelsIndices.append(combination)
             oldIndices = [exampleIndex
                           for exampleIndex, exampleLabel in enumerate(labels)
                           if exampleLabel in combination]
-            oldTrainIndices = [[oldIndex for oldIndex in oldIndicesMulticlass if oldIndex in trainIndices]
-                               for trainIndices, testIndices in classificationIndices]
-            oldTestIndices = [[oldIndex for oldIndex in oldIndicesMulticlass if oldIndex in testIndices]
-                              for trainIndices, testIndices in classificationIndices]
-            oldIndicesMulticlass.append([oldTrainIndices, oldTestIndices])
-            multiclassLabels.append(np.array([1 if exampleLabel == combination[0]
-                                              else 0
-                                              for exampleLabel in labels[oldIndices]]))
+            trainIndices = [np.array([oldIndex for oldIndex in oldIndices if oldIndex in iterIndices[0]])
+                            for iterIndices in classificationIndices]
+            testIndices = [np.array([oldIndex for oldIndex in oldIndices if oldIndex in iterindices[1]])
+                           for iterindices in classificationIndices]
+            indicesMulticlass.append([trainIndices, testIndices])
+            newLabels = np.zeros(len(labels), dtype=int)-100
+            for labelIndex, label in enumerate(labels):
+                if label == combination[0]:
+                    newLabels[labelIndex] = 1
+                elif label == combination[1]:
+                    newLabels[labelIndex] = 0
+                else:
+                    pass
+            multiclassLabels.append(newLabels)
     elif multiclassMethod == "oneVersusRest":
         # TODO : Implement one versus rest if probas are not a problem anymore
         pass
-    return multiclassLabels, labelsIndices, oldIndicesMulticlass
+    return multiclassLabels, labelsIndices, indicesMulticlass
 #
\ No newline at end of file
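The new relabelling loop maps, for each pair `combination`, examples of the first label to 1, the second label to 0, and everything else to the placeholder -100. A small worked example on made-up labels, together with an equivalent vectorised form using `np.select`:

    import numpy as np

    labels = np.array([0, 1, 2, 1, 0, 3])
    combination = (0, 1)

    # Loop form, as in the commit
    new_labels = np.zeros(len(labels), dtype=int) - 100
    for index, label in enumerate(labels):
        if label == combination[0]:
            new_labels[index] = 1
        elif label == combination[1]:
            new_labels[index] = 0

    # Equivalent vectorised form
    vectorised = np.select([labels == combination[0], labels == combination[1]],
                           [1, 0], default=-100)

    print(new_labels)                              # [   1    0 -100    0    1 -100]
    print(np.array_equal(new_labels, vectorised))  # True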
@@ -10,12 +10,19 @@ class Test_genMulticlassLabels(unittest.TestCase):
     def setUpClass(cls):
         cls.random_state = np.random.RandomState(42)
         cls.labels = cls.random_state.randint(0,5,50)
+        cls.testIndices = [cls.random_state.choice(np.arange(50),size=10, replace=False), cls.random_state.choice(np.arange(50),size=10, replace=False)]
+        cls.classificationIndices = [[np.array([_ for _ in range(50) if _ not in cls.testIndices[0]]), cls.testIndices[0]],
+                                     [np.array([_ for _ in range(50) if _ not in cls.testIndices[1]]), cls.testIndices[1]]]
     def test_one_versus_one(cls):
-        multiclassLabels, labelsIndices, oldIndicesMulticlass = Multiclass.genMulticlassLabels(cls.labels, "oneVersusOne")
+        multiclassLabels, labelsIndices, oldIndicesMulticlass = Multiclass.genMulticlassLabels(cls.labels, "oneVersusOne", cls.classificationIndices)
         cls.assertEqual(len(multiclassLabels), 10)
         cls.assertEqual(labelsIndices, [(0,1), (0,2), (0,3), (0,4), (1,2), (1,3), (1,4), (2,3), (2,4), (3,4)])
-        np.testing.assert_array_equal(oldIndicesMulticlass[0],
-                                      np.array([5, 13, 15, 18, 20, 23, 24, 27, 33, 38, 39, 41, 43, 44, 45, 46, 48]))
+        np.testing.assert_array_equal(oldIndicesMulticlass[0][0][0],
+                                      np.array([5, 13, 15, 18, 20, 24, 27, 39, 41, 43, 44, 45, 46, 48]))
         np.testing.assert_array_equal(multiclassLabels[0],
-                                      np.array([0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0]))
+                                      np.array([-100, -100, -100, -100, -100, 0, -100, -100, -100, -100, -100, -100,
+                                                -100, 0, -100, 0, -100, -100, 1, -100, 0, -100, -100, 1, 1, -100, -100,
+                                                0, -100, -100, -100, -100, -100, 1, -100, -100, -100, -100, 1, 0, -100,
+                                                1, -100, 0, 0, 1, 0, -100, 0, -100]))