Commit e78c4e02 authored by Baptiste Bauvin

Worked on cluster adaptation for monoview classification

parent 61faae87
@@ -167,7 +167,7 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
     resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds,
                                                coreIndex, args.type, args.pathF, randomState, labels,
                                                hyperParamSearch=hyperParamSearch, metrics=metrics,
-                                               nIter=args.CL_GS_iter, **argument)
+                                               nIter=args.CL_HPS_iter, **argument)
                         for argument in argumentDictionaries["Monoview"]]
     logging.debug("Done:\t Monoview benchmark")
@@ -181,7 +181,7 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
     resultsMultiview += [
         ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type,
                                 args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch,
-                                metrics=metrics, nIter=args.CL_GS_iter, **arguments)
+                                metrics=metrics, nIter=args.CL_HPS_iter, **arguments)
         for arguments in argumentDictionaries["Multiview"]]
     logging.debug("Done:\t Multiview benchmark")
@@ -220,7 +220,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
         delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, classificationIndices, kFolds,
                                         coreIndex, args.type, args.pathF, randomState, labels,
                                         hyperParamSearch=hyperParamSearch,
-                                        metrics=metrics, nIter=args.CL_GS_iter,
+                                        metrics=metrics, nIter=args.CL_HPS_iter,
                                         **argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores])
         for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))))
     logging.debug("Done:\t Monoview benchmark")
@@ -238,7 +238,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
     resultsMultiview += Parallel(n_jobs=nbCores)(
         delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds,
                                          args.type, args.pathF, LABELS_DICTIONARY, randomState, labels,
-                                         hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter,
+                                         hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_HPS_iter,
                                          **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
         for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
     logging.debug("Done:\t Multiview benchmark")
@@ -280,7 +280,7 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, directory=Non
         resultsMonoview += [ExecMonoview(directory, X, Y, args.name, labelsNames, classificationIndices, kFolds,
                                          1, args.type, args.pathF, randomState,
                                          hyperParamSearch=hyperParamSearch, metrics=metrics,
-                                         nIter=args.CL_GS_iter, **arguments)]
+                                         nIter=args.CL_HPS_iter, **arguments)]
     logging.debug("Done:\t Monoview benchmark")
     logging.debug("Start:\t Multiview arguments initialization")
@@ -294,7 +294,7 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, directory=Non
         resultsMultiview += [
             ExecMultiview(directory, DATASET, args.name, classificationIndices, kFolds, 1, args.type,
                           args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch,
-                          metrics=metrics, nIter=args.CL_GS_iter, **arguments)]
+                          metrics=metrics, nIter=args.CL_HPS_iter, **arguments)]
     logging.debug("Done:\t Multiview benchmark")
     return [flag, resultsMonoview, resultsMultiview]
@@ -444,7 +444,7 @@ def execClassif(arguments):
 #             resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds,
 #                                                        coreIndex, args.type, args.pathF, randomState,
 #                                                        hyperParamSearch=hyperParamSearch,
-#                                                        metrics=metrics, nIter=args.CL_GS_iter,
+#                                                        metrics=metrics, nIter=args.CL_HPS_iter,
 #                                                        **arguments)
 #                                 for arguments in argumentDictionaries["Monoview"]]
 #             monoviewTime = time.time() - dataBaseTime - start
@@ -456,7 +456,7 @@ def execClassif(arguments):
 #             resultsMultiview += [
 #                 ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type,
 #                                         args.pathF, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
-#                                         metrics=metrics, nIter=args.CL_GS_iter, **arguments)
+#                                         metrics=metrics, nIter=args.CL_HPS_iter, **arguments)
 #                 for arguments in argumentDictionaries["Multiview"]]
 #             multiviewTime = time.time() - monoviewTime - dataBaseTime - start
 #
@@ -501,14 +501,14 @@ def execClassif(arguments):
 #                                                       args.type,
 #                                                       args.pathF,
 #                                                       LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
-#                                                       metrics=metrics, nIter=args.CL_GS_iter,
+#                                                       metrics=metrics, nIter=args.CL_HPS_iter,
 #                                                       **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
 #                 for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
 #         else:
 #             resultsMultiview = [
 #                 ExecMultiview(directory, DATASET, args.name, classificationIndices, kFolds, 1, args.type, args.pathF,
 #                               LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
-#                               metrics=metrics, nIter=args.CL_GS_iter, **arguments) for arguments in
+#                               metrics=metrics, nIter=args.CL_HPS_iter, **arguments) for arguments in
 #                               argumentDictionaries["Multiview"]]
 #         multiviewTime = time.time() - monoviewTime - dataBaseTime - start
 #         if nbCores > 1:
...
@@ -196,85 +196,106 @@ if __name__ == '__main__':
     import argparse
     import pickle
+    from ..utils import Dataset
     parser = argparse.ArgumentParser(
         description='This methods is used to execute a multiclass classification with one single view. ',
         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     groupStandard = parser.add_argument_group('Standard arguments')
     groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console')
+    groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of Dataset', default=".hdf5")
     groupStandard.add_argument('--name', metavar='STRING', action='store',
-                               help='Name of Database (default: %(default)s)', default='DB')
+                               help='Name of Database', default='Plausible')
+    groupStandard.add_argument('--cl_name', metavar='STRING', action='store',
+                               help='THe name of the monoview classifier to use', default='DecisionTree')
     groupStandard.add_argument('--view', metavar='STRING', action='store',
-                               help='Name of Feature for Classification (default: %(default)s)', default='View0')
+                               help='Name of the view used', default='View0')
     groupStandard.add_argument('--pathF', metavar='STRING', action='store',
-                               help='Path to the views (default: %(default)s)', default='Results-FeatExtr/')
+                               help='Path to the database hdf5 file', default='../../../Data/Plausible')
     groupStandard.add_argument('--directory', metavar='STRING', action='store',
-                               help='Path to the views (default: %(default)s)', default='Results-FeatExtr/')
+                               help='Path of the output directory', default='')
     groupStandard.add_argument('--labelsNames', metavar='STRING', action='store', nargs='+',
-                               help='Name of classLabels CSV-file (default: %(default)s)', default='classLabels.csv')
+                               help='Name of the labels used for classification', default=['Yes', 'No'])
     groupStandard.add_argument('--classificationIndices', metavar='STRING', action='store',
-                               help='Name of classLabels-Description CSV-file (default: %(default)s)',
-                               default='classLabels-Description.csv')
-    groupStandard.add_argument('--nbCores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int,
-                               default=1)
+                               help='Path to the classificationIndices pickle file',
+                               default='')
+    groupStandard.add_argument('--KFolds', metavar='STRING', action='store',
+                               help='Path to the kFolds pickle file',
+                               default='')
+    groupStandard.add_argument('--nbCores', metavar='INT', action='store', help='Number of cores, -1 for all',
+                               type=int, default=1)
     groupStandard.add_argument('--randomState', metavar='INT', action='store',
                                help='Seed for the random state or pickable randomstate file', default=42)
     groupStandard.add_argument('--hyperParamSearch', metavar='STRING', action='store',
-                               help='The type of method used tosearch the best set of hyper parameters', default='randomizedSearch')
-    groupStandard.add_argument('--metrics', metavar='STRING', action='store', nargs="+",
-                               help='Metrics used in the experimentation, the first will be the one used in CV',
-                               default=[''])
-    groupStandard.add_argument('--nIter', metavar='INT', action='store', help='Number of itetarion in hyper parameter search', type=int,
+                               help='The type of method used to search the best set of hyper parameters',
+                               default='randomizedSearch')
+    groupStandard.add_argument('--metrics', metavar='STRING', action='store',
+                               help='Path to the pickle file describing the metricsused to analyze the performance',
+                               default='')
+    groupStandard.add_argument('--kwargs', metavar='STRING', action='store',
+                               help='Path to the pickle file containing the key-words arguments used for classification',
+                               default='')
+    groupStandard.add_argument('--nIter', metavar='INT', action='store',
+                               help='Number of itetarion in hyper parameter search', type=int,
                                default=10)
     args = parser.parse_args()
     directory = args.directory
     name = args.name
+    classifierName = args.cl_name
     labelsNames = args.labelsNames
-    classificationIndices = args.classificationIndices
-    KFolds = args.KFolds
+    viewName = args.view
+    with open(args.classificationIndices, 'rb') as handle:
+        classificationIndices = pickle.load(handle)
+    with open(args.KFolds, 'rb') as handle:
+        KFolds = pickle.load(handle)
     nbCores = args.nbCores
-    databaseType = None
     path = args.pathF
-    randomState = args.randomState
+    with open(args.randomState, 'rb') as handle:
+        randomState = pickle.load(handle)
     hyperParamSearch = args.hyperParamSearch
-    metrics = args.metrics
+    with open(args.metrics, 'rb') as handle:
+        metrics = pickle.load(handle)
     nIter = args.nIter
-    kwargs = args.kwargs
-    # Extract the data using MPI
-    X = None
-    Y = None
-    logfilename = "gen a goodlogfilename"
+    with open(args.kwargs, 'rb') as handle:
+        kwargs = pickle.load(handle)
+    databaseType = None
+    # Extract the data using MPI
+    X, Y = Dataset.getMonoviewShared(path, name, viewName)
-    logfile = directory + logfilename
-    if os.path.isfile(logfile + ".log"):
+    # Init log
+    logFileName = time.strftime("%Y_%m_%d-%H:%M:%S") + "-" + name + "-"+ viewName +"-" + classifierName +'-LOG'
+    if not os.path.exists(os.path.dirname(directory + logFileName)):
+        try:
+            os.makedirs(os.path.dirname(directory + logFileName))
+        except OSError as exc:
+            if exc.errno != errno.EEXIST:
+                raise
+    logFile = directory + logFileName
+    if os.path.isfile(logFile + ".log"):
         for i in range(1, 20):
-            testFileName = logfilename + "-" + str(i) + ".log"
-            if not os.path.isfile(directory + testFileName):
-                logfile = directory + testFileName
+            testFileName = logFileName + "-" + str(i) + ".log"
+            if not (os.path.isfile(directory + testFileName)):
+                logFile = directory + testFileName
                 break
     else:
-        logfile += ".log"
-    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logfile, level=logging.DEBUG,
+        logFile += ".log"
+    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logFile, level=logging.DEBUG,
                         filemode='w')
     if args.log:
         logging.getLogger().addHandler(logging.StreamHandler())
-    # Computing on multiple cores
     res = ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, nbCores, databaseType, path,
                        randomState, hyperParamSearch=hyperParamSearch,
                        metrics=metrics, nIter=nIter, **kwargs)
     with open(directory + "res.pickle", "wb") as handle:
-        pickle.dump(randomState, handle)
+        pickle.dump(res, handle)
     # Pickle the res in a file to be reused.
...
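In the hunk above, ExecMonoview's __main__ block becomes a standalone entry point whose non-trivial arguments (classification indices, folds, random state, metrics and classifier kwargs) are reloaded from pickle files, which is what lets a cluster scheduler launch each monoview task as its own process. Below is a minimal sketch of how a dispatcher might prepare those files and build the matching command line; the helper name, the file layout and the script name ExecMonoview.py are assumptions, not part of the commit.

import os
import pickle


def prepare_monoview_job(job_dir, classification_indices, k_folds, random_state, metrics, classifier_kwargs):
    """Hypothetical helper: pickle the arguments the standalone script reloads,
    then return a command line matching the argparse flags added in this commit."""
    os.makedirs(job_dir, exist_ok=True)
    pickled = {}
    for file_name, obj in [("classificationIndices.pck", classification_indices),
                           ("KFolds.pck", k_folds),
                           ("randomState.pck", random_state),
                           ("metrics.pck", metrics),
                           ("kwargs.pck", classifier_kwargs)]:
        file_path = os.path.join(job_dir, file_name)
        with open(file_path, "wb") as handle:
            pickle.dump(obj, handle)
        pickled[file_name] = file_path
    return ["python", "ExecMonoview.py",  # assumed script name
            "--name", "Plausible", "--cl_name", "DecisionTree", "--view", "View0",
            "--pathF", "../../../Data/Plausible", "--directory", job_dir + "/",
            "--classificationIndices", pickled["classificationIndices.pck"],
            "--KFolds", pickled["KFolds.pck"],
            "--randomState", pickled["randomState.pck"],
            "--metrics", pickled["metrics.pck"],
            "--kwargs", pickled["kwargs.pck"],
            "--nIter", "10"]

A scheduler wrapper could then hand the returned list to subprocess.Popen, one call per view/classifier pair.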
@@ -2,7 +2,7 @@ import logging
 import os
 import select
 import sys
+import h5py
 import numpy as np
 from scipy import sparse
@@ -114,3 +114,10 @@ def input_(timeout=15):
     else:
         return "y"
+
+
+def getMonoviewShared(path, name, viewName, labelsNames, classificationIndices):
+    """ATM is not used with shared memory, but soon :)"""
+    HDF5_dataset_file = h5py.File(path + name + ".hdf5", "w")
+    X = HDF5_dataset_file.get(viewName).value
+    Y = HDF5_dataset_file.get("Labels").value
+    return X, Y
\ No newline at end of file
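The new getMonoviewShared helper above lets each worker pull one view and the labels straight from the shared HDF5 dataset instead of receiving the arrays from a parent process. For comparison, here is a minimal read-side sketch of the same access pattern, assuming read-only mode ("r") and a context manager rather than the "w" mode used in the committed version; the function name is hypothetical.

import h5py


def load_monoview_data(path, name, view_name):
    """Sketch: read one view and the labels from the shared HDF5 dataset."""
    with h5py.File(path + name + ".hdf5", "r") as dataset_file:
        X = dataset_file[view_name][()]  # full feature matrix of the requested view
        Y = dataset_file["Labels"][()]   # label vector shared by all views
    return X, Y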
@@ -76,7 +76,7 @@ def parseTheArgs(arguments):
                             , default=[''])
     groupClass.add_argument('--CL_metric_princ', metavar='STRING', action='store',
                             help='Determine which metric to use for randomSearch and optimization', default="f1_score")
-    groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store',
+    groupClass.add_argument('--CL_HPS_iter', metavar='INT', action='store',
                             help='Determine how many hyper parameters optimization tests to do', type=int, default=2)
     groupClass.add_argument('--CL_HPS_type', metavar='STRING', action='store',
                             help='Determine which hyperparamter search function use', default="randomizedSearch")
@@ -239,7 +239,7 @@ def initRandomState(randomStateArg, directory):
 def initLogFile(args):
     """Used to init the directory where the preds will be stored and the log file"""
     resultDirectory = "../Results/" + args.name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") + "/"
-    logFileName = time.strftime("%Y%m%d-%H%M%S") + "-" + ''.join(args.CL_type) + "-" + "_".join(
+    logFileName = time.strftime("%Y_%m_%d-%H:%M:%S") + "-" + ''.join(args.CL_type) + "-" + "_".join(
         args.views) + "-" + args.name + "-LOG"
     if not os.path.exists(os.path.dirname(resultDirectory + logFileName)):
         try:
...