Skip to content
Snippets Groups Projects
Commit e78c4e02 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Worked on cluster adaptation for monoview classification

parent 61faae87
Branches
Tags
No related merge requests found
......@@ -167,7 +167,7 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds,
coreIndex, args.type, args.pathF, randomState, labels,
hyperParamSearch=hyperParamSearch, metrics=metrics,
nIter=args.CL_GS_iter, **argument)
nIter=args.CL_HPS_iter, **argument)
for argument in argumentDictionaries["Monoview"]]
logging.debug("Done:\t Monoview benchmark")
......@@ -181,7 +181,7 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
resultsMultiview += [
ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type,
args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch,
metrics=metrics, nIter=args.CL_GS_iter, **arguments)
metrics=metrics, nIter=args.CL_HPS_iter, **arguments)
for arguments in argumentDictionaries["Multiview"]]
logging.debug("Done:\t Multiview benchmark")
......@@ -220,7 +220,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, classificationIndices, kFolds,
coreIndex, args.type, args.pathF, randomState, labels,
hyperParamSearch=hyperParamSearch,
metrics=metrics, nIter=args.CL_GS_iter,
metrics=metrics, nIter=args.CL_HPS_iter,
**argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores])
for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))))
logging.debug("Done:\t Monoview benchmark")
......@@ -238,7 +238,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
resultsMultiview += Parallel(n_jobs=nbCores)(
delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds,
args.type, args.pathF, LABELS_DICTIONARY, randomState, labels,
hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter,
hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_HPS_iter,
**argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
logging.debug("Done:\t Multiview benchmark")
......@@ -280,7 +280,7 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, directory=Non
resultsMonoview += [ExecMonoview(directory, X, Y, args.name, labelsNames, classificationIndices, kFolds,
1, args.type, args.pathF, randomState,
hyperParamSearch=hyperParamSearch, metrics=metrics,
nIter=args.CL_GS_iter, **arguments)]
nIter=args.CL_HPS_iter, **arguments)]
logging.debug("Done:\t Monoview benchmark")
logging.debug("Start:\t Multiview arguments initialization")
......@@ -294,7 +294,7 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, directory=Non
resultsMultiview += [
ExecMultiview(directory, DATASET, args.name, classificationIndices, kFolds, 1, args.type,
args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch,
metrics=metrics, nIter=args.CL_GS_iter, **arguments)]
metrics=metrics, nIter=args.CL_HPS_iter, **arguments)]
logging.debug("Done:\t Multiview benchmark")
return [flag, resultsMonoview, resultsMultiview]
......@@ -444,7 +444,7 @@ def execClassif(arguments):
# resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds,
# coreIndex, args.type, args.pathF, randomState,
# hyperParamSearch=hyperParamSearch,
# metrics=metrics, nIter=args.CL_GS_iter,
# metrics=metrics, nIter=args.CL_HPS_iter,
# **arguments)
# for arguments in argumentDictionaries["Monoview"]]
# monoviewTime = time.time() - dataBaseTime - start
......@@ -456,7 +456,7 @@ def execClassif(arguments):
# resultsMultiview += [
# ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type,
# args.pathF, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
# metrics=metrics, nIter=args.CL_GS_iter, **arguments)
# metrics=metrics, nIter=args.CL_HPS_iter, **arguments)
# for arguments in argumentDictionaries["Multiview"]]
# multiviewTime = time.time() - monoviewTime - dataBaseTime - start
#
......@@ -501,14 +501,14 @@ def execClassif(arguments):
# args.type,
# args.pathF,
# LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
# metrics=metrics, nIter=args.CL_GS_iter,
# metrics=metrics, nIter=args.CL_HPS_iter,
# **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
# for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
# else:
# resultsMultiview = [
# ExecMultiview(directory, DATASET, args.name, classificationIndices, kFolds, 1, args.type, args.pathF,
# LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
# metrics=metrics, nIter=args.CL_GS_iter, **arguments) for arguments in
# metrics=metrics, nIter=args.CL_HPS_iter, **arguments) for arguments in
# argumentDictionaries["Multiview"]]
# multiviewTime = time.time() - monoviewTime - dataBaseTime - start
# if nbCores > 1:
......
......@@ -196,85 +196,106 @@ if __name__ == '__main__':
import argparse
import pickle
from ..utils import Dataset
parser = argparse.ArgumentParser(
description='This methods is used to execute a multiclass classification with one single view. ',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
groupStandard = parser.add_argument_group('Standard arguments')
groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console')
groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of Dataset', default=".hdf5")
groupStandard.add_argument('--name', metavar='STRING', action='store',
help='Name of Database (default: %(default)s)', default='DB')
help='Name of Database', default='Plausible')
groupStandard.add_argument('--cl_name', metavar='STRING', action='store',
help='THe name of the monoview classifier to use', default='DecisionTree')
groupStandard.add_argument('--view', metavar='STRING', action='store',
help='Name of Feature for Classification (default: %(default)s)', default='View0')
help='Name of the view used', default='View0')
groupStandard.add_argument('--pathF', metavar='STRING', action='store',
help='Path to the views (default: %(default)s)', default='Results-FeatExtr/')
help='Path to the database hdf5 file', default='../../../Data/Plausible')
groupStandard.add_argument('--directory', metavar='STRING', action='store',
help='Path to the views (default: %(default)s)', default='Results-FeatExtr/')
help='Path of the output directory', default='')
groupStandard.add_argument('--labelsNames', metavar='STRING', action='store', nargs='+',
help='Name of classLabels CSV-file (default: %(default)s)', default='classLabels.csv')
help='Name of the labels used for classification', default=['Yes', 'No'])
groupStandard.add_argument('--classificationIndices', metavar='STRING', action='store',
help='Name of classLabels-Description CSV-file (default: %(default)s)',
default='classLabels-Description.csv')
groupStandard.add_argument('--nbCores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int,
default=1)
help='Path to the classificationIndices pickle file',
default='')
groupStandard.add_argument('--KFolds', metavar='STRING', action='store',
help='Path to the kFolds pickle file',
default='')
groupStandard.add_argument('--nbCores', metavar='INT', action='store', help='Number of cores, -1 for all',
type=int, default=1)
groupStandard.add_argument('--randomState', metavar='INT', action='store',
help='Seed for the random state or pickable randomstate file', default=42)
groupStandard.add_argument('--hyperParamSearch', metavar='STRING', action='store',
help='The type of method used tosearch the best set of hyper parameters', default='randomizedSearch')
groupStandard.add_argument('--metrics', metavar='STRING', action='store', nargs="+",
help='Metrics used in the experimentation, the first will be the one used in CV',
default=[''])
groupStandard.add_argument('--nIter', metavar='INT', action='store', help='Number of itetarion in hyper parameter search', type=int,
help='The type of method used to search the best set of hyper parameters',
default='randomizedSearch')
groupStandard.add_argument('--metrics', metavar='STRING', action='store',
help='Path to the pickle file describing the metricsused to analyze the performance',
default='')
groupStandard.add_argument('--kwargs', metavar='STRING', action='store',
help='Path to the pickle file containing the key-words arguments used for classification',
default='')
groupStandard.add_argument('--nIter', metavar='INT', action='store',
help='Number of itetarion in hyper parameter search', type=int,
default=10)
args = parser.parse_args()
directory = args.directory
name = args.name
classifierName = args.cl_name
labelsNames = args.labelsNames
classificationIndices = args.classificationIndices
KFolds = args.KFolds
viewName = args.view
with open(args.classificationIndices, 'rb') as handle:
classificationIndices = pickle.load(handle)
with open(args.KFolds, 'rb') as handle:
KFolds = pickle.load(handle)
nbCores = args.nbCores
databaseType = None
path = args.pathF
randomState = args.randomState
with open(args.randomState, 'rb') as handle:
randomState = pickle.load(handle)
hyperParamSearch = args.hyperParamSearch
metrics = args.metrics
with open(args.metrics, 'rb') as handle:
metrics = pickle.load(handle)
nIter = args.nIter
kwargs = args.kwargs
# Extract the data using MPI
X = None
Y = None
with open(args.kwargs, 'rb') as handle:
kwargs = pickle.load(handle)
logfilename = "gen a goodlogfilename"
databaseType = None
# Extract the data using MPI
X, Y = Dataset.getMonoviewShared(path, name, viewName)
logfile = directory + logfilename
if os.path.isfile(logfile + ".log"):
# Init log
logFileName = time.strftime("%Y_%m_%d-%H:%M:%S") + "-" + name + "-"+ viewName +"-" + classifierName +'-LOG'
if not os.path.exists(os.path.dirname(directory + logFileName)):
try:
os.makedirs(os.path.dirname(directory + logFileName))
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
logFile = directory + logFileName
if os.path.isfile(logFile + ".log"):
for i in range(1, 20):
testFileName = logfilename + "-" + str(i) + ".log"
if not os.path.isfile(directory + testFileName):
logfile = directory + testFileName
testFileName = logFileName + "-" + str(i) + ".log"
if not (os.path.isfile(directory + testFileName)):
logFile = directory + testFileName
break
else:
logfile += ".log"
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logfile, level=logging.DEBUG,
logFile += ".log"
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logFile, level=logging.DEBUG,
filemode='w')
if args.log:
logging.getLogger().addHandler(logging.StreamHandler())
# Computing on multiple cores
res = ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, nbCores, databaseType, path,
randomState, hyperParamSearch=hyperParamSearch,
metrics=metrics, nIter=nIter, **kwargs)
with open(directory + "res.pickle", "wb") as handle:
pickle.dump(randomState, handle)
pickle.dump(res, handle)
# Pickle the res in a file to be reused.
......
......@@ -2,7 +2,7 @@ import logging
import os
import select
import sys
import h5py
import numpy as np
from scipy import sparse
......@@ -114,3 +114,10 @@ def input_(timeout=15):
else:
return "y"
def getMonoviewShared(path, name, viewName, labelsNames=None, classificationIndices=None):
    """Load one view's data matrix and the label vector from a dataset's HDF5 file.

    ATM is not used with shared memory, but soon :)

    Parameters
    ----------
    path : str
        Directory containing the dataset file; concatenated directly with
        ``name``, so it is expected to end with a path separator.
    name : str
        Dataset name; the file read is ``path + name + ".hdf5"``.
    viewName : str
        Name of the HDF5 dataset holding the view's samples.
    labelsNames, classificationIndices : optional
        Unused here; kept with defaults for backward compatibility because
        callers in this project pass only the first three arguments.

    Returns
    -------
    tuple
        ``(X, Y)`` — the view matrix and the label vector, fully read into
        memory before the file is closed.
    """
    # Open read-only: the previous mode "w" would truncate (destroy) the
    # dataset file instead of reading it.
    with h5py.File(path + name + ".hdf5", "r") as hdf5_file:
        # ``[()]`` reads the whole dataset into memory; the ``.value``
        # attribute it replaces is deprecated in modern h5py.
        X = hdf5_file.get(viewName)[()]
        Y = hdf5_file.get("Labels")[()]
    return X, Y
......@@ -76,7 +76,7 @@ def parseTheArgs(arguments):
, default=[''])
groupClass.add_argument('--CL_metric_princ', metavar='STRING', action='store',
help='Determine which metric to use for randomSearch and optimization', default="f1_score")
groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store',
groupClass.add_argument('--CL_HPS_iter', metavar='INT', action='store',
help='Determine how many hyper parameters optimization tests to do', type=int, default=2)
groupClass.add_argument('--CL_HPS_type', metavar='STRING', action='store',
help='Determine which hyperparamter search function use', default="randomizedSearch")
......@@ -239,7 +239,7 @@ def initRandomState(randomStateArg, directory):
def initLogFile(args):
"""Used to init the directory where the preds will be stored and the log file"""
resultDirectory = "../Results/" + args.name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") + "/"
logFileName = time.strftime("%Y%m%d-%H%M%S") + "-" + ''.join(args.CL_type) + "-" + "_".join(
logFileName = time.strftime("%Y_%m_%d-%H:%M:%S") + "-" + ''.join(args.CL_type) + "-" + "_".join(
args.views) + "-" + args.name + "-LOG"
if not os.path.exists(os.path.dirname(resultDirectory + logFileName)):
try:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment