Commit 0683bb62 authored by bbauvin

Refactored Execfile

parent bcb38103
# Import built-in modules
import argparse
import pkgutil  # for TimeStamp in CSVFile
import os
import time
import sys
import select
import logging
import errno
import cPickle

# Import 3rd party modules
from joblib import Parallel, delayed
import numpy as np
import math
import matplotlib
import sklearn

# Import own modules
import Multiview
@@ -25,6 +21,7 @@ from Monoview.ExecClassifMonoView import ExecMonoview, ExecMonoview_multicore
import Multiview.GetMultiviewDb as DB
from Versions import testVersions
from ResultAnalysis import resultAnalysis, analyzeLabels, analyzeIterResults
from utils import execution, Dataset
# Author-Info
__author__ = "Baptiste Bauvin"
@@ -33,96 +30,6 @@ __status__ = "Prototype" # Production, Development, Prototype
matplotlib.use('Agg')  # Anti-Grain Geometry C++ library to make a raster (pixel) image of the figure

def initLogFile(args):
    resultDirectory = "../../Results/" + args.name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") + "/"
    logFileName = time.strftime("%Y%m%d-%H%M%S") + "-" + ''.join(args.CL_type) + "-" + "_".join(
        args.views) + "-" + args.name + "-LOG"
    if not os.path.exists(os.path.dirname(resultDirectory + logFileName)):
        try:
            os.makedirs(os.path.dirname(resultDirectory + logFileName))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    logFile = resultDirectory + logFileName
    if os.path.isfile(logFile + ".log"):
        for i in range(1, 20):
            testFileName = logFileName + "-" + str(i) + ".log"
            if not os.path.isfile(resultDirectory + testFileName):
                logFile = resultDirectory + testFileName
                break
    else:
        logFile += ".log"
    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logFile, level=logging.DEBUG,
                        filemode='w')
    if args.log:
        logging.getLogger().addHandler(logging.StreamHandler())
    return resultDirectory

def input_(timeout=15):
    print "You have " + str(timeout) + " seconds to stop the script by typing n"
    i, o, e = select.select([sys.stdin], [], [], timeout)
    if i:
        return sys.stdin.readline().strip()
    else:
        return "y"

def confirm(resp=True, timeout=15):
    ans = input_(timeout)
    if not ans:
        return resp
    if ans not in ['y', 'Y', 'n', 'N']:
        print 'please enter y or n.'
    if ans == 'y' or ans == 'Y':
        return True
    if ans == 'n' or ans == 'N':
        return False

def initMultipleDatasets(args, nbCores):
    """Used to create copies of the dataset if multicore computation is used
    Needs args.pathF and args.name"""
    if nbCores > 1:
        if DB.datasetsAlreadyExist(args.pathF, args.name, nbCores):
            logging.debug("Info:\t Enough copies of the dataset are already available")
            pass
        else:
            logging.debug("Start:\t Creating " + str(nbCores) + " temporary datasets for multiprocessing")
            logging.warning(" WARNING : /!\ This may use a lot of HDD storage space : " +
                            str(os.path.getsize(args.pathF + args.name + ".hdf5") * nbCores / float(
                                1024) / 1000 / 1000) + " Gbytes /!\ ")
            confirmation = confirm()
            if not confirmation:
                sys.exit(0)
            else:
                datasetFiles = DB.copyHDF5(args.pathF, args.name, nbCores)
                logging.debug("Start:\t Creating datasets for multiprocessing")
                return datasetFiles

def initViews(DATASET, args):
    """Used to return the views names that will be used by the algos, their indices and all the views names
    Needs args.views"""
    NB_VIEW = DATASET.get("Metadata").attrs["nbView"]
    if args.views != [""]:
        allowedViews = args.views
        allViews = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)]
        views = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)
                 if str(DATASET.get("View" + str(viewIndex)).attrs["name"]) in allowedViews]
        viewsIndices = [viewIndex for viewIndex in range(NB_VIEW)
                        if str(DATASET.get("View" + str(viewIndex)).attrs["name"]) in allowedViews]
        return views, viewsIndices, allViews
    else:
        views = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)]
        viewsIndices = np.arange(NB_VIEW)
        allViews = views
        return views, viewsIndices, allViews

def initBenchmark(args):
    """Used to create a list of all the algorithm package names used for the benchmark
    Needs args.CL_type, args.CL_algos_multiview, args.MU_types, args.FU_types, args.FU_late_methods,
@@ -241,43 +148,6 @@ def arangeMetrics(metrics, metricPrinc):
    return metrics

def genSplits(statsIter, indices, DATASET, splitRatio, statsIterRandomStates):
    if statsIter > 1:
        splits = []
        for randomState in statsIterRandomStates:
            trainIndices, testIndices, a, b = sklearn.model_selection.train_test_split(indices,
                                                                                       DATASET.get("Labels").value,
                                                                                       test_size=splitRatio,
                                                                                       random_state=randomState)
            splits.append([trainIndices, testIndices])
        return splits
    else:
        trainIndices, testIndices, a, b = sklearn.model_selection.train_test_split(indices,
                                                                                   DATASET.get("Labels").value,
                                                                                   test_size=splitRatio,
                                                                                   random_state=statsIterRandomStates)
        return trainIndices, testIndices

def genKFolds(statsIter, nbFolds, statsIterRandomStates):
    if statsIter > 1:
        foldsList = []
        for randomState in statsIterRandomStates:
            foldsList.append(sklearn.model_selection.KFold(n_splits=nbFolds, random_state=randomState))
        return foldsList
    else:
        return sklearn.model_selection.KFold(n_splits=nbFolds, random_state=statsIterRandomStates)

def genDirecortiesNames(directory, statsIter):
    if statsIter > 1:
        directories = []
        for i in range(statsIter):
            directories.append(directory + "iter_" + str(i + 1) + "/")
        return directories
    else:
        return directory

def classifyOneIter_multicore(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, args, classificationIndices,
                              kFolds,
                              randomState, hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, start,
@@ -329,7 +199,8 @@ def classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory,
                    randomState, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start,
                    benchmark, views):
    print classificationIndices[0]
-    import pdb;pdb.set_trace()
+    import pdb;
+    pdb.set_trace()
    np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",")
    resultsMonoview = []
@@ -401,203 +272,21 @@ def classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory,
    return results

def initRandomState(randomStateArg, directory):
    if randomStateArg is None:
        randomState = np.random.RandomState(randomStateArg)
    else:
        try:
            seed = int(randomStateArg)
            randomState = np.random.RandomState(seed)
        except ValueError:
            fileName = randomStateArg
            with open(fileName, 'rb') as handle:
                randomState = cPickle.load(handle)
    with open(directory + "randomState.pickle", "wb") as handle:
        cPickle.dump(randomState, handle)
    return randomState


# _______________ #
# __ EXECUTION __ #
# _______________ #

testVersions()

parser = argparse.ArgumentParser(
    description='This file is used to benchmark the scores of multiple classification algorithms on multiview data.',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

groupStandard = parser.add_argument_group('Standard arguments')
groupStandard.add_argument('-log', action='store_true', help='Use option to activate logging to console')
groupStandard.add_argument('--name', metavar='STRING', action='store',
                           help='Name of Database (default: %(default)s)', default='Plausible')
groupStandard.add_argument('--type', metavar='STRING', action='store',
                           help='Type of database : .hdf5 or .csv (default: %(default)s)', default='.hdf5')
groupStandard.add_argument('--views', metavar='STRING', action='store', nargs="+",
                           help='Name of the views selected for learning (default: %(default)s)', default=[''])
groupStandard.add_argument('--pathF', metavar='STRING', action='store',
                           help='Path to the views (default: %(default)s)',
                           default='/home/bbauvin/Documents/Data/Data_multi_omics/')
groupStandard.add_argument('--nice', metavar='INT', action='store', type=int,
                           help='Niceness for the process', default=0)
groupStandard.add_argument('--randomState', metavar='STRING', action='store',
                           help="The random state seed to use or a file where we can find its get_state",
                           default=None)

groupClass = parser.add_argument_group('Classification arguments')
groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', type=float,
                        help='Determine the split between learning and validation sets', default=0.2)
groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', type=int,
                        help='Number of folds in cross validation', default=2)
groupClass.add_argument('--CL_nb_class', metavar='INT', action='store', type=int,
                        help='Number of classes, -1 for all', default=2)
groupClass.add_argument('--CL_classes', metavar='STRING', action='store', nargs="+",
                        help='Classes used in the dataset (names of the folders); if not filled, random classes '
                             'will be selected, e.g. walrus mole leopard', default=["yes", "no"])
groupClass.add_argument('--CL_type', metavar='STRING', action='store', nargs="+",
                        help='Determine whether to use Multiview and/or Monoview, or Benchmark',
                        default=['Benchmark'])
groupClass.add_argument('--CL_algos_monoview', metavar='STRING', action='store', nargs="+",
                        help='Determine which monoview classifier to use; if empty, all are considered',
                        default=[''])
groupClass.add_argument('--CL_algos_multiview', metavar='STRING', action='store', nargs="+",
                        help='Determine which multiview classifier to use; if empty, all are considered',
                        default=[''])
groupClass.add_argument('--CL_cores', metavar='INT', action='store', type=int,
                        help='Number of cores, -1 for all', default=2)
groupClass.add_argument('--CL_statsiter', metavar='INT', action='store', type=int,
                        help="Number of iterations for each algorithm, to average results; if using multiple cores "
                             "it is highly recommended to use statsIter mod(nbCores) = 0",
                        default=2)
groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', nargs="+",
                        help='Determine which metrics to use; separate metric and configuration with ":". '
                             'If multiple, separate with space. If no metric is specified, all are considered, '
                             'with accuracy as principal for classification', default=[''])
groupClass.add_argument('--CL_metric_princ', metavar='STRING', action='store',
                        help='Determine which metric to use for randomSearch and optimization', default="f1_score")
groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store', type=int,
                        help='Determine how many randomized grid search tests to do', default=2)
groupClass.add_argument('--CL_HPS_type', metavar='STRING', action='store',
                        help='Determine which hyperparameter search function to use', default="randomizedSearch")

groupRF = parser.add_argument_group('Random Forest arguments')
groupRF.add_argument('--CL_RandomForest_trees', metavar='INT', type=int, action='store',
                     help='Maximum number of trees', default=25)
groupRF.add_argument('--CL_RandomForest_max_depth', metavar='INT', type=int, action='store',
                     help='Max depth for the trees', default=5)
groupRF.add_argument('--CL_RandomForest_criterion', metavar='STRING', action='store',
                     help='Criterion for the trees', default="entropy")

groupSVMLinear = parser.add_argument_group('Linear SVM arguments')
groupSVMLinear.add_argument('--CL_SVMLinear_C', metavar='INT', type=int, action='store',
                            help='Penalty parameter used', default=1)

groupSVMRBF = parser.add_argument_group('SVM-RBF arguments')
groupSVMRBF.add_argument('--CL_SVMRBF_C', metavar='INT', type=int, action='store',
                         help='Penalty parameter used', default=1)

groupSVMPoly = parser.add_argument_group('Poly SVM arguments')
groupSVMPoly.add_argument('--CL_SVMPoly_C', metavar='INT', type=int, action='store',
                          help='Penalty parameter used', default=1)
groupSVMPoly.add_argument('--CL_SVMPoly_deg', metavar='INT', type=int, action='store',
                          help='Degree parameter used', default=2)

groupAdaboost = parser.add_argument_group('Adaboost arguments')
groupAdaboost.add_argument('--CL_Adaboost_n_est', metavar='INT', type=int, action='store',
                           help='Number of estimators', default=2)
groupAdaboost.add_argument('--CL_Adaboost_b_est', metavar='STRING', action='store',
                           help='Base estimator', default='DecisionTreeClassifier')

groupDT = parser.add_argument_group('Decision Trees arguments')
groupDT.add_argument('--CL_DecisionTree_depth', metavar='INT', type=int, action='store',
                     help='Determine max depth for Decision Trees', default=3)
groupDT.add_argument('--CL_DecisionTree_criterion', metavar='STRING', action='store',
                     help='Determine criterion for Decision Trees', default="entropy")
groupDT.add_argument('--CL_DecisionTree_splitter', metavar='STRING', action='store',
                     help='Determine splitter for Decision Trees', default="random")

groupSGD = parser.add_argument_group('SGD arguments')
groupSGD.add_argument('--CL_SGD_alpha', metavar='FLOAT', type=float, action='store',
                      help='Determine alpha for SGDClassifier', default=0.1)
groupSGD.add_argument('--CL_SGD_loss', metavar='STRING', action='store',
                      help='Determine loss for SGDClassifier', default='log')
groupSGD.add_argument('--CL_SGD_penalty', metavar='STRING', action='store',
                      help='Determine penalty for SGDClassifier', default='l2')

groupKNN = parser.add_argument_group('KNN arguments')
groupKNN.add_argument('--CL_KNN_neigh', metavar='INT', type=int, action='store',
                      help='Determine number of neighbors for KNN', default=1)
groupKNN.add_argument('--CL_KNN_weights', metavar='STRING', action='store',
                      help='Determine weighting scheme for KNN', default="distance")
groupKNN.add_argument('--CL_KNN_algo', metavar='STRING', action='store',
                      help='Determine algorithm for KNN', default="auto")
groupKNN.add_argument('--CL_KNN_p', metavar='INT', type=int, action='store',
                      help='Determine the power parameter p for KNN', default=1)

groupSCM = parser.add_argument_group('SCM arguments')
groupSCM.add_argument('--CL_SCM_max_rules', metavar='INT', type=int, action='store',
                      help='Max number of rules for SCM', default=1)
groupSCM.add_argument('--CL_SCM_p', metavar='FLOAT', type=float, action='store',
                      help='p parameter for SCM', default=1.0)
groupSCM.add_argument('--CL_SCM_model_type', metavar='STRING', action='store',
                      help='Model type for SCM', default="conjunction")

groupMumbo = parser.add_argument_group('Mumbo arguments')
groupMumbo.add_argument('--MU_types', metavar='STRING', action='store', nargs="+",
                        help='Determine which monoview classifier to use with Mumbo', default=[''])
groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', nargs='+',
                        help='Configuration for the monoview classifier in Mumbo; separate each classifier with '
                             'space and each argument with ":"', default=[''])
groupMumbo.add_argument('--MU_iter', metavar='INT', action='store', nargs=3, type=float,
                        help='Max number of iterations, min number of iterations, convergence threshold',
                        default=[10, 1, 0.01])
groupMumbo.add_argument('--MU_combination', action='store_true',
                        help='Try all the monoview classifier combinations for each view', default=False)

groupFusion = parser.add_argument_group('Fusion arguments')
groupFusion.add_argument('--FU_types', metavar='STRING', action='store', nargs="+",
                         help='Determine which type of fusion to use', default=[''])

groupEarlyFusion = parser.add_argument_group('Early Fusion arguments')
groupEarlyFusion.add_argument('--FU_early_methods', metavar='STRING', action='store', nargs="+",
                              help='Determine which early fusion method to use', default=[''])
groupEarlyFusion.add_argument('--FU_E_method_configs', metavar='STRING', action='store', nargs='+',
                              help='Configuration for the early fusion methods; separate methods by space '
                                   'and values by ":"', default=[''])
groupEarlyFusion.add_argument('--FU_E_cl_config', metavar='STRING', action='store', nargs='+',
                              help='Configuration for the monoview classifiers used; separate classifiers by space, '
                                   'and configs must be of the form argument1_name:value,argument2_name:value',
                              default=[''])
groupEarlyFusion.add_argument('--FU_E_cl_names', metavar='STRING', action='store', nargs='+',
                              help='Names of the classifiers used for each early fusion method', default=[''])

groupLateFusion = parser.add_argument_group('Late Fusion arguments')
groupLateFusion.add_argument('--FU_late_methods', metavar='STRING', action='store', nargs="+",
                             help='Determine which late fusion method to use', default=[''])
groupLateFusion.add_argument('--FU_L_method_config', metavar='STRING', action='store', nargs='+',
                             help='Configuration for the fusion method', default=[''])
groupLateFusion.add_argument('--FU_L_cl_config', metavar='STRING', action='store', nargs='+',
                             help='Configuration for the monoview classifiers used', default=[''])
groupLateFusion.add_argument('--FU_L_cl_names', metavar='STRING', action='store', nargs="+",
                             help='Names of the classifiers used for late fusion', default=[''])
groupLateFusion.add_argument('--FU_L_select_monoview', metavar='STRING', action='store',
                             help='Determine which method to use to select the monoview classifiers',
                             default="intersect")

start = time.time()
-args = parser.parse_args()
+args = execution.parseTheArgs(sys.argv[1:])
os.nice(args.nice)
nbCores = args.CL_cores
statsIter = args.CL_statsiter
hyperParamSearch = args.CL_HPS_type
-directory = initLogFile(args)
+directory = execution.initLogFile(args)
-randomState = initRandomState(args.randomState, directory)
+randomState = execution.initRandomState(args.randomState, directory)
if statsIter > 1:
    statsIterRandomStates = [np.random.RandomState(randomState.randint(500)) for _ in range(statsIter)]
else:
@@ -613,13 +302,13 @@ DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args
datasetLength = DATASET.get("Metadata").attrs["datasetLength"]
indices = np.arange(datasetLength)
-classificationIndices = genSplits(statsIter, indices, DATASET, args.CL_split, statsIterRandomStates)
+classificationIndices = execution.genSplits(statsIter, indices, DATASET, args.CL_split, statsIterRandomStates)
-kFolds = genKFolds(statsIter, args.CL_nbFolds, statsIterRandomStates)
+kFolds = execution.genKFolds(statsIter, args.CL_nbFolds, statsIterRandomStates)
-datasetFiles = initMultipleDatasets(args, nbCores)
+datasetFiles = Dataset.initMultipleDatasets(args, nbCores)
-views, viewsIndices, allViews = initViews(DATASET, args)
+views, viewsIndices, allViews = execution.initViews(DATASET, args)
if not views:
    raise ValueError, "Empty views list, modify selected views to match dataset " + args.views
@@ -647,7 +336,7 @@ dataBaseTime = time.time() - start
argumentDictionaries = {"Monoview": [], "Multiview": []}
argumentDictionaries = initMonoviewArguments(benchmark, argumentDictionaries, views, allViews, DATASET, NB_CLASS,
                                             initKWARGS)
-directories = genDirecortiesNames(directory, statsIter)
+directories = execution.genDirecortiesNames(directory, statsIter)
if statsIter > 1:
    for statIterIndex in range(statsIter):
...
from scipy import sparse
import numpy as np
import Multiview.GetMultiviewDb as DB
import logging
import os
import sys
import select

def getV(DATASET, viewIndex, usedIndices=None):
@@ -59,3 +64,46 @@ def extractSubset(matrix, usedIndices):
        return sparse.csr_matrix((newData, newIndices, newIndptr), shape=(len(usedIndices), matrix.shape[1]))
    else:
        return matrix[usedIndices]
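

# Sanity-check sketch for extractSubset (toy data; assumes the full definition
# above the elided lines is in scope, and that the sparse branch rebuilds the
# selected rows as a CSR triplet, as the returned shape suggests).
dense = np.arange(12).reshape(4, 3)
assert (extractSubset(dense, [0, 2]) == dense[[0, 2]]).all()   # dense branch: fancy indexing
subset = extractSubset(sparse.csr_matrix(dense), [1, 3])       # sparse branch
assert subset.shape == (2, 3)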


def initMultipleDatasets(args, nbCores):
    """Used to create copies of the dataset if multicore computation is used
    Needs args.pathF and args.name"""
    if nbCores > 1:
        if DB.datasetsAlreadyExist(args.pathF, args.name, nbCores):
            logging.debug("Info:\t Enough copies of the dataset are already available")
            pass
        else:
            logging.debug("Start:\t Creating " + str(nbCores) + " temporary datasets for multiprocessing")
            logging.warning(" WARNING : /!\ This may use a lot of HDD storage space : " +
                            str(os.path.getsize(args.pathF + args.name + ".hdf5") * nbCores / float(
                                1024) / 1000 / 1000) + " Gbytes /!\ ")
            confirmation = confirm()
            if not confirmation:
                sys.exit(0)
            else:
                datasetFiles = DB.copyHDF5(args.pathF, args.name, nbCores)
                logging.debug("Start:\t Creating datasets for multiprocessing")
                return datasetFiles
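

# Worked instance of the storage estimate above (illustrative numbers): note
# the mixed divisors, /1024/1000/1000 yield neither GiB nor decimal GB exactly.
fileSize = 2 * 1024 ** 3                               # os.path.getsize for a 2 GiB file
estimate = fileSize * 4 / float(1024) / 1000 / 1000    # 4 cores -> 8.388608 "Gbytes"
# (decimal would be 8.59 GB, binary 8.0 GiB)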


def confirm(resp=True, timeout=15):
    ans = input_(timeout)
    if not ans:
        return resp
    if ans not in ['y', 'Y', 'n', 'N']:
        print 'please enter y or n.'
    if ans == 'y' or ans == 'Y':
        return True
    if ans == 'n' or ans == 'N':
        return False


def input_(timeout=15):
    print "You have " + str(timeout) + " seconds to stop the script by typing n"
    i, o, e = select.select([sys.stdin], [], [], timeout)
    if i:
        return sys.stdin.readline().strip()
    else:
        return "y"
import argparse
import numpy as np
import cPickle
import time
import os
import errno
import logging
import sklearn.model_selection  # the submodule is not loaded by a bare "import sklearn"
def parseTheArgs(arguments):
    parser = argparse.ArgumentParser(
        description='This file is used to benchmark the scores of multiple classification algorithms on multiview data.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    groupStandard = parser.add_argument_group('Standard arguments')
    groupStandard.add_argument('-log', action='store_true', help='Use option to activate logging to console')
    groupStandard.add_argument('--name', metavar='STRING', action='store',
                               help='Name of Database (default: %(default)s)', default='Plausible')
    groupStandard.add_argument('--type', metavar='STRING', action='store',
                               help='Type of database : .hdf5 or .csv (default: %(default)s)', default='.hdf5')
    groupStandard.add_argument('--views', metavar='STRING', action='store', nargs="+",
                               help='Name of the views selected for learning (default: %(default)s)', default=[''])
    groupStandard.add_argument('--pathF', metavar='STRING', action='store',
                               help='Path to the views (default: %(default)s)',
                               default='/home/bbauvin/Documents/Data/Data_multi_omics/')
    groupStandard.add_argument('--nice', metavar='INT', action='store', type=int,
                               help='Niceness for the process', default=0)
    groupStandard.add_argument('--randomState', metavar='STRING', action='store',
                               help="The random state seed to use or a file where we can find its get_state",
                               default=None)

    groupClass = parser.add_argument_group('Classification arguments')
    groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', type=float,
                            help='Determine the split between learning and validation sets', default=0.2)
    groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', type=int,
                            help='Number of folds in cross validation', default=2)
    groupClass.add_argument('--CL_nb_class', metavar='INT', action='store', type=int,
                            help='Number of classes, -1 for all', default=2)
    groupClass.add_argument('--CL_classes', metavar='STRING', action='store', nargs="+",
                            help='Classes used in the dataset (names of the folders); if not filled, random classes '
                                 'will be selected, e.g. walrus mole leopard', default=["yes", "no"])
    groupClass.add_argument('--CL_type', metavar='STRING', action='store', nargs="+",
                            help='Determine whether to use Multiview and/or Monoview, or Benchmark',
                            default=['Benchmark'])
    groupClass.add_argument('--CL_algos_monoview', metavar='STRING', action='store', nargs="+",
                            help='Determine which monoview classifier to use; if empty, all are considered',
                            default=[''])
    groupClass.add_argument('--CL_algos_multiview', metavar='STRING', action='store', nargs="+",
                            help='Determine which multiview classifier to use; if empty, all are considered',
                            default=[''])
    groupClass.add_argument('--CL_cores', metavar='INT', action='store', type=int,
                            help='Number of cores, -1 for all', default=2)
    groupClass.add_argument('--CL_statsiter', metavar='INT', action='store', type=int,
                            help="Number of iterations for each algorithm, to average results; if using multiple "
                                 "cores it is highly recommended to use statsIter mod(nbCores) = 0",
                            default=2)
    groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', nargs="+",
                            help='Determine which metrics to use; separate metric and configuration with ":". '
                                 'If multiple, separate with space. If no metric is specified, all are considered, '
                                 'with accuracy as principal for classification', default=[''])
    groupClass.add_argument('--CL_metric_princ', metavar='STRING', action='store',
                            help='Determine which metric to use for randomSearch and optimization',
                            default="f1_score")
    groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store', type=int,
                            help='Determine how many randomized grid search tests to do', default=2)
    groupClass.add_argument('--CL_HPS_type', metavar='STRING', action='store',
                            help='Determine which hyperparameter search function to use',
                            default="randomizedSearch")

    groupRF = parser.add_argument_group('Random Forest arguments')
    groupRF.add_argument('--CL_RandomForest_trees', metavar='INT', type=int, action='store',
                         help='Maximum number of trees', default=25)
    groupRF.add_argument('--CL_RandomForest_max_depth', metavar='INT', type=int, action='store',
                         help='Max depth for the trees', default=5)
    groupRF.add_argument('--CL_RandomForest_criterion', metavar='STRING', action='store',
                         help='Criterion for the trees', default="entropy")

    groupSVMLinear = parser.add_argument_group('Linear SVM arguments')
    groupSVMLinear.add_argument('--CL_SVMLinear_C', metavar='INT', type=int, action='store',
                                help='Penalty parameter used', default=1)

    groupSVMRBF = parser.add_argument_group('SVM-RBF arguments')
    groupSVMRBF.add_argument('--CL_SVMRBF_C', metavar='INT', type=int, action='store',
                             help='Penalty parameter used', default=1)

    groupSVMPoly = parser.add_argument_group('Poly SVM arguments')
    groupSVMPoly.add_argument('--CL_SVMPoly_C', metavar='INT', type=int, action='store',
                              help='Penalty parameter used', default=1)
    groupSVMPoly.add_argument('--CL_SVMPoly_deg', metavar='INT', type=int, action='store',
                              help='Degree parameter used', default=2)

    groupAdaboost = parser.add_argument_group('Adaboost arguments')
    groupAdaboost.add_argument('--CL_Adaboost_n_est', metavar='INT', type=int, action='store',
                               help='Number of estimators', default=2)
    groupAdaboost.add_argument('--CL_Adaboost_b_est', metavar='STRING', action='store',
                               help='Base estimator', default='DecisionTreeClassifier')

    groupDT = parser.add_argument_group('Decision Trees arguments')
    groupDT.add_argument('--CL_DecisionTree_depth', metavar='INT', type=int, action='store',
                         help='Determine max depth for Decision Trees', default=3)
    groupDT.add_argument('--CL_DecisionTree_criterion', metavar='STRING', action='store',
                         help='Determine criterion for Decision Trees', default="entropy")
    groupDT.add_argument('--CL_DecisionTree_splitter', metavar='STRING', action='store',
                         help='Determine splitter for Decision Trees', default="random")

    groupSGD = parser.add_argument_group('SGD arguments')
    groupSGD.add_argument('--CL_SGD_alpha', metavar='FLOAT', type=float, action='store',
                          help='Determine alpha for SGDClassifier', default=0.1)
    groupSGD.add_argument('--CL_SGD_loss', metavar='STRING', action='store',
                          help='Determine loss for SGDClassifier', default='log')
    groupSGD.add_argument('--CL_SGD_penalty', metavar='STRING', action='store',
                          help='Determine penalty for SGDClassifier', default='l2')

    groupKNN = parser.add_argument_group('KNN arguments')
    groupKNN.add_argument('--CL_KNN_neigh', metavar='INT', type=int, action='store',
                          help='Determine number of neighbors for KNN', default=1)
    groupKNN.add_argument('--CL_KNN_weights', metavar='STRING', action='store',
                          help='Determine weighting scheme for KNN', default="distance")
    groupKNN.add_argument('--CL_KNN_algo', metavar='STRING', action='store',
                          help='Determine algorithm for KNN', default="auto")
    groupKNN.add_argument('--CL_KNN_p', metavar='INT', type=int, action='store',
                          help='Determine the power parameter p for KNN', default=1)

    groupSCM = parser.add_argument_group('SCM arguments')
    groupSCM.add_argument('--CL_SCM_max_rules', metavar='INT', type=int, action='store',
                          help='Max number of rules for SCM', default=1)
    groupSCM.add_argument('--CL_SCM_p', metavar='FLOAT', type=float, action='store',
                          help='p parameter for SCM', default=1.0)
    groupSCM.add_argument('--CL_SCM_model_type', metavar='STRING', action='store',
                          help='Model type for SCM', default="conjunction")

    groupMumbo = parser.add_argument_group('Mumbo arguments')
    groupMumbo.add_argument('--MU_types', metavar='STRING', action='store', nargs="+",
                            help='Determine which monoview classifier to use with Mumbo', default=[''])
    groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', nargs='+',
                            help='Configuration for the monoview classifier in Mumbo; separate each classifier with '
                                 'space and each argument with ":"', default=[''])
    groupMumbo.add_argument('--MU_iter', metavar='INT', action='store', nargs=3, type=float,
                            help='Max number of iterations, min number of iterations, convergence threshold',
                            default=[10, 1, 0.01])
    groupMumbo.add_argument('--MU_combination', action='store_true',
                            help='Try all the monoview classifier combinations for each view', default=False)

    groupFusion = parser.add_argument_group('Fusion arguments')
    groupFusion.add_argument('--FU_types', metavar='STRING', action='store', nargs="+",
                             help='Determine which type of fusion to use', default=[''])

    groupEarlyFusion = parser.add_argument_group('Early Fusion arguments')
    groupEarlyFusion.add_argument('--FU_early_methods', metavar='STRING', action='store', nargs="+",
                                  help='Determine which early fusion method to use', default=[''])
    groupEarlyFusion.add_argument('--FU_E_method_configs', metavar='STRING', action='store', nargs='+',
                                  help='Configuration for the early fusion methods; separate methods by space '
                                       'and values by ":"', default=[''])
    groupEarlyFusion.add_argument('--FU_E_cl_config', metavar='STRING', action='store', nargs='+',
                                  help='Configuration for the monoview classifiers used; separate classifiers by '
                                       'space, and configs must be of the form '
                                       'argument1_name:value,argument2_name:value', default=[''])
    groupEarlyFusion.add_argument('--FU_E_cl_names', metavar='STRING', action='store', nargs='+',
                                  help='Names of the classifiers used for each early fusion method', default=[''])

    groupLateFusion = parser.add_argument_group('Late Fusion arguments')
    groupLateFusion.add_argument('--FU_late_methods', metavar='STRING', action='store', nargs="+",
                                 help='Determine which late fusion method to use', default=[''])
    groupLateFusion.add_argument('--FU_L_method_config', metavar='STRING', action='store', nargs='+',
                                 help='Configuration for the fusion method', default=[''])
    groupLateFusion.add_argument('--FU_L_cl_config', metavar='STRING', action='store', nargs='+',
                                 help='Configuration for the monoview classifiers used', default=[''])
    groupLateFusion.add_argument('--FU_L_cl_names', metavar='STRING', action='store', nargs="+",
                                 help='Names of the classifiers used for late fusion', default=[''])
    groupLateFusion.add_argument('--FU_L_select_monoview', metavar='STRING', action='store',
                                 help='Determine which method to use to select the monoview classifiers',
                                 default="intersect")

    args = parser.parse_args(arguments)
    return args
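

# Usage sketch (illustrative values, not from an original run):
args = parseTheArgs(["--name", "Plausible", "--CL_nbFolds", "5", "--CL_cores", "4"])
assert args.name == "Plausible" and args.CL_nbFolds == 5 and args.CL_cores == 4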


def initRandomState(randomStateArg, directory):
    if randomStateArg is None:
        randomState = np.random.RandomState(randomStateArg)
    else:
        try:
            seed = int(randomStateArg)
            randomState = np.random.RandomState(seed)
        except ValueError:
            fileName = randomStateArg
            with open(fileName, 'rb') as handle:
                randomState = cPickle.load(handle)
    with open(directory + "randomState.pickle", "wb") as handle:
        cPickle.dump(randomState, handle)
    return randomState
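

# Sketch of the three accepted inputs (directory must already exist; names are
# illustrative). Note the RandomState object itself is pickled, not get_state().
rs = initRandomState(None, "results/")                            # OS-seeded
rs = initRandomState("42", "results/")                            # integer seed
rs = initRandomState("results/randomState.pickle", "results/")    # reloaded state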


def initLogFile(args):
    resultDirectory = "../../Results/" + args.name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") + "/"
    logFileName = time.strftime("%Y%m%d-%H%M%S") + "-" + ''.join(args.CL_type) + "-" + "_".join(
        args.views) + "-" + args.name + "-LOG"
    if not os.path.exists(os.path.dirname(resultDirectory + logFileName)):
        try:
            os.makedirs(os.path.dirname(resultDirectory + logFileName))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    logFile = resultDirectory + logFileName
    if os.path.isfile(logFile + ".log"):
        for i in range(1, 20):
            testFileName = logFileName + "-" + str(i) + ".log"
            if not os.path.isfile(resultDirectory + testFileName):
                logFile = resultDirectory + testFileName
                break
    else:
        logFile += ".log"
    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logFile, level=logging.DEBUG,
                        filemode='w')
    if args.log:
        logging.getLogger().addHandler(logging.StreamHandler())
    return resultDirectory
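

# Naming sketch (assumed/illustrative values, not from a real run): with
# args.name == "Plausible", args.CL_type == ["Benchmark"] and
# args.views == ["View0", "View1"], one run logs to something like
#   ../../Results/Plausible/started_2017_01_30-14_05/
#       20170130-140512-Benchmark-View0_View1-Plausible-LOG.log
# A collision on the same name gets a -1 ... -19 suffix before ".log".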


def genSplits(statsIter, indices, DATASET, splitRatio, statsIterRandomStates):
    if statsIter > 1:
        splits = []
        for randomState in statsIterRandomStates:
            trainIndices, testIndices, a, b = sklearn.model_selection.train_test_split(indices,
                                                                                       DATASET.get("Labels").value,
                                                                                       test_size=splitRatio,
                                                                                       random_state=randomState)
            splits.append([trainIndices, testIndices])
        return splits
    else:
        trainIndices, testIndices, a, b = sklearn.model_selection.train_test_split(indices,
                                                                                   DATASET.get("Labels").value,
                                                                                   test_size=splitRatio,
                                                                                   random_state=statsIterRandomStates)
        return trainIndices, testIndices
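

# Minimal sketch of the single-iteration branch with toy labels: the split is
# made on indices so one partition can index every view of the HDF5 dataset;
# the label halves (a, b above) are computed but discarded.
toyIndices = np.arange(10)
toyLabels = np.array([0, 1] * 5)
trainIdx, testIdx, a, b = sklearn.model_selection.train_test_split(
    toyIndices, toyLabels, test_size=0.2, random_state=np.random.RandomState(42))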


def genKFolds(statsIter, nbFolds, statsIterRandomStates):
    if statsIter > 1:
        foldsList = []
        for randomState in statsIterRandomStates:
            foldsList.append(sklearn.model_selection.KFold(n_splits=nbFolds, random_state=randomState))
        return foldsList
    else:
        return sklearn.model_selection.KFold(n_splits=nbFolds, random_state=statsIterRandomStates)
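

# Usage sketch for the multi-iteration branch (two illustrative seeds). Note
# that KFold only consumes random_state when shuffle=True; with the default
# shuffle=False the folds are deterministic contiguous blocks.
for kFold in genKFolds(2, 5, [np.random.RandomState(1), np.random.RandomState(2)]):
    for trainIdx, testIdx in kFold.split(np.arange(20)):
        pass  # index arrays for one of the 5 folds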


def initViews(DATASET, args):
    """Used to return the views names that will be used by the algos, their indices and all the views names
    Needs args.views"""
    NB_VIEW = DATASET.get("Metadata").attrs["nbView"]
    if args.views != [""]:
        allowedViews = args.views
        allViews = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)]
        views = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)
                 if str(DATASET.get("View" + str(viewIndex)).attrs["name"]) in allowedViews]
        viewsIndices = [viewIndex for viewIndex in range(NB_VIEW)
                        if str(DATASET.get("View" + str(viewIndex)).attrs["name"]) in allowedViews]
        return views, viewsIndices, allViews
    else:
        views = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)]
        viewsIndices = np.arange(NB_VIEW)
        allViews = views
        return views, viewsIndices, allViews
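

# Minimal sketch with a toy HDF5 file shaped like the expected DATASET: a
# "Metadata" group with an "nbView" attribute and "View0", "View1", ... datasets,
# each carrying a "name" attribute (h5py and the FakeArgs stand-in are assumptions).
import h5py

toy = h5py.File("toy.hdf5", "w")
toy.create_group("Metadata").attrs["nbView"] = 2
for index, viewName in enumerate(["methyl", "mirna"]):
    toy.create_dataset("View" + str(index), data=np.zeros((3, 3))).attrs["name"] = viewName


class FakeArgs(object):
    views = ["mirna"]


views, viewsIndices, allViews = initViews(toy, FakeArgs())
# views == ['mirna'], viewsIndices == [1], allViews == ['methyl', 'mirna']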


def genDirecortiesNames(directory, statsIter):
    if statsIter > 1:
        directories = []
        for i in range(statsIter):
            directories.append(directory + "iter_" + str(i + 1) + "/")
        return directories
    else:
        return directory
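

# Usage sketch (the misspelling "Direcorties" is kept as in the source):
print(genDirecortiesNames("../../Results/Plausible/", 3))
# -> ['../../Results/Plausible/iter_1/', '../../Results/Plausible/iter_2/',
#     '../../Results/Plausible/iter_3/']
print(genDirecortiesNames("../../Results/Plausible/", 1))
# -> '../../Results/Plausible/' (a single string, not a list)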