Skip to content
Snippets Groups Projects
Commit b2a46bd6 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Simplified ExecClassif

parent eadfe37f
No related branches found
No related tags found
No related merge requests found
......@@ -33,146 +33,7 @@ __status__ = "Prototype" # Production, Development, P
testVersions()
parser = argparse.ArgumentParser(
description='This file is used to benchmark the accuracies fo multiple classification algorithm on multiview data.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
groupStandard = parser.add_argument_group('Standard arguments')
groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console')
groupStandard.add_argument('--name', metavar='STRING', action='store', help='Name of Database (default: %(default)s)',
default='Plausible')
groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of database : .hdf5 or .csv',
default='.hdf5')
groupStandard.add_argument('--views', metavar='STRING', action='store',help='Name of the views selected for learning',
default='')
groupStandard.add_argument('--pathF', metavar='STRING', action='store',help='Path to the views (default: %(default)s)',
default='/home/bbauvin/Documents/Data/Data_multi_omics/')
groupStandard.add_argument('--nice', metavar='INT', action='store', type=int,
help='Niceness for the process', default=0)
groupClass = parser.add_argument_group('Classification arguments')
groupClass.add_argument('--CL_split', metavar='FLOAT', action='store',
help='Determine the learning rate if > 1.0, number of fold for cross validation', type=float,
default=0.7)
groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', help='Number of folds in cross validation',
type=int, default=2 )
groupClass.add_argument('--CL_nb_class', metavar='INT', action='store', help='Number of classes, -1 for all', type=int,
default=2)
groupClass.add_argument('--CL_classes', metavar='STRING', action='store',
help='Classes used in the dataset (names of the folders) if not filled, random classes will be '
'selected ex. walrus:mole:leopard', default="jambon:poney")
groupClass.add_argument('--CL_type', metavar='STRING', action='store',
help='Determine whether to use Multiview, Monoview, or Benchmark, separate with : if multiple',
default='Benchmark')
# groupClass.add_argument('--CL_algorithm', metavar='STRING', action='store',
# help='Determine which classifier to use, if empty, considering all', default='')
groupClass.add_argument('--CL_algos_monoview', metavar='STRING', action='store',
help='Determine which monoview classifier to use, separate with : if multiple, if empty, considering all', default='')
groupClass.add_argument('--CL_algos_multiview', metavar='STRING', action='store',
help='Determine which multiview classifier to use, separate with : if multiple, if empty, considering all', default='')
groupClass.add_argument('--CL_cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int,
default=1)
groupClass.add_argument('--CL_statsiter', metavar='INT', action='store', help='Number of iteration for each algorithm to mean results', type=int,
default=2)
groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', nargs="+",
help='Determine which metrics to use, separate metric and configuration with ":". If multiple, separate with space. If no metric is specified, considering all with accuracy for classification '
'first one will be used for classification', default=[''])
groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store',
help='Determine how many Randomized grid search tests to do', type=int, default=2)
groupClass.add_argument('--CL_GS_type', metavar='STRING', action='store',
help='Determine which hyperparamter search function use', default="randomizedSearch")
groupRF = parser.add_argument_group('Random Forest arguments')
groupRF.add_argument('--CL_RF_trees', metavar='STRING', action='store', help='GridSearch: Determine the trees',
default='25 75 125 175')
groupRF.add_argument('--CL_RF_max_depth', metavar='STRING', action='store', help='GridSearch: Determine the trees',
default='5:10:15')
groupSVMLinear = parser.add_argument_group('Linear SVM arguments')
groupSVMLinear.add_argument('--CL_SVML_C', metavar='STRING', action='store', help='GridSearch : Penalty parameters used',
default='1:10:100:1000')
groupSVMRBF = parser.add_argument_group('SVW-RBF arguments')
groupSVMRBF.add_argument('--CL_SVMR_C', metavar='STRING', action='store', help='GridSearch : Penalty parameters used',
default='1:10:100:1000')
groupSVMPoly = parser.add_argument_group('Poly SVM arguments')
groupSVMPoly.add_argument('--CL_SVMP_C', metavar='STRING', action='store', help='GridSearch : Penalty parameters used',
default='1:10:100:1000')
groupSVMPoly.add_argument('--CL_SVMP_deg', metavar='STRING', action='store', help='GridSearch : Degree parameters used',
default='1:2:5:10')
groupAdaboost = parser.add_argument_group('Adaboost arguments')
groupAdaboost.add_argument('--CL_Ada_n_est', metavar='STRING', action='store', help='GridSearch : Penalty parameters used',
default='1:10:100:1000')
groupAdaboost.add_argument('--CL_Ada_b_est', metavar='STRING', action='store', help='GridSearch : Degree parameters used',
default='DecisionTreeClassifier')
groupRF = parser.add_argument_group('Decision Trees arguments')
groupRF.add_argument('--CL_DT_depth', metavar='STRING', action='store',
help='GridSearch: Determine max depth for Decision Trees', default='1:3:5:7')
groupSGD = parser.add_argument_group('SGD arguments')
groupSGD.add_argument('--CL_SGD_alpha', metavar='STRING', action='store',
help='GridSearch: Determine alpha for SGDClassifier', default='0.1:0.2:0.5:0.9')
groupSGD.add_argument('--CL_SGD_loss', metavar='STRING', action='store',
help='GridSearch: Determine loss for SGDClassifier', default='log')
groupSGD.add_argument('--CL_SGD_penalty', metavar='STRING', action='store',
help='GridSearch: Determine penalty for SGDClassifier', default='l2')
groupSGD = parser.add_argument_group('KNN arguments')
groupSGD.add_argument('--CL_KNN_neigh', metavar='STRING', action='store',
help='GridSearch: Determine number of neighbors for KNN', default='1:5:10:15')
groupSGD = parser.add_argument_group('SCM arguments')
groupSGD.add_argument('--CL_SCM_max_rules', metavar='STRING', action='store',
help='Max number of rules for SCM', default='1')
groupMumbo = parser.add_argument_group('Mumbo arguments')
groupMumbo.add_argument('--MU_types', metavar='STRING', action='store',
help='Determine which monoview classifier to use with Mumbo',default='DecisionTree')
groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', nargs='+',
help='Configuration for the monoview classifier in Mumbo',
default=['3:1.0', '3:1.0', '3:1.0','3:1.0'])
groupMumbo.add_argument('--MU_iter', metavar='INT', action='store', nargs=3,
help='Max number of iteration, min number of iteration, convergence threshold', type=float,
default=[10,1, 0.01])
groupFusion = parser.add_argument_group('Fusion arguments')
groupFusion.add_argument('--FU_types', metavar='STRING', action='store',
help='Determine which type of fusion to use, if multiple separate with :',
default='LateFusion:EarlyFusion')
groupFusion.add_argument('--FU_early_methods', metavar='STRING', action='store',
help='Determine which early fusion method of fusion to use, if multiple separate with :',
default='')
groupFusion.add_argument('--FU_late_methods', metavar='STRING', action='store',
help='Determine which late fusion method of fusion to use, if multiple separate with :',
default='')
groupFusion.add_argument('--FU_method_config', metavar='STRING', action='store', nargs='+',
help='Configuration for the fusion method', default=[''])
groupFusion.add_argument('--FU_cl_config', metavar='STRING', action='store', nargs='+',
help='Configuration for the monoview classifiers used', default=[''])
groupFusion.add_argument('--FU_cl_names', metavar='STRING', action='store',
help='Names of the classifier used for fusion, one per view separated by :', default='')
groupFusion.add_argument('--FU_fixed', action='store_true',
help='Determine if you want fusion for the monoview classifier in the same order as written')
args = parser.parse_args()
os.nice(args.nice)
nbCores = args.CL_cores
statsIter = args.CL_statsiter
start = time.time()
if args.name not in ["MultiOmic", "ModifiedMultiOmic", "Caltech", "Fake", "Plausible", "KMultiOmic"]:
getDatabase = getattr(DB, "getClassicDB" + args.type[1:])
else:
getDatabase = getattr(DB, "get" + args.name + "DB" + args.type[1:])
try:
gridSearch = args.CL_GS_type
except:
gridSearch = "None"
def initLogFile(args):
directory = os.path.dirname(os.path.abspath(__file__)) + "/Results/"
logFileName = time.strftime("%Y%m%d-%H%M%S") + "-CMultiV-" + args.CL_type + "-" + "_".join(args.views.split(":")) + "-" + args.name + \
"-LOG"
......@@ -190,9 +51,7 @@ logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=lo
if args.log:
logging.getLogger().addHandler(logging.StreamHandler())
DATASET, LABELS_DICTIONARY = getDatabase(args.views.split(":"), args.pathF, args.name, len(args.CL_classes), args.CL_classes)
datasetLength = DATASET.get("Metadata").attrs["datasetLength"]
def initMultipleDatasets(args, nbCores):
if nbCores>1:
if DB.datasetsAlreadyExist(args.pathF, args.name, nbCores):
logging.debug("Info:\t Enough copies of the dataset are already available")
......@@ -204,28 +63,23 @@ if nbCores>1:
time.sleep(5)
datasetFiles = DB.copyHDF5(args.pathF, args.name, nbCores)
logging.debug("Start:\t Creating datasets for multiprocessing")
return datasetFiles
def initViews(DATASET, args):
# Select which HDF5 views to learn on and return (views, viewsIndices, allViews).
# NOTE(review): this span comes from a commit diff whose '+'/'-' markers and
# indentation were lost during extraction; the lines between the 'else:' branch
# and the final 'return' appear to be remnants of the older inline script
# (the same raise/metrics lines reappear later in the file) — confirm against
# the actual repository before relying on this reconstruction.
NB_VIEW = DATASET.get("Metadata").attrs["nbView"]
# If the user passed --views "a:b:c", keep only the views whose "name"
# attribute matches; otherwise every view of the dataset is used.
if args.views!="":
allowedViews = args.views.split(":")
allViews = [str(DATASET.get("View"+str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)]
views = [str(DATASET.get("View"+str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW) if str(DATASET.get("View"+str(viewIndex)).attrs["name"]) in allowedViews]
viewsIndices = [viewIndex for viewIndex in range(NB_VIEW) if str(DATASET.get("View"+str(viewIndex)).attrs["name"]) in allowedViews]
return views, viewsIndices, allViews
else:
views = [str(DATASET.get("View"+str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)]
viewsIndices = np.arange(NB_VIEW)
allViews = views
# NOTE(review): Python 2 raise syntax below — SyntaxError under Python 3.
if not views:
raise ValueError, "Empty views list, modify selected views to match dataset "+args.views
NB_VIEW = len(views)
NB_CLASS = DATASET.get("Metadata").attrs["nbClass"]
metrics = [metric.split(":") for metric in args.CL_metrics]
if metrics == [[""]]:
metricsNames = [name for _, name, isPackage
in pkgutil.iter_modules(['Metrics']) if not isPackage and name!="log_loss"]
metrics = [[metricName, None] for metricName in metricsNames]
return views, viewsIndices, allViews
logging.info("Start:\t Finding all available mono- & multiview algorithms")
def initBenchmark(args):
benchmark = {"Monoview":{}, "Multiview":[]}
if args.CL_type.split(":")==["Benchmark"]:
# if args.CL_algorithm=='':
......@@ -282,28 +136,11 @@ if "Multiview" in args.CL_type.strip(":"):
and (name!="SCM")]
else:
benchmark["Multiview"]["Fusion"]["Classifiers"] = args.CL_algos_monoview.split(":")
if "Monoview" in args.CL_type.strip(":"):
benchmark["Monoview"] = args.CL_algos_monoview.split(":")
return benchmark
fusionMethodConfig = [args.FU_method_config[0].split(":"), "b"]
RandomForestKWARGSInit = {"0":map(int, args.CL_RF_trees.split())[0], "1":map(int, args.CL_RF_max_depth.split(":"))[0]}
SVMLinearKWARGSInit = {"0":map(int, args.CL_SVML_C.split(":"))[0]}
SVMRBFKWARGSInit = {"0":map(int, args.CL_SVMR_C.split(":"))[0]}
SVMPolyKWARGSInit = {"0":map(int, args.CL_SVMP_C.split(":"))[0], '1':map(int, args.CL_SVMP_deg.split(":"))[0]}
DecisionTreeKWARGSInit = {"0":map(int, args.CL_DT_depth.split(":"))[0]}
SGDKWARGSInit = {"2": map(float, args.CL_SGD_alpha.split(":"))[0], "1": args.CL_SGD_penalty.split(":")[0],
"0":args.CL_SGD_loss.split(":")[0]}
KNNKWARGSInit = {"0": map(float, args.CL_KNN_neigh.split(":"))[0]}
AdaboostKWARGSInit = {"0": args.CL_Ada_n_est.split(":")[0], "1": args.CL_Ada_b_est.split(":")[0]}
SCMKWARGSInit = {"0":args.CL_SCM_max_rules.split(":")[0]}
dataBaseTime = time.time()-start
argumentDictionaries = {"Monoview": {}, "Multiview": []}
def initMonoviewArguments(benchmark, argumentDictionaries, views, allViews, DATASET, NB_CLASS, kwargsInit):
try:
if benchmark["Monoview"]:
argumentDictionaries["Monoview"] = []
......@@ -311,44 +148,35 @@ try:
for classifier in benchmark["Monoview"]:
if classifier=="SCM":
if DATASET.get("View"+str(allViews.index(view))).attrs["binary"]:
arguments = {"args":{classifier+"KWARGS": globals()[classifier+"KWARGSInit"], "feat":view,
arguments = {"args":{classifier+"KWARGS": kwargsInit[classifier+"KWARGSInit"], "feat":view,
"CL_type": classifier, "nbClass":NB_CLASS}, "viewIndex":allViews.index(view)}
argumentDictionaries["Monoview"].append(arguments)
else:
pass
else:
arguments = {"args":{classifier+"KWARGS": globals()[classifier+"KWARGSInit"], "feat":view,
arguments = {"args":{classifier+"KWARGS": kwargsInit[classifier+"KWARGSInit"], "feat":view,
"CL_type": classifier, "nbClass":NB_CLASS}, "viewIndex":allViews.index(view)}
argumentDictionaries["Monoview"].append(arguments)
except:
pass
bestClassifiers = []
bestClassifiersConfigs = []
resultsMonoview = []
labelsNames = LABELS_DICTIONARY.values()
if nbCores>1:
nbExperiments = len(argumentDictionaries["Monoview"])
for stepIndex in range(int(math.ceil(float(nbExperiments)/nbCores))):
resultsMonoview+=(Parallel(n_jobs=nbCores)(
delayed(ExecMonoview_multicore)(args.name, labelsNames, args.CL_split, args.CL_nbFolds, coreIndex, args.type, args.pathF, statsIter, gridSearch=gridSearch,
metrics=metrics, nIter=args.CL_GS_iter, **argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores])
for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))))
accuracies = [[result[1][1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
classifiersNames = [[result[1][0] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
classifiersConfigs = [[result[1][1][:-1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
else:
resultsMonoview+=([ExecMonoview(DATASET.get("View"+str(arguments["viewIndex"])),
DATASET.get("Labels").value, args.name, labelsNames,
args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF, statsIter,
gridSearch=gridSearch, metrics=metrics, nIter=args.CL_GS_iter,
**arguments)
for arguments in argumentDictionaries["Monoview"]])
accuracies = [[result[1][2][metrics[0][0]][2] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in viewsIndices]
classifiersNames = [[result[1][0] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in viewsIndices]
classifiersConfigs = [[result[1][1][:-1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in viewsIndices]
monoviewTime = time.time()-dataBaseTime-start
if True:
return argumentDictionaries
def initKWARGS(args):
    """Build the initial keyword-argument dictionary for every classifier.

    Each ``args.CL_*`` option is a string that may contain several candidate
    values (':'-separated, except ``CL_RF_trees`` which is historically
    whitespace-separated); only the *first* candidate is used here.

    Returns a dict keyed by '<ClassifierName>KWARGSInit', each value being the
    positional-style config dict ({"0": ..., "1": ...}) consumed downstream.

    Note: the original ``map(int, s.split(":"))[0]`` converted the whole
    candidate list just to take its first element and breaks on Python 3,
    where ``map`` returns a non-subscriptable iterator.
    """
    def _first(s, sep=":"):
        # First candidate of a separated candidate list (still a string).
        return s.split(sep)[0]

    kwargsInit = {
        # CL_RF_trees is whitespace-separated, unlike every other option.
        "RandomForestKWARGSInit": {"0": int(args.CL_RF_trees.split()[0]),
                                   "1": int(_first(args.CL_RF_max_depth))},
        "SVMLinearKWARGSInit": {"0": int(_first(args.CL_SVML_C))},
        "SVMRBFKWARGSInit": {"0": int(_first(args.CL_SVMR_C))},
        "SVMPolyKWARGSInit": {"0": int(_first(args.CL_SVMP_C)),
                              "1": int(_first(args.CL_SVMP_deg))},
        "DecisionTreeKWARGSInit": {"0": int(_first(args.CL_DT_depth))},
        "SGDKWARGSInit": {"0": _first(args.CL_SGD_loss),
                          "1": _first(args.CL_SGD_penalty),
                          "2": float(_first(args.CL_SGD_alpha))},
        "KNNKWARGSInit": {"0": float(_first(args.CL_KNN_neigh))},
        "AdaboostKWARGSInit": {"0": _first(args.CL_Ada_n_est),
                               "1": _first(args.CL_Ada_b_est)},
        "SCMKWARGSInit": {"0": _first(args.CL_SCM_max_rules)},
    }
    return kwargsInit
def initMultiviewArguments(args, benchmark, views, viewsIndices, accuracies, classifiersConfigs, classifiersNames, fusionMethodConfig, NB_VIEW):
if benchmark["Multiview"]:
try:
if benchmark["Multiview"]["Fusion"]:
......@@ -454,8 +282,211 @@ if True:
argumentDictionaries["Multiview"].append(arguments)
except:
pass
return argumentDictionaries
# Command-line interface of the multiview benchmark script.
# The parser is built here; parsing happens later (args = parser.parse_args()).
# Fixes: the group variables previously named groupRF/groupSGD were silently
# reused for the Decision Trees, KNN and SCM groups (shadowing the earlier
# groups); they now have distinct names. A few user-visible typos in help
# strings ("fo" -> "of", "hyperparamter", "SVW-RBF") are corrected.
parser = argparse.ArgumentParser(
    description='This file is used to benchmark the accuracies of multiple classification algorithm on multiview data.',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

groupStandard = parser.add_argument_group('Standard arguments')
groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console')
groupStandard.add_argument('--name', metavar='STRING', action='store', help='Name of Database (default: %(default)s)',
                           default='Plausible')
groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of database : .hdf5 or .csv (default: %(default)s)',
                           default='.hdf5')
groupStandard.add_argument('--views', metavar='STRING', action='store', help='Name of the views selected for learning (default: %(default)s)',
                           default='')
groupStandard.add_argument('--pathF', metavar='STRING', action='store', help='Path to the views (default: %(default)s)',
                           default='/home/bbauvin/Documents/Data/Data_multi_omics/')
groupStandard.add_argument('--nice', metavar='INT', action='store', type=int,
                           help='Niceness for the process', default=0)

groupClass = parser.add_argument_group('Classification arguments')
groupClass.add_argument('--CL_split', metavar='FLOAT', action='store',
                        help='Determine the learning rate if > 1.0, number of fold for cross validation', type=float,
                        default=0.7)
groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', help='Number of folds in cross validation',
                        type=int, default=2)
groupClass.add_argument('--CL_nb_class', metavar='INT', action='store', help='Number of classes, -1 for all', type=int,
                        default=2)
groupClass.add_argument('--CL_classes', metavar='STRING', action='store',
                        help='Classes used in the dataset (names of the folders) if not filled, random classes will be '
                             'selected ex. walrus:mole:leopard', default="jambon:poney")
groupClass.add_argument('--CL_type', metavar='STRING', action='store',
                        help='Determine whether to use Multiview, Monoview, or Benchmark, separate with : if multiple',
                        default='Benchmark')
# groupClass.add_argument('--CL_algorithm', metavar='STRING', action='store',
#                         help='Determine which classifier to use, if empty, considering all', default='')
groupClass.add_argument('--CL_algos_monoview', metavar='STRING', action='store',
                        help='Determine which monoview classifier to use, separate with : if multiple, if empty, considering all', default='')
groupClass.add_argument('--CL_algos_multiview', metavar='STRING', action='store',
                        help='Determine which multiview classifier to use, separate with : if multiple, if empty, considering all', default='')
groupClass.add_argument('--CL_cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int,
                        default=1)
groupClass.add_argument('--CL_statsiter', metavar='INT', action='store', help='Number of iteration for each algorithm to mean results', type=int,
                        default=2)
groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', nargs="+",
                        help='Determine which metrics to use, separate metric and configuration with ":". If multiple, separate with space. If no metric is specified, considering all with accuracy for classification '
                             'first one will be used for classification', default=[''])
groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store',
                        help='Determine how many Randomized grid search tests to do', type=int, default=2)
groupClass.add_argument('--CL_GS_type', metavar='STRING', action='store',
                        help='Determine which hyperparameter search function use', default="randomizedSearch")

groupRF = parser.add_argument_group('Random Forest arguments')
groupRF.add_argument('--CL_RF_trees', metavar='STRING', action='store', help='Number max trees',
                     default='25')
groupRF.add_argument('--CL_RF_max_depth', metavar='STRING', action='store', help='Max depth for the trees',
                     default='5')

groupSVMLinear = parser.add_argument_group('Linear SVM arguments')
groupSVMLinear.add_argument('--CL_SVML_C', metavar='STRING', action='store', help='Penalty parameter used',
                            default='1')

groupSVMRBF = parser.add_argument_group('SVM-RBF arguments')
groupSVMRBF.add_argument('--CL_SVMR_C', metavar='STRING', action='store', help='Penalty parameter used',
                         default='1')

groupSVMPoly = parser.add_argument_group('Poly SVM arguments')
groupSVMPoly.add_argument('--CL_SVMP_C', metavar='STRING', action='store', help='Penalty parameter used',
                          default='1')
groupSVMPoly.add_argument('--CL_SVMP_deg', metavar='STRING', action='store', help='Degree parameter used',
                          default='2')

groupAdaboost = parser.add_argument_group('Adaboost arguments')
groupAdaboost.add_argument('--CL_Ada_n_est', metavar='STRING', action='store', help='Number of estimators',
                           default='2')
groupAdaboost.add_argument('--CL_Ada_b_est', metavar='STRING', action='store', help='Estimators',
                           default='DecisionTreeClassifier')

groupDT = parser.add_argument_group('Decision Trees arguments')
groupDT.add_argument('--CL_DT_depth', metavar='STRING', action='store',
                     help='Determine max depth for Decision Trees', default='3')

groupSGD = parser.add_argument_group('SGD arguments')
groupSGD.add_argument('--CL_SGD_alpha', metavar='STRING', action='store',
                      help='Determine alpha for SGDClassifier', default='0.1')
groupSGD.add_argument('--CL_SGD_loss', metavar='STRING', action='store',
                      help='Determine loss for SGDClassifier', default='log')
groupSGD.add_argument('--CL_SGD_penalty', metavar='STRING', action='store',
                      help='Determine penalty for SGDClassifier', default='l2')

groupKNN = parser.add_argument_group('KNN arguments')
groupKNN.add_argument('--CL_KNN_neigh', metavar='STRING', action='store',
                      help='Determine number of neighbors for KNN', default='1')

groupSCM = parser.add_argument_group('SCM arguments')
groupSCM.add_argument('--CL_SCM_max_rules', metavar='STRING', action='store',
                      help='Max number of rules for SCM', default='1')

groupMumbo = parser.add_argument_group('Mumbo arguments')
groupMumbo.add_argument('--MU_types', metavar='STRING', action='store',
                        help='Determine which monoview classifier to use with Mumbo', default='DecisionTree')
groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', nargs='+',
                        help='Configuration for the monoview classifier in Mumbo',
                        default=[''])
groupMumbo.add_argument('--MU_iter', metavar='INT', action='store', nargs=3,
                        help='Max number of iteration, min number of iteration, convergence threshold', type=float,
                        default=[10, 1, 0.01])

groupFusion = parser.add_argument_group('Fusion arguments')
groupFusion.add_argument('--FU_types', metavar='STRING', action='store',
                         help='Determine which type of fusion to use, if multiple separate with :',
                         default='LateFusion:EarlyFusion')
groupFusion.add_argument('--FU_early_methods', metavar='STRING', action='store',
                         help='Determine which early fusion method of fusion to use, if multiple separate with :',
                         default='')
groupFusion.add_argument('--FU_late_methods', metavar='STRING', action='store',
                         help='Determine which late fusion method of fusion to use, if multiple separate with :',
                         default='')
groupFusion.add_argument('--FU_method_config', metavar='STRING', action='store', nargs='+',
                         help='Configuration for the fusion method', default=[''])
groupFusion.add_argument('--FU_cl_config', metavar='STRING', action='store', nargs='+',
                         help='Configuration for the monoview classifiers used', default=[''])
groupFusion.add_argument('--FU_cl_names', metavar='STRING', action='store',
                         help='Names of the classifier used for fusion, one per view separated by :', default='')
groupFusion.add_argument('--FU_fixed', action='store_true',
                         help='Determine if you want fusion for the monoview classifier in the same order as written')
args = parser.parse_args()
# Lower the process priority as requested (no-op for --nice 0).
os.nice(args.nice)
nbCores = args.CL_cores
statsIter = args.CL_statsiter
start = time.time()
# Pick the dataset loader from the DB module: known dataset names get a
# dedicated getter ("get<Name>DB<ext>"), everything else goes through the
# generic "getClassicDB<ext>" loader. args.type[1:] strips the leading dot.
if args.name not in ["MultiOmic", "ModifiedMultiOmic", "Caltech", "Fake", "Plausible", "KMultiOmic"]:
getDatabase = getattr(DB, "getClassicDB" + args.type[1:])
else:
# NOTE(review): 'pass' immediately followed by an assignment looks like a
# leftover diff artifact; presumably the assignment below is the body of
# this else branch — confirm against the repository (indentation was
# stripped during extraction).
pass
getDatabase = getattr(DB, "get" + args.name + "DB" + args.type[1:])
# NOTE(review): bare except silently swallows everything; CL_GS_type has an
# argparse default, so the except arm should be unreachable — confirm.
try:
gridSearch = args.CL_GS_type
except:
gridSearch = False
initLogFile(args)
# NOTE(review): len(args.CL_classes) is the *character* count of the
# ':'-separated string (e.g. 12 for "jambon:poney"), not the number of
# classes — looks suspicious, verify against the getDatabase signature.
DATASET, LABELS_DICTIONARY = getDatabase(args.views.split(":"), args.pathF, args.name, len(args.CL_classes), args.CL_classes)
# datasetLength = DATASET.get("Metadata").attrs["datasetLength"]
datasetFiles = initMultipleDatasets(args, nbCores)
views, viewsIndices, allViews = initViews(DATASET, args)
# NOTE(review): Python 2 raise syntax below — SyntaxError under Python 3.
if not views:
raise ValueError, "Empty views list, modify selected views to match dataset "+args.views
NB_VIEW = len(views)
NB_CLASS = DATASET.get("Metadata").attrs["nbClass"]
# Parse "--CL_metrics name:config" pairs; when nothing was given, fall back
# to every metric module found in Metrics/ except log_loss.
metrics = [metric.split(":") for metric in args.CL_metrics]
if metrics == [[""]]:
metricsNames = [name for _, name, isPackage
in pkgutil.iter_modules(['Metrics']) if not isPackage and name!="log_loss"]
metrics = [[metricName, None] for metricName in metricsNames]
logging.info("Start:\t Finding all available mono- & multiview algorithms")
benchmark = initBenchmark(args)
fusionMethodConfig = [args.FU_method_config[0].split(":"), "b"]
# NOTE(review): rebinding 'initKWARGS' to the function's own return value
# shadows the function, which can never be called again afterwards — the
# result variable should have a distinct name (e.g. kwargsInit).
initKWARGS = initKWARGS(args)
dataBaseTime = time.time()-start
argumentDictionaries = {"Monoview": {}, "Multiview": []}
argumentDictionaries = initMonoviewArguments(benchmark, argumentDictionaries, views, allViews, DATASET, NB_CLASS, initKWARGS)
bestClassifiers = []
bestClassifiersConfigs = []
resultsMonoview = []
# Python 2: dict.values() returns a list here (a view object on Python 3).
labelsNames = LABELS_DICTIONARY.values()
# Monoview benchmark: dispatch experiments in batches of nbCores with joblib,
# or run them serially when a single core was requested.
if nbCores>1:
nbExperiments = len(argumentDictionaries["Monoview"])
for stepIndex in range(int(math.ceil(float(nbExperiments)/nbCores))):
resultsMonoview+=(Parallel(n_jobs=nbCores)(
delayed(ExecMonoview_multicore)(args.name, labelsNames, args.CL_split, args.CL_nbFolds, coreIndex, args.type, args.pathF, statsIter, gridSearch=gridSearch,
metrics=metrics, nIter=args.CL_GS_iter, **argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores])
for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))))
# NOTE(review): the multicore branch indexes results per view with
# range(NB_VIEW) and result[1][1], while the serial branch below uses
# viewsIndices and result[1][2][metrics[0][0]][2] — the asymmetry looks
# unintentional; confirm which indexing is current.
accuracies = [[result[1][1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
classifiersNames = [[result[1][0] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
classifiersConfigs = [[result[1][1][:-1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
else:
resultsMonoview+=([ExecMonoview(DATASET.get("View"+str(arguments["viewIndex"])),
DATASET.get("Labels").value, args.name, labelsNames,
args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF, statsIter,
gridSearch=gridSearch, metrics=metrics, nIter=args.CL_GS_iter,
**arguments)
for arguments in argumentDictionaries["Monoview"]])
accuracies = [[result[1][2][metrics[0][0]][2] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in viewsIndices]
classifiersNames = [[result[1][0] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in viewsIndices]
classifiersConfigs = [[result[1][1][:-1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in viewsIndices]
monoviewTime = time.time()-dataBaseTime-start
# Multiview benchmark setup, seeded with the monoview results.
argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, accuracies, classifiersConfigs, classifiersNames, fusionMethodConfig, NB_VIEW)
if nbCores>1:
resultsMultiview = []
nbExperiments = len(argumentDictionaries["Multiview"])
......
......@@ -26,6 +26,7 @@ def DecisionTree(data, labels, arg, weights):
def getConfig(classifierConfig):
    """Return a human-readable description of a DecisionTree configuration.

    ``classifierConfig`` is expected to be a sequence whose first two items
    are the tree depth and the sub-sampling ratio (types are not enforced;
    both are rendered with str()).
    """
    # Removed the leftover Python 2 debug statement `print classifierConfig`
    # that polluted stdout on every call (and is a SyntaxError on Python 3).
    depth = classifierConfig[0]
    subSampling = classifierConfig[1]
    # Spacing kept byte-identical to the historical output (note the double
    # space after the comma) in case anything downstream matches on it.
    return 'with depth ' + str(depth) + ', ' + ' sub-sampled at ' + str(subSampling) + ' '
......
......@@ -89,7 +89,7 @@ def getDBConfig(DATASET, LEARNING_RATE, nbFolds, databaseName, validationIndices
return DBString, viewNames
def getAlgoConfig(initKWARGS, NB_CORES, viewNames, gridSearch, nIter, times):
def getAlgoConfig(classifiersIterations, initKWARGS, NB_CORES, viewNames, gridSearch, nIter, times):
classifierNames = initKWARGS["classifiersNames"]
maxIter = initKWARGS["maxIter"]
minIter = initKWARGS["minIter"]
......@@ -102,13 +102,14 @@ def getAlgoConfig(initKWARGS, NB_CORES, viewNames, gridSearch, nIter, times):
# kFoldPredictionTime = [np.mean(np.array([kFoldPredictionTime[statsIterIndex][foldIdx]
# for statsIterIndex in range(len(kFoldPredictionTime))]))
# for foldIdx in range(len(kFoldPredictionTime[0]))]
weakClassifierConfigs = [getattr(globals()[classifierName], 'getConfig')(classifiersConfig) for classifiersConfig,
classifierName
in zip(classifiersConfig, classifierNames)]
in zip(classifiersIterations[0].classifiersConfigs, classifiersIterations[0].classifiersNames)]
classifierAnalysis = [classifierName + " " + weakClassifierConfig + "on " + feature for classifierName,
weakClassifierConfig,
feature
in zip(classifierNames, weakClassifierConfigs, viewNames)]
in zip(classifiersIterations[0].classifiersNames, weakClassifierConfigs, viewNames)]
gridSearchString = ""
if gridSearch:
gridSearchString += "Configurations found by randomized search with "+str(nIter)+" iterations"
......@@ -423,7 +424,7 @@ def execute(classifiersIterations, trainLabelsIterations,testLabelsIterations, D
CLASS_LABELS = DATASET.get("Labels")[...]
dbConfigurationString, viewNames = getDBConfig(DATASET, LEARNING_RATE, nbFolds, databaseName, validationIndices, LABELS_DICTIONARY)
algoConfigurationString, classifierAnalysis = getAlgoConfig(initKWARGS, NB_CORES, viewNames, gridSearch, nIter, times)
algoConfigurationString, classifierAnalysis = getAlgoConfig(classifiersIterations, initKWARGS, NB_CORES, viewNames, gridSearch, nIter, times)
(totalScoreOnTrain, totalScoreOnTest, meanAverageAccuracies, viewsStats, scoresOnTainByIter,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment