From e78c4e02ced9e773d89c363ae2a6d69d1a6af199 Mon Sep 17 00:00:00 2001
From: Baptiste Bauvin <baptiste.bauvin.1@ulaval.ca>
Date: Thu, 25 Jan 2018 20:56:58 +0100
Subject: [PATCH] Worked on cluster adaptation for monoview classification

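Rename the --CL_GS_iter argument to --CL_HPS_iter, since the iteration
count applies to any hyper-parameter search method, not only grid
search. Make ExecClassifMonoView.py runnable as a standalone script for
cluster jobs: the classification indices, k-folds, random state,
metrics and classifier keyword arguments are now loaded from pickle
files given on the command line, the data is read from the HDF5
database through the new Dataset.getMonoviewShared helper, and the
result of ExecMonoview is pickled to the output directory.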
---
 Code/MonoMultiViewClassifiers/ExecClassif.py  | 20 ++--
 .../Monoview/ExecClassifMonoView.py           | 96 +++++++++++--------
 .../MonoMultiViewClassifiers/utils/Dataset.py |  9 +-
 .../utils/execution.py                        |  4 +-
 4 files changed, 79 insertions(+), 50 deletions(-)
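
The sketch below shows how a cluster launcher might drive the
standalone script under this patch. It is an illustration only: the
pickle file names, the StratifiedKFold folds object, the metrics list
structure and the kwargs contents are assumptions, not fixed by this
patch.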

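    import pickle
    import subprocess

    import numpy as np
    from sklearn.model_selection import StratifiedKFold

    # Illustrative stand-ins for the objects the script unpickles; a real
    # launcher would build them from the benchmark configuration.
    arguments = {
        "classificationIndices": (np.arange(4), np.arange(4, 6)),  # (train, test)
        "KFolds": StratifiedKFold(n_splits=2),
        "randomState": np.random.RandomState(42),
        "metrics": [["f1_score", None]],  # assumed [metric name, metric kwargs]
        "kwargs": {"CL_type": "DecisionTree"},  # hypothetical classifier arguments
    }
    for argName, argValue in arguments.items():
        with open(argName + ".pickle", "wb") as handle:
            pickle.dump(argValue, handle)

    # Run the script as a module so its relative import of utils.Dataset resolves.
    subprocess.call(["python", "-m",
                     "Code.MonoMultiViewClassifiers.Monoview.ExecClassifMonoView",
                     "--name", "Plausible", "--cl_name", "DecisionTree",
                     "--view", "View0", "--pathF", "../../../Data/",
                     "--directory", "Results/",
                     "--classificationIndices", "classificationIndices.pickle",
                     "--KFolds", "KFolds.pickle",
                     "--randomState", "randomState.pickle",
                     "--metrics", "metrics.pickle",
                     "--kwargs", "kwargs.pickle",
                     "--nIter", "10"])

The script unpickles each of these files, reads X and Y from
pathF + name + ".hdf5" through Dataset.getMonoviewShared, and writes
its log file and res.pickle into the --directory folder.
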
diff --git a/Code/MonoMultiViewClassifiers/ExecClassif.py b/Code/MonoMultiViewClassifiers/ExecClassif.py
index c0c31198..e7e6ca9d 100644
--- a/Code/MonoMultiViewClassifiers/ExecClassif.py
+++ b/Code/MonoMultiViewClassifiers/ExecClassif.py
@@ -167,7 +167,7 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
     resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds,
                                                coreIndex, args.type, args.pathF, randomState, labels,
                                                hyperParamSearch=hyperParamSearch, metrics=metrics,
-                                               nIter=args.CL_GS_iter, **argument)
+                                               nIter=args.CL_HPS_iter, **argument)
                         for argument in argumentDictionaries["Monoview"]]
     logging.debug("Done:\t Monoview benchmark")
 
@@ -181,7 +181,7 @@ def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, class
     resultsMultiview += [
         ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type,
                                 args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch,
-                                metrics=metrics, nIter=args.CL_GS_iter, **arguments)
+                                metrics=metrics, nIter=args.CL_HPS_iter, **arguments)
         for arguments in argumentDictionaries["Multiview"]]
     logging.debug("Done:\t Multiview benchmark")
 
@@ -220,7 +220,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
             delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, classificationIndices, kFolds,
                                             coreIndex, args.type, args.pathF, randomState, labels,
                                             hyperParamSearch=hyperParamSearch,
-                                            metrics=metrics, nIter=args.CL_GS_iter,
+                                            metrics=metrics, nIter=args.CL_HPS_iter,
                                             **argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores])
             for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))))
     logging.debug("Done:\t Monoview benchmark")
@@ -238,7 +238,7 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non
         resultsMultiview += Parallel(n_jobs=nbCores)(
             delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds,
                                              args.type, args.pathF, LABELS_DICTIONARY, randomState, labels,
-                                             hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter,
+                                             hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_HPS_iter,
                                              **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
             for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
     logging.debug("Done:\t Multiview benchmark")
@@ -280,7 +280,7 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, directory=Non
         resultsMonoview += [ExecMonoview(directory, X, Y, args.name, labelsNames, classificationIndices, kFolds,
                                                    1, args.type, args.pathF, randomState,
                                                    hyperParamSearch=hyperParamSearch, metrics=metrics,
-                                                   nIter=args.CL_GS_iter, **arguments)]
+                                                   nIter=args.CL_HPS_iter, **arguments)]
     logging.debug("Done:\t Monoview benchmark")
 
     logging.debug("Start:\t Multiview arguments initialization")
@@ -294,7 +294,7 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, directory=Non
         resultsMultiview += [
             ExecMultiview(directory, DATASET, args.name, classificationIndices, kFolds, 1, args.type,
                                     args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch,
-                                    metrics=metrics, nIter=args.CL_GS_iter, **arguments)]
+                                    metrics=metrics, nIter=args.CL_HPS_iter, **arguments)]
     logging.debug("Done:\t Multiview benchmark")
 
     return [flag, resultsMonoview, resultsMultiview]
@@ -444,7 +444,7 @@ def execClassif(arguments):
 #     resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds,
 #                                                coreIndex, args.type, args.pathF, randomState,
 #                                                hyperParamSearch=hyperParamSearch,
-#                                                metrics=metrics, nIter=args.CL_GS_iter,
+#                                                metrics=metrics, nIter=args.CL_HPS_iter,
 #                                                **arguments)
 #                         for arguments in argumentDictionaries["Monoview"]]
 #     monoviewTime = time.time() - dataBaseTime - start
@@ -456,7 +456,7 @@ def execClassif(arguments):
 #     resultsMultiview += [
 #         ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type,
 #                                 args.pathF, LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
-#                                 metrics=metrics, nIter=args.CL_GS_iter, **arguments)
+#                                 metrics=metrics, nIter=args.CL_HPS_iter, **arguments)
 #         for arguments in argumentDictionaries["Multiview"]]
 #     multiviewTime = time.time() - monoviewTime - dataBaseTime - start
 #
@@ -501,14 +501,14 @@ def execClassif(arguments):
 #                                                  args.type,
 #                                                  args.pathF,
 #                                                  LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
-#                                                  metrics=metrics, nIter=args.CL_GS_iter,
+#                                                  metrics=metrics, nIter=args.CL_HPS_iter,
 #                                                  **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex])
 #                 for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))
 #     else:
 #         resultsMultiview = [
 #             ExecMultiview(directory, DATASET, args.name, classificationIndices, kFolds, 1, args.type, args.pathF,
 #                           LABELS_DICTIONARY, randomState, hyperParamSearch=hyperParamSearch,
-#                           metrics=metrics, nIter=args.CL_GS_iter, **arguments) for arguments in
+#                           metrics=metrics, nIter=args.CL_HPS_iter, **arguments) for arguments in
 #             argumentDictionaries["Multiview"]]
 #     multiviewTime = time.time() - monoviewTime - dataBaseTime - start
 #     if nbCores > 1:
diff --git a/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py
index fc0102a0..4f6d844a 100644
--- a/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py
+++ b/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py
@@ -196,85 +196,107 @@ if __name__ == '__main__':
     import argparse
     import pickle
 
+    import errno  # needed for the EEXIST check when creating the log directory
+    from ..utils import Dataset
+
     parser = argparse.ArgumentParser(
         description='This methods is used to execute a multiclass classification with one single view. ',
         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 
     groupStandard = parser.add_argument_group('Standard arguments')
     groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console')
-    groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of Dataset', default=".hdf5")
     groupStandard.add_argument('--name', metavar='STRING', action='store',
-                               help='Name of Database (default: %(default)s)', default='DB')
+                               help='Name of Database', default='Plausible')
+    groupStandard.add_argument('--cl_name', metavar='STRING', action='store',
+                               help='The name of the monoview classifier to use', default='DecisionTree')
     groupStandard.add_argument('--view', metavar='STRING', action='store',
-                               help='Name of Feature for Classification (default: %(default)s)', default='View0')
+                               help='Name of the view used', default='View0')
     groupStandard.add_argument('--pathF', metavar='STRING', action='store',
-                               help='Path to the views (default: %(default)s)', default='Results-FeatExtr/')
+                               help='Path to the folder containing the hdf5 database file', default='../../../Data/')
     groupStandard.add_argument('--directory', metavar='STRING', action='store',
-                               help='Path to the views (default: %(default)s)', default='Results-FeatExtr/')
+                               help='Path of the output directory', default='')
     groupStandard.add_argument('--labelsNames', metavar='STRING', action='store', nargs='+',
-                               help='Name of classLabels CSV-file  (default: %(default)s)', default='classLabels.csv')
+                               help='Name of the labels used for classification', default=['Yes', 'No'])
     groupStandard.add_argument('--classificationIndices', metavar='STRING', action='store',
-                               help='Name of classLabels-Description CSV-file  (default: %(default)s)',
-                               default='classLabels-Description.csv')
-    groupStandard.add_argument('--nbCores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int,
-                            default=1)
+                               help='Path to the classificationIndices pickle file',
+                               default='')
+    groupStandard.add_argument('--KFolds', metavar='STRING', action='store',
+                               help='Path to the kFolds pickle file',
+                               default='')
+    groupStandard.add_argument('--nbCores', metavar='INT', action='store', help='Number of cores, -1 for all',
+                               type=int, default=1)
     groupStandard.add_argument('--randomState', metavar='INT', action='store',
                                help='Seed for the random state or pickable randomstate file', default=42)
     groupStandard.add_argument('--hyperParamSearch', metavar='STRING', action='store',
-                               help='The type of method used tosearch the best set of hyper parameters', default='randomizedSearch')
-    groupStandard.add_argument('--metrics', metavar='STRING', action='store', nargs="+",
-                               help='Metrics used in the experimentation, the first will be the one used in CV',
-                               default=[''])
-    groupStandard.add_argument('--nIter', metavar='INT', action='store', help='Number of itetarion in hyper parameter search', type=int,
+                               help='The type of method used to search the best set of hyper parameters',
+                               default='randomizedSearch')
+    groupStandard.add_argument('--metrics', metavar='STRING', action='store',
+                               help='Path to the pickle file describing the metrics used to analyze the performance',
+                               default='')
+    groupStandard.add_argument('--kwargs', metavar='STRING', action='store',
+                               help='Path to the pickle file containing the keyword arguments used for classification',
+                               default='')
+    groupStandard.add_argument('--nIter', metavar='INT', action='store',
+                               help='Number of iterations in the hyper-parameter search', type=int,
                                default=10)
 
     args = parser.parse_args()
 
     directory = args.directory
     name = args.name
+    classifierName = args.cl_name
     labelsNames = args.labelsNames
-    classificationIndices = args.classificationIndices
-    KFolds = args.KFolds
+    viewName = args.view
+    with open(args.classificationIndices, 'rb') as handle:
+        classificationIndices = pickle.load(handle)
+    with open(args.KFolds, 'rb') as handle:
+        KFolds = pickle.load(handle)
     nbCores = args.nbCores
-    databaseType = None
     path = args.pathF
-    randomState = args.randomState
+    with open(args.randomState, 'rb') as handle:
+        randomState = pickle.load(handle)
     hyperParamSearch = args.hyperParamSearch
-    metrics = args.metrics
+    with open(args.metrics, 'rb') as handle:
+        metrics = pickle.load(handle)
     nIter = args.nIter
-    kwargs = args.kwargs
-
-    # Extract the data using MPI
-    X = None
-    Y = None
+    with open(args.kwargs, 'rb') as handle:
+        kwargs = pickle.load(handle)
 
-    logfilename = "gen a goodlogfilename"
+    databaseType = None
 
 
+    # Extract the view and the labels from the HDF5 database file
+    X, Y = Dataset.getMonoviewShared(path, name, viewName)
 
-    logfile = directory + logfilename
-    if os.path.isfile(logfile + ".log"):
+    # Init log
+    logFileName = time.strftime("%Y_%m_%d-%H:%M:%S") + "-" + name + "-" + viewName + "-" + classifierName + "-LOG"
+    if not os.path.exists(os.path.dirname(directory + logFileName)):
+        try:
+            os.makedirs(os.path.dirname(directory + logFileName))
+        except OSError as exc:
+            if exc.errno != errno.EEXIST:
+                raise
+    logFile = directory + logFileName
+    if os.path.isfile(logFile + ".log"):
         for i in range(1, 20):
-            testFileName = logfilename + "-" + str(i) + ".log"
-            if not os.path.isfile(directory + testFileName):
-                logfile = directory + testFileName
+            testFileName = logFileName + "-" + str(i) + ".log"
+            if not os.path.isfile(directory + testFileName):
+                logFile = directory + testFileName
                 break
     else:
-        logfile += ".log"
-
-    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logfile, level=logging.DEBUG,
+        logFile += ".log"
+    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logFile, level=logging.DEBUG,
                         filemode='w')
-
     if args.log:
         logging.getLogger().addHandler(logging.StreamHandler())
 
-
+    # Run the monoview classification
     res = ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, nbCores, databaseType, path,
                  randomState, hyperParamSearch=hyperParamSearch,
                  metrics=metrics, nIter=nIter, **kwargs)
 
     with open(directory + "res.pickle", "wb") as handle:
-        pickle.dump(randomState, handle)
+        pickle.dump(res, handle)
 
 
     # Pickle the res in a file to be reused.
diff --git a/Code/MonoMultiViewClassifiers/utils/Dataset.py b/Code/MonoMultiViewClassifiers/utils/Dataset.py
index 738136b6..c2ab4805 100644
--- a/Code/MonoMultiViewClassifiers/utils/Dataset.py
+++ b/Code/MonoMultiViewClassifiers/utils/Dataset.py
@@ -2,7 +2,7 @@ import logging
 import os
 import select
 import sys
-
+import h5py
 import numpy as np
 from scipy import sparse
 
@@ -114,3 +114,10 @@ def input_(timeout=15):
     else:
         return "y"
 
+
+def getMonoviewShared(path, name, viewName):
+    """Loads the view and the labels from the HDF5 database file. Shared memory is not used yet, but soon :)"""
+    HDF5_dataset_file = h5py.File(path + name + ".hdf5", "r")
+    X = HDF5_dataset_file.get(viewName).value
+    Y = HDF5_dataset_file.get("Labels").value
+    return X, Y
\ No newline at end of file
diff --git a/Code/MonoMultiViewClassifiers/utils/execution.py b/Code/MonoMultiViewClassifiers/utils/execution.py
index e6cf1107..25ae201f 100644
--- a/Code/MonoMultiViewClassifiers/utils/execution.py
+++ b/Code/MonoMultiViewClassifiers/utils/execution.py
@@ -76,7 +76,7 @@ def parseTheArgs(arguments):
                             , default=[''])
     groupClass.add_argument('--CL_metric_princ', metavar='STRING', action='store',
                             help='Determine which metric to use for randomSearch and optimization', default="f1_score")
-    groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store',
+    groupClass.add_argument('--CL_HPS_iter', metavar='INT', action='store',
                             help='Determine how many hyper parameters optimization tests to do', type=int, default=2)
     groupClass.add_argument('--CL_HPS_type', metavar='STRING', action='store',
                             help='Determine which hyperparamter search function use', default="randomizedSearch")
@@ -239,7 +239,7 @@ def initRandomState(randomStateArg, directory):
 def initLogFile(args):
     """Used to init the directory where the preds will be stored and the log file"""
     resultDirectory = "../Results/" + args.name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") + "/"
-    logFileName = time.strftime("%Y%m%d-%H%M%S") + "-" + ''.join(args.CL_type) + "-" + "_".join(
+    logFileName = time.strftime("%Y_%m_%d-%H:%M:%S") + "-" + ''.join(args.CL_type) + "-" + "_".join(
         args.views) + "-" + args.name + "-LOG"
     if not os.path.exists(os.path.dirname(resultDirectory + logFileName)):
         try:
-- 
GitLab