Commit 266b200b authored by Baptiste Bauvin's avatar Baptiste Bauvin

Added FatSCMLateFusion and reworked execmonoview slightly

parent edf3f9db
@@ -23,88 +23,12 @@ __status__ = "Prototype"  # Production, Development, Prototype
# __date__ = 2016 - 03 - 25
def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, KFolds, datasetFileIndex, databaseType,
                           path, randomState, labels, hyperParamSearch="randomizedSearch",
                           metrics=[["accuracy_score", None]], nIter=30, **args):
    DATASET = h5py.File(path + name + str(datasetFileIndex) + ".hdf5", "r")
    # kwargs = args["args"]
    # views = [DATASET.get("View" + str(viewIndex)).attrs["name"] for viewIndex in
    #          range(DATASET.get("Metadata").attrs["nbView"])]
    neededViewIndex = args["viewIndex"]
    X = DATASET.get("View" + str(neededViewIndex))
    Y = labels
@@ -186,6 +110,82 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol
    return viewIndex, [CL_type, cl_desc + [feat], metricsScores, full_labels_pred, clKWARGS, y_test_multiclass_pred]
def initConstants(args, X, classificationIndices, labelsNames, name, directory):
    try:
        kwargs = args["args"]
    except KeyError:
        kwargs = args
    t_start = time.time()
    if isinstance(X.attrs["name"], bytes):
        feat = X.attrs["name"].decode("utf-8")
    else:
        feat = X.attrs["name"]
    CL_type = kwargs["CL_type"]
    X = getValue(X)
    # Fraction of the examples assigned to the training split.
    learningRate = float(len(classificationIndices[0])) / (len(classificationIndices[0]) + len(classificationIndices[1]))
    labelsString = "-".join(labelsNames)
    CL_type_string = CL_type
    outputFileName = directory + CL_type_string + "/" + feat + "/" + "Results-" + CL_type_string + "-" + labelsString + \
                     '-learnRate' + str(learningRate) + '-' + name + "-" + feat + "-"
    if not os.path.exists(os.path.dirname(outputFileName)):
        try:
            os.makedirs(os.path.dirname(outputFileName))
        except OSError as exc:
            # The directory may have been created concurrently by another process.
            if exc.errno != errno.EEXIST:
                raise
    return kwargs, t_start, feat, CL_type, X, learningRate, labelsString, outputFileName
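# Sketch of the learningRate term embedded in outputFileName (hypothetical
# index lists): it is the fraction of examples assigned to the training split.
trainIndices, testIndices = list(range(80)), list(range(80, 100))
learningRate = float(len(trainIndices)) / (len(trainIndices) + len(testIndices))
# learningRate == 0.8, which appears in the file-name prefix as "learnRate0.8".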
def initTrainTest(X, Y, classificationIndices):
    trainIndices, testIndices, testIndicesMulticlass = classificationIndices
    X_train = extractSubset(X, trainIndices)
    X_test = extractSubset(X, testIndices)
    if testIndicesMulticlass != []:
        X_test_multiclass = extractSubset(X, testIndicesMulticlass)
    else:
        X_test_multiclass = []
    y_train = Y[trainIndices]
    y_test = Y[testIndices]
    return X_train, y_train, X_test, y_test, X_test_multiclass
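# Minimal usage sketch with hypothetical data. extractSubset is defined
# elsewhere in the project; here we assume it amounts to plain row indexing.
import numpy as np

def extractSubset(matrix, usedIndices):  # hypothetical stand-in
    return matrix[usedIndices, :]

X = np.arange(20).reshape(10, 2)                  # 10 examples, 2 features
Y = np.array([0, 1] * 5)
classificationIndices = ([0, 1, 2, 3, 4, 5], [6, 7, 8, 9], [])
X_train, y_train, X_test, y_test, X_test_multiclass = initTrainTest(X, Y, classificationIndices)
# X_train.shape == (6, 2), X_test.shape == (4, 2), X_test_multiclass == []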
def getHPs(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train, randomState,
           outputFileName, KFolds, nbCores, metrics, kwargs):
    if hyperParamSearch != "None":
        logging.debug("Start:\t " + hyperParamSearch + " best settings with " + str(nIter) + " iterations for " + CL_type)
        classifierHPSearch = getattr(classifierModule, hyperParamSearch)
        cl_desc = classifierHPSearch(X_train, y_train, randomState, outputFileName, KFolds=KFolds, nbCores=nbCores,
                                     metric=metrics[0], nIter=nIter)
        clKWARGS = dict((str(index), desc) for index, desc in enumerate(cl_desc))
        logging.debug("Done:\t " + hyperParamSearch + " best settings")
    else:
        clKWARGS = kwargs[CL_type + "KWARGS"]
    return clKWARGS
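# Shape sketch of the returned clKWARGS after a search (hypothetical values):
# each hyper-parameter found in cl_desc is keyed by its position, as a string.
cl_desc = [0.5, 12, "conjunction"]
clKWARGS = dict((str(index), desc) for index, desc in enumerate(cl_desc))
# clKWARGS == {"0": 0.5, "1": 12, "2": "conjunction"}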
def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, y_train, imagesAnalysis):
    logging.info(stringAnalysis)
    with open(outputFileName + '.txt', 'w') as outputTextFile:
        outputTextFile.write(stringAnalysis)
    np.savetxt(outputFileName + "full_pred.csv", full_labels_pred.astype(np.int16), delimiter=",")
    np.savetxt(outputFileName + "train_pred.csv", y_train_pred.astype(np.int16), delimiter=",")
    np.savetxt(outputFileName + "train_labels.csv", y_train.astype(np.int16), delimiter=",")
    if imagesAnalysis is not None:
        for imageName in imagesAnalysis:
            if os.path.isfile(outputFileName + imageName + ".png"):
                # The base name is taken: save under the first free numbered
                # name (up to 19 versions) before rewriting the base file.
                for i in range(1, 20):
                    testFileName = outputFileName + imageName + "-" + str(i) + ".png"
                    if not os.path.isfile(testFileName):
                        imagesAnalysis[imageName].savefig(testFileName)
                        break
            imagesAnalysis[imageName].savefig(outputFileName + imageName + '.png')
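# File layout sketch (hypothetical prefix): with outputFileName ending in
# "Results-DecisionTree-yes-no-", saveResults writes
#   ...Results-DecisionTree-yes-no-.txt              textual analysis
#   ...Results-DecisionTree-yes-no-full_pred.csv     predictions on the whole dataset
#   ...Results-DecisionTree-yes-no-train_pred.csv    predictions on the training set
#   ...Results-DecisionTree-yes-no-train_labels.csv  ground-truth training labels
# plus one .png per figure in imagesAnalysis.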
if __name__ == '__main__':
    """The goal of this part of the module is to be able to execute a monoview experimentation
    on a node of a cluster independently.
......
import numpy as np
from pyscm.scm import SetCoveringMachineClassifier as scm
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.externals.six import iteritems


def genName(config):
    return "FatSCMLateFusion"


def getBenchmark(benchmark, args=None):
    benchmark["Multiview"]["FatSCMLateFusion"] = ["take_everything"]
    return benchmark
def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices):
    argumentsList = []
    # Stack the full-dataset predictions of every monoview classifier into an
    # (n_examples, n_views) matrix: these decisions are the features of the fusion.
    monoviewDecisions = np.transpose(np.array([monoviewResult[1][3] for monoviewResult in resultsMonoview]))
    arguments = {"CL_type": "FatSCMLateFusion",
                 "views": ["all"],
                 "NB_VIEW": len(resultsMonoview),
                 "viewsIndices": range(len(resultsMonoview)),
                 "NB_CLASS": len(args.CL_classes),
                 "LABELS_NAMES": args.CL_classes,
                 "FatSCMLateFusionKWARGS": {
                     "monoviewDecisions": monoviewDecisions,
                     "p": args.FSCMLF_p,
                     "max_attributes": args.FSCMLF_max_attributes,
                     "model": args.FSCMLF_model,
                 }
                 }
    argumentsList.append(arguments)
    return argumentsList
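# Shape sketch for monoviewDecisions (hypothetical values): with 3 monoview
# classifiers and 5 examples, transposing the stacked prediction vectors yields
# one row per example and one column per view, i.e. the late-fusion features.
import numpy as np

view_predictions = [np.array([0, 1, 1, 0, 1]),   # view 0
                    np.array([0, 1, 0, 0, 1]),   # view 1
                    np.array([1, 1, 1, 0, 0])]   # view 2
monoviewDecisions = np.transpose(np.array(view_predictions))
# monoviewDecisions.shape == (5, 3): row i gathers the views' votes on example i.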
def genParamsSets(classificationKWARGS, randomState, nIter=1):
    """Used to generate parameter sets for the random hyper-parameter optimization function"""
    paramsSets = []
    for _ in range(nIter):
        max_attributes = randomState.randint(1, 20)
        p = randomState.random_sample()
        model = randomState.choice(["conjunction", "disjunction"])
        paramsSets.append([p, max_attributes, model])
    return paramsSets
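# Usage sketch: draw two candidate parameter sets reproducibly (seed chosen
# arbitrarily, for illustration only).
import numpy as np

paramsSets = genParamsSets({}, np.random.RandomState(42), nIter=2)
# Each entry is [p, max_attributes, model], consumed in that order by
# FatSCMLateFusionClass.setParams below.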
class FatSCMLateFusionClass:

    def __init__(self, randomState, NB_CORES=1, **kwargs):
        # Falsy values (None, 0) fall back to the defaults below.
        if kwargs["p"]:
            self.p = kwargs["p"]
        else:
            self.p = 0.5
        if kwargs["max_attributes"]:
            self.max_attributes = kwargs["max_attributes"]
        else:
            self.max_attributes = 5
        if kwargs["model"]:
            self.model = kwargs["model"]
        else:
            self.model = "conjunction"
        self.monoviewDecisions = kwargs["monoviewDecisions"]
        self.randomState = randomState

    def setParams(self, paramsSet):
        self.p = paramsSet[0]
        self.max_attributes = paramsSet[1]
        self.model = paramsSet[2]

    def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None, metric=["f1_score", None]):
        # The monoview decisions, not the raw views, are the input features of the SCM.
        features = self.monoviewDecisions[trainIndices]
        self.SCMClassifier = DecisionStumpSCMNew(p=self.p, max_rules=self.max_attributes, model_type=self.model,
                                                 random_state=self.randomState)
        self.SCMClassifier.fit(features, labels[trainIndices].astype(int))

    def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):
        if usedIndices is None:
            usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
        predictedLabels = self.SCMClassifier.predict(self.monoviewDecisions[usedIndices])
        return predictedLabels

    def predict_probas_hdf5(self, DATASET, usedIndices=None):
        pass

    def getConfigString(self, classificationKWARGS):
        return "p : " + str(self.p) + ", max_attributes : " + str(self.max_attributes) + ", model : " + self.model

    def getSpecificAnalysis(self, classificationKWARGS):
        stringAnalysis = ''
        return stringAnalysis
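# Minimal end-to-end sketch with hypothetical data (assumes pyscm is installed):
# the fusion trains an SCM on the stacked monoview decisions and predicts from
# them. DATASET is only read when usedIndices is None, so None suffices here.
import numpy as np

rng = np.random.RandomState(1)
kwargs = {"monoviewDecisions": rng.randint(0, 2, size=(100, 3)),
          "p": 0.5, "max_attributes": 5, "model": "conjunction"}
fusion = FatSCMLateFusionClass(np.random.RandomState(42), **kwargs)
fusion.fit_hdf5(None, labels=rng.randint(0, 2, size=100), trainIndices=np.arange(80))
predictions = fusion.predict_hdf5(None, usedIndices=np.arange(80, 100))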
class DecisionStumpSCMNew(BaseEstimator, ClassifierMixin):
    """Wrapper around the decision-stump SCM that follows the scikit-learn estimator
    conventions, so that sklearn utilities (cross-validation, grid search, and so on)
    can be used with the SCM."""

    def __init__(self, model_type='conjunction', p=0.1, max_rules=10, random_state=42):
        super(DecisionStumpSCMNew, self).__init__()
        self.model_type = model_type
        self.p = p
        self.max_rules = max_rules
        self.random_state = random_state

    def fit(self, X, y):
        self.clf = scm(model_type=self.model_type, max_rules=self.max_rules, p=self.p, random_state=self.random_state)
        self.clf.fit(X=X, y=y)
        return self  # sklearn convention: fit returns the estimator

    def predict(self, X):
        return self.clf.predict(X)

    def set_params(self, **params):
        for key, value in iteritems(params):
            if key == 'p':
                self.p = value
            if key == 'model_type':
                self.model_type = value
            if key == 'max_rules':
                self.max_rules = value
        return self  # sklearn convention: set_params returns the estimator

    def get_stats(self):
        return {"Binary_attributes": self.clf.model_.rules}
from . import FatSCMLateFusionModule, analyzeResults
\ No newline at end of file
from ...Multiview import analyzeResults

# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype"  # Production, Development, Prototype


def execute(classifier, trainLabels,
            testLabels, DATASET,
            classificationKWARGS, classificationIndices,
            LABELS_DICTIONARY, views, nbCores, times,
            name, KFolds,
            hyperParamSearch, nIter, metrics,
            viewsIndices, randomState, labels, classifierModule):
    # Delegate to the generic multiview results analysis.
    return analyzeResults.execute(classifier, trainLabels,
                                  testLabels, DATASET,
                                  classificationKWARGS, classificationIndices,
                                  LABELS_DICTIONARY, views, nbCores, times,
                                  name, KFolds,
                                  hyperParamSearch, nIter, metrics,
                                  viewsIndices, randomState, labels, classifierModule)
\ No newline at end of file
@@ -45,7 +45,6 @@ class DecisionStumpSCMNew(BaseEstimator, ClassifierMixin):
def genParamsSets(classificationKWARGS, randomState, nIter=1):
    nbView = classificationKWARGS["nbView"]
    paramsSets = []
    for _ in range(nIter):
        max_attributes = randomState.randint(1, 20)
......
@@ -198,10 +198,23 @@ def parseTheArgs(arguments):
                                    default=[])

    groupMumboNew = parser.add_argument_group('New Mumbo implementation arguments')
    groupMumboNew.add_argument('--MUN_n_estimators', metavar='INT', action='store',
                               help='Determine the number of estimators for mumbo', type=int,
                               default=10)

    groupFatSCMLateFusion = parser.add_argument_group('Fat SCM Late Fusion arguments')
    groupFatSCMLateFusion.add_argument('--FSCMLF_p', metavar='FLOAT', action='store',
                                       help='Determine the p argument of the SCM', type=float,
                                       default=0.5)
    groupFatSCMLateFusion.add_argument('--FSCMLF_max_attributes', metavar='INT', action='store',
                                       help='Determine the maximum number of attributes used by the SCM', type=int,
                                       default=4)
    groupFatSCMLateFusion.add_argument('--FSCMLF_model', metavar='STRING', action='store',
                                       help='Determine the model type of the SCM',
                                       default="conjunction")

    args = parser.parse_args(arguments)
    return args
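# Usage sketch (hypothetical values, assuming no other flag is mandatory): the
# new Fat SCM Late Fusion options parsed through parseTheArgs.
args = parseTheArgs(["--FSCMLF_p", "0.8",
                     "--FSCMLF_max_attributes", "6",
                     "--FSCMLF_model", "disjunction"])
# args.FSCMLF_p == 0.8, args.FSCMLF_max_attributes == 6, args.FSCMLF_model == "disjunction"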
......