diff --git a/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py
index b048e8acab948bed42ef485c7dfcf5f7850efe86..67cb73960fe5f701831eea863e8784cd54b0e084 100644
--- a/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py
+++ b/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py
@@ -23,88 +23,12 @@ __status__ = "Prototype"  # Production, Development, Prototype
 # __date__ = 2016 - 03 - 25
 
 
-def initConstants(args, X, classificationIndices, labelsNames, name, directory):
-    try:
-        kwargs = args["args"]
-    except KeyError:
-        kwargs = args
-    t_start = time.time()
-    if type(X.attrs["name"]) == bytes:
-        feat = X.attrs["name"].decode("utf-8")
-    else:
-        feat = X.attrs["name"]
-    CL_type = kwargs["CL_type"]
-    X = getValue(X)
-    learningRate = float(len(classificationIndices[0])) / (len(classificationIndices[0]) + len(classificationIndices[1]))
-    labelsString = "-".join(labelsNames)
-    CL_type_string = CL_type
-
-    outputFileName = directory + CL_type_string + "/" + feat + "/" + "Results-" + CL_type_string + "-" + labelsString + \
-                     '-learnRate' + str(learningRate) + '-' + name + "-" + feat + "-"
-    if not os.path.exists(os.path.dirname(outputFileName)):
-        try:
-            os.makedirs(os.path.dirname(outputFileName))
-        except OSError as exc:
-            if exc.errno != errno.EEXIST:
-                raise
-    return kwargs, t_start, feat, CL_type, X, learningRate, labelsString, outputFileName
-
-
-def initTrainTest(X, Y, classificationIndices):
-    trainIndices, testIndices, testIndicesMulticlass = classificationIndices
-    X_train = extractSubset(X, trainIndices)
-    X_test = extractSubset(X, testIndices)
-    if testIndicesMulticlass != []:
-        X_test_multiclass = extractSubset(X, testIndicesMulticlass)
-    else:
-        X_test_multiclass = []
-    y_train = Y[trainIndices]
-    y_test = Y[testIndices]
-    return X_train, y_train, X_test, y_test, X_test_multiclass
-
-
-def getHPs(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train, randomState,
-           outputFileName, KFolds, nbCores, metrics, kwargs):
-    if hyperParamSearch != "None":
-        logging.debug("Start:\t " + hyperParamSearch + " best settings with " + str(nIter) + " iterations for " + CL_type)
-        classifierHPSearch = getattr(classifierModule, hyperParamSearch)
-        cl_desc = classifierHPSearch(X_train, y_train, randomState, outputFileName, KFolds=KFolds, nbCores=nbCores,
-                                     metric=metrics[0], nIter=nIter)
-        clKWARGS = dict((str(index), desc) for index, desc in enumerate(cl_desc))
-        logging.debug("Done:\t " + hyperParamSearch + "RandomSearch best settings")
-    else:
-        clKWARGS = kwargs[CL_type + "KWARGS"]
-    return clKWARGS
-
-
-def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, y_train, imagesAnalysis):
-    logging.info(stringAnalysis)
-    outputTextFile = open(outputFileName + '.txt', 'w')
-    outputTextFile.write(stringAnalysis)
-    outputTextFile.close()
-    np.savetxt(outputFileName + "full_pred.csv", full_labels_pred.astype(np.int16), delimiter=",")
-    np.savetxt(outputFileName + "train_pred.csv", y_train_pred.astype(np.int16), delimiter=",")
-    np.savetxt(outputFileName + "train_labels.csv", y_train.astype(np.int16), delimiter=",")
-
-    if imagesAnalysis is not None:
-        for imageName in imagesAnalysis:
-            if os.path.isfile(outputFileName + imageName + ".png"):
-                for i in range(1, 20):
-                    testFileName = outputFileName + imageName + "-" + str(i) + ".png"
-                    if not os.path.isfile(testFileName):
-                        imagesAnalysis[imageName].savefig(testFileName)
-                        break
-
-            imagesAnalysis[imageName].savefig(outputFileName + imageName + '.png')
 
 
 def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, KFolds, datasetFileIndex, databaseType,
                            path, randomState, labels, hyperParamSearch="randomizedSearch",
                            metrics=[["accuracy_score", None]], nIter=30, **args):
     DATASET = h5py.File(path + name + str(datasetFileIndex) + ".hdf5", "r")
-    # kwargs = args["args"]
-    # views = [DATASET.get("View" + str(viewIndex)).attrs["name"] for viewIndex in
-    #          range(DATASET.get("Metadata").attrs["nbView"])]
     neededViewIndex = args["viewIndex"]
     X = DATASET.get("View" + str(neededViewIndex))
     Y = labels
@@ -186,6 +110,82 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol
     return viewIndex, [CL_type, cl_desc + [feat], metricsScores, full_labels_pred, clKWARGS, y_test_multiclass_pred]
 
 
+def initConstants(args, X, classificationIndices, labelsNames, name, directory):
+    try:
+        kwargs = args["args"]
+    except KeyError:
+        kwargs = args
+    t_start = time.time()
+    if type(X.attrs["name"]) == bytes:
+        feat = X.attrs["name"].decode("utf-8")
+    else:
+        feat = X.attrs["name"]
+    CL_type = kwargs["CL_type"]
+    X = getValue(X)
+    learningRate = float(len(classificationIndices[0])) / (len(classificationIndices[0]) + len(classificationIndices[1]))
+    labelsString = "-".join(labelsNames)
+    CL_type_string = CL_type
+
+    outputFileName = directory + CL_type_string + "/" + feat + "/" + "Results-" + CL_type_string + "-" + labelsString + \
+                     '-learnRate' + str(learningRate) + '-' + name + "-" + feat + "-"
+    if not os.path.exists(os.path.dirname(outputFileName)):
+        try:
+            os.makedirs(os.path.dirname(outputFileName))
+        except OSError as exc:
+            if exc.errno != errno.EEXIST:
+                raise
+    return kwargs, t_start, feat, CL_type, X, learningRate, labelsString, outputFileName
+
+
+def initTrainTest(X, Y, classificationIndices):
+    trainIndices, testIndices, testIndicesMulticlass = classificationIndices
+    X_train = extractSubset(X, trainIndices)
+    X_test = extractSubset(X, testIndices)
+    if testIndicesMulticlass != []:
+        X_test_multiclass = extractSubset(X, testIndicesMulticlass)
+    else:
+        X_test_multiclass = []
+    y_train = Y[trainIndices]
+    y_test = Y[testIndices]
+    return X_train, y_train, X_test, y_test, X_test_multiclass
+
+
+def getHPs(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train, randomState,
+           outputFileName, KFolds, nbCores, metrics, kwargs):
+    if hyperParamSearch != "None":
+        logging.debug("Start:\t " + hyperParamSearch + " best settings with " + str(nIter) + " iterations for " + CL_type)
+        classifierHPSearch = getattr(classifierModule, hyperParamSearch)
+        cl_desc = classifierHPSearch(X_train, y_train, randomState, outputFileName, KFolds=KFolds, nbCores=nbCores,
+                                     metric=metrics[0], nIter=nIter)
+        clKWARGS = dict((str(index), desc) for index, desc in enumerate(cl_desc))
+        logging.debug("Done:\t " + hyperParamSearch + "RandomSearch best settings")
+    else:
+        clKWARGS = kwargs[CL_type + "KWARGS"]
+    return clKWARGS
+
+
+def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, y_train, imagesAnalysis):
+    logging.info(stringAnalysis)
+    outputTextFile = open(outputFileName + '.txt', 'w')
+    outputTextFile.write(stringAnalysis)
+    outputTextFile.close()
+    np.savetxt(outputFileName + "full_pred.csv", full_labels_pred.astype(np.int16), delimiter=",")
+    np.savetxt(outputFileName + "train_pred.csv", y_train_pred.astype(np.int16), delimiter=",")
+    np.savetxt(outputFileName + "train_labels.csv", y_train.astype(np.int16), delimiter=",")
+
+    if imagesAnalysis is not None:
+        for imageName in imagesAnalysis:
+            if os.path.isfile(outputFileName + imageName + ".png"):
+                for i in range(1, 20):
+                    testFileName = outputFileName + imageName + "-" + str(i) + ".png"
+                    if not os.path.isfile(testFileName):
+                        imagesAnalysis[imageName].savefig(testFileName)
+                        break
+
+            imagesAnalysis[imageName].savefig(outputFileName + imageName + '.png')
+
+
+
 if __name__ == '__main__':
     """The goal of this part of the module is to be able to execute a monoview experimentation
      on a node of a cluster independently.
diff --git a/Code/MonoMultiViewClassifiers/MultiviewClassifiers/FatSCMLateFusion/FatSCMLateFusionModule.py b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/FatSCMLateFusion/FatSCMLateFusionModule.py
new file mode 100644
index 0000000000000000000000000000000000000000..6037b54b7b0a6823e5942d6f37b8299bf7a18952
--- /dev/null
+++ b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/FatSCMLateFusion/FatSCMLateFusionModule.py
@@ -0,0 +1,123 @@
+import numpy as np
+from pyscm.scm import SetCoveringMachineClassifier as scm
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.externals.six import iteritems
+
+
+def genName(config):
+    return "FatSCMLateFusion"
+
+
+def getBenchmark(benchmark, args=None):
+    benchmark["Multiview"]["FatSCMLateFusion"] = ["take_everything"]
+    return benchmark
+
+
+def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices):
+    argumentsList = []
+    monoviewDecisions = np.transpose(np.array([monoviewResult[1][3] for monoviewResult in resultsMonoview]))
+    arguments = {"CL_type": "FatSCMLateFusion",
+                 "views": ["all"],
+                 "NB_VIEW": len(resultsMonoview),
+                 "viewsIndices": range(len(resultsMonoview)),
+                 "NB_CLASS": len(args.CL_classes),
+                 "LABELS_NAMES": args.CL_classes,
+                 "FatSCMLateFusionKWARGS": {
+                     "monoviewDecisions": monoviewDecisions,
+                     "p": args.FSCMLF_p,
+                     "max_attributes": args.FSCMLF_max_attributes,
+                     "model": args.FSCMLF_model,
+                 }
+                 }
+    argumentsList.append(arguments)
+    return argumentsList
+
+
+def genParamsSets(classificationKWARGS, randomState, nIter=1):
+    """Used to generate parameters sets for the random hyper parameters optimization function"""
+    paramsSets = []
+    for _ in range(nIter):
+        max_attributes = randomState.randint(1, 20)
+        p = randomState.random_sample()
+        model = randomState.choice(["conjunction", "disjunction"])
+        paramsSets.append([p, max_attributes, model])
+
+    return paramsSets
+
+
+class FatSCMLateFusionClass:
+
+    def __init__(self, randomState, NB_CORES=1, **kwargs):
+        if kwargs["p"]:
+            self.p = kwargs["p"]
+        else:
+            self.p = 0.5
+        if kwargs["max_attributes"]:
+            self.max_attributes = kwargs["max_attributes"]
+        else:
+            self.max_attributes = 5
+        if kwargs["model"]:
+            self.model = kwargs["model"]
+        else:
+            self.model = "conjunction"
+        self.monoviewDecisions = kwargs["monoviewDecisions"]
+        self.randomState = randomState
+
+    def setParams(self, paramsSet):
+        self.p = paramsSet[0]
+        self.max_attributes = paramsSet[1]
+        self.model = paramsSet[2]
+
+    def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None, metric=["f1_score", None]):
+        features = self.monoviewDecisions[trainIndices]
+        self.SCMClassifier = DecisionStumpSCMNew(p=self.p, max_rules=self.max_attributes, model_type=self.model,
+                                                 random_state=self.randomState)
+        self.SCMClassifier.fit(features, labels[trainIndices].astype(int))
+
+    def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):
+        if usedIndices is None:
+            usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
+        predictedLabels = self.SCMClassifier.predict(self.monoviewDecisions[usedIndices])
+        return predictedLabels
+
+    def predict_probas_hdf5(self, DATASET, usedIndices=None):
+        pass
+
+    def getConfigString(self, classificationKWARGS):
+        return "p : "+str(self.p)+", max_attributes : "+str(self.max_attributes)+", model : "+self.model
+
+    def getSpecificAnalysis(self, classificationKWARGS):
+        stringAnalysis = ''
+        return stringAnalysis
+
+
+class DecisionStumpSCMNew(BaseEstimator, ClassifierMixin):
+    """Decision stump SCM.
+    A hands-on wrapper around the SCM using decision stumps, built in the sklearn format so that sklearn
+    utilities (CV, grid search, and so on ...) can be used on the SCM."""
+
+    def __init__(self, model_type='conjunction', p=0.1, max_rules=10, random_state=42):
+        super(DecisionStumpSCMNew, self).__init__()
+        self.model_type = model_type
+        self.p = p
+        self.max_rules = max_rules
+        self.random_state = random_state
+
+    def fit(self, X, y):
+        self.clf = scm(model_type=self.model_type, max_rules=self.max_rules, p=self.p, random_state=self.random_state)
+        self.clf.fit(X=X, y=y)
+
+    def predict(self, X):
+        return self.clf.predict(X)
+
+    def set_params(self, **params):
+        for key, value in iteritems(params):
+            if key == 'p':
+                self.p = value
+            if key == 'model_type':
+                self.model_type = value
+            if key == 'max_rules':
+                self.max_rules = value
+
+    def get_stats(self):
+        return {"Binary_attributes": self.clf.model_.rules}
diff --git a/Code/MonoMultiViewClassifiers/MultiviewClassifiers/FatSCMLateFusion/__init__.py b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/FatSCMLateFusion/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..202c5b697d47664bcc3ed2220b4b5f00c8d731f5
--- /dev/null
+++ b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/FatSCMLateFusion/__init__.py
@@ -0,0 +1 @@
+from . import FatSCMLateFusionModule, analyzeResults
\ No newline at end of file
diff --git a/Code/MonoMultiViewClassifiers/MultiviewClassifiers/FatSCMLateFusion/analyzeResults.py b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/FatSCMLateFusion/analyzeResults.py
new file mode 100644
index 0000000000000000000000000000000000000000..af44a6fb415aa97cd62c1a52d10dbca3baf3041b
--- /dev/null
+++ b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/FatSCMLateFusion/analyzeResults.py
@@ -0,0 +1,21 @@
+from ...Multiview import analyzeResults
+
+# Author-Info
+__author__ = "Baptiste Bauvin"
+__status__ = "Prototype"  # Production, Development, Prototype
+
+
+def execute(classifier, trainLabels,
+            testLabels, DATASET,
+            classificationKWARGS, classificationIndices,
+            LABELS_DICTIONARY, views, nbCores, times,
+            name, KFolds,
+            hyperParamSearch, nIter, metrics,
+            viewsIndices, randomState, labels, classifierModule):
+    return analyzeResults.execute(classifier, trainLabels,
+                                  testLabels, DATASET,
+                                  classificationKWARGS, classificationIndices,
+                                  LABELS_DICTIONARY, views, nbCores, times,
+                                  name, KFolds,
+                                  hyperParamSearch, nIter, metrics,
+                                  viewsIndices, randomState, labels, classifierModule)
\ No newline at end of file
diff --git a/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/Methods/LateFusionPackage/SCMForLinear.py b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/Methods/LateFusionPackage/SCMForLinear.py
index 6551cd11ef68b106440bc34953ab14907ba25fba..623e58ac05ed063d3a47147073474e1ed40ed86e 100644
--- a/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/Methods/LateFusionPackage/SCMForLinear.py
+++ b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Fusion/Methods/LateFusionPackage/SCMForLinear.py
@@ -45,7 +45,6 @@ class DecisionStumpSCMNew(BaseEstimator, ClassifierMixin):
 
 
 def genParamsSets(classificationKWARGS, randomState, nIter=1):
-    nbView = classificationKWARGS["nbView"]
     paramsSets = []
     for _ in range(nIter):
         max_attributes = randomState.randint(1, 20)
diff --git a/Code/MonoMultiViewClassifiers/utils/execution.py b/Code/MonoMultiViewClassifiers/utils/execution.py
index 9f7db7a9475b4c2fc830d4abc610c7d888c5b28c..db3eb36f71739a5202e9360457c5d7ea47c9b9c1 100644
--- a/Code/MonoMultiViewClassifiers/utils/execution.py
+++ b/Code/MonoMultiViewClassifiers/utils/execution.py
@@ -198,10 +198,23 @@ def parseTheArgs(arguments):
                                     default=[])
 
     groupMumboNew = parser.add_argument_group('New Mumbo implementation arguments')
-    groupFatLateFusion.add_argument('--MUN_n_estimators', metavar='INT', action='store',
+    groupMumboNew.add_argument('--MUN_n_estimators', metavar='INT', action='store',
                                     help='Determine the number of esitmators for mumbo', type=int,
                                     default=10)
+    groupFatSCMLateFusion = parser.add_argument_group('Fat SCM Late Fusion arguments')
+    groupFatSCMLateFusion.add_argument('--FSCMLF_p', metavar='FLOAT', action='store',
+                                       help='Determine the p argument of the SCM', type=float,
+                                       default=0.5)
+    groupFatSCMLateFusion.add_argument('--FSCMLF_max_attributes', metavar='INT', action='store',
+                                       help='Determine the maximum number of attributes used by the SCM', type=int,
+                                       default=4)
+    groupFatSCMLateFusion.add_argument('--FSCMLF_model', metavar='STRING', action='store',
+                                       help='Determine the model type of the SCM',
+                                       default="conjunction")
+
+
+
 
 
     args = parser.parse_args(arguments)
     return args