diff --git a/Code/MonoMutliViewClassifiers/ExecClassif.py b/Code/MonoMutliViewClassifiers/ExecClassif.py
index 3d9c6efdf8bdbef10c0a8cf516912faebf51a0fa..dc0fb1aa01b955110a8bcbd1e8879c4b4315a8cd 100644
--- a/Code/MonoMutliViewClassifiers/ExecClassif.py
+++ b/Code/MonoMutliViewClassifiers/ExecClassif.py
@@ -537,13 +537,17 @@ groupSCM.add_argument('--CL_SCM_model_type', metavar='STRING', action='store',
 groupMumbo = parser.add_argument_group('Mumbo arguments')
 groupMumbo.add_argument('--MU_types', metavar='STRING', action='store', nargs="+",
                         help='Determine which monoview classifier to use with Mumbo',
-                        default=['DecisionTree', 'DecisionTree', 'DecisionTree'])
+                        default=[''])
 groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', nargs='+',
-                        help='Configuration for the monoview classifier in Mumbo',
-                        default=['2:0.5', '2:0.5', '2:0.5'])
+                        help='Configuration for each monoview classifier in Mumbo; separate classifiers with a space and their arguments with ":"',
+                        default=[''])
 groupMumbo.add_argument('--MU_iter', metavar='INT', action='store', nargs=3,
                         help='Max number of iteration, min number of iteration, convergence threshold', type=float,
                         default=[10, 1, 0.01])
+groupMumbo.add_argument('--MU_combination', action='store_true',
+                        help='Try all the monoview classifiers combinations for each view',
+                        default=False)
+
 
 groupFusion = parser.add_argument_group('Fusion arguments')
 groupFusion.add_argument('--FU_types', metavar='STRING', action='store', nargs="+",
@@ -670,6 +674,15 @@ if statsIter > 1:
     else:
         iterResults = []
         for iterIndex in range(statsIter):
+            if not os.path.exists(os.path.dirname(directories[iterIndex] + "train_labels.csv")):
+                try:
+                    os.makedirs(os.path.dirname(directories[iterIndex] + "train_labels.csv"))
+                except OSError as exc:
+                    if exc.errno != errno.EEXIST:
+                        raise
+            trainIndices, testIndices = classificationIndices[iterIndex]
+            trainLabels = DATASET.get("Labels").value[trainIndices]
+            np.savetxt(directories[iterIndex] + "train_labels.csv", trainLabels, delimiter=",")
             iterResults.append(
                 classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories[iterIndex], args,
                                 classificationIndices[iterIndex], kFolds[iterIndex], statsIterRandomStates[iterIndex],
@@ -678,6 +691,15 @@ if statsIter > 1:
     analyzeIterResults(iterResults, args.name, metrics, directory)
 else:
+    if not os.path.exists(os.path.dirname(directories + "train_labels.csv")):
+        try:
+            os.makedirs(os.path.dirname(directories + "train_labels.csv"))
+        except OSError as exc:
+            if exc.errno != errno.EEXIST:
+                raise
+    trainIndices, testIndices = classificationIndices
+    trainLabels = DATASET.get("Labels").value[trainIndices]
+    np.savetxt(directories + "train_labels.csv", trainLabels, delimiter=",")
     res = classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories, args,
                           classificationIndices, kFolds, statsIterRandomStates, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start,
diff --git a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py
index bacbdee67488c202f269e0887a90e9584d177272..949bcd34ca82cbd8a695ed316e17a401000c6c86 100644
--- a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py
+++ b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py
@@ -66,7 +66,7 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCor
     else:
         classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS)
-    classifier.fit_hdf5(DATASET,
trainIndices=learningIndices, viewsIndices=viewsIndices) + classifier.fit_hdf5(DATASET, trainIndices=learningIndices, viewsIndices=viewsIndices, metric=metrics[0]) trainLabels = classifier.predict_hdf5(DATASET, usedIndices=learningIndices, viewsIndices=viewsIndices) testLabels = classifier.predict_hdf5(DATASET, usedIndices=validationIndices, viewsIndices=viewsIndices) fullLabels = classifier.predict_hdf5(DATASET, viewsIndices=viewsIndices) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py index b523e2bf103f040ed5e9f4565ba4334d708afd2a..fc1d65b97637929125e9dcb18fdbdb5c5b8b97f7 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py @@ -160,7 +160,7 @@ class Fusion: def setParams(self, paramsSet): self.classifier.setParams(paramsSet) - def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): + def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None, metric=["f1_score", None]): self.classifier.fit_hdf5(DATASET, trainIndices=trainIndices, viewsIndices=viewsIndices) def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py index f2806f74e7f5b56aa739638de42d01896567a3d5..14fc273d5af819cb10e1b3fe021c20b2fe722871 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py @@ -1,114 +1,252 @@ -from sklearn import tree -from sklearn.metrics import accuracy_score +import sklearn +from sklearn.base import BaseEstimator, ClassifierMixin import numpy as np from ModifiedMulticlass import OneVsRestClassifier from SubSampling import subSample import logging + # Add weights import Metrics -def DecisionTree(data, labels, arg, weights, randomState): - depth = int(arg[0]) - subSampling = float(arg[1]) - if subSampling != 1.0: - subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, randomState, - weights=weights) - else: - subSampledData, subSampledLabels, subSampledWeights = data, labels, weights - isBad = False - classifier = tree.DecisionTreeClassifier(max_depth=depth) - # classifier = OneVsRestClassifier(tree.DecisionTreeClassifier(max_depth=depth)) - classifier.fit(subSampledData, subSampledLabels, subSampledWeights) - prediction = classifier.predict(data) - accuracy = accuracy_score(labels, prediction) - if accuracy < 0.5: - isBad = True +class DecisionTree(BaseEstimator, ClassifierMixin): + def __init__(self, depth=10, criterion="gini", splitter="best", subSampling=1.0, randomState=None, **kwargs): + if kwargs: + self.depth = kwargs["depth"] + self.criterion = kwargs["criterion"] + self.splitter = kwargs["splitter"] + self.subSampling = kwargs["subSampling"] + self.randomState = kwargs["randomState"] + else: + self.depth = depth + self.criterion = criterion + self.splitter = splitter + self.subSampling = subSampling + if randomState is None: + self.randomState=np.random.RandomState() + else: + self.randomState=randomState + self.decisionTree = sklearn.tree.DecisionTreeClassifier(splitter=self.splitter, criterion=self.criterion, max_depth=self.depth) + + def fit(self, data, labels, sample_weight=None): + if sample_weight is None: + sample_weight = np.ones(len(data))/len(data) + + if self.subSampling != 1.0: + 
subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, self.subSampling, self.randomState, + weights=sample_weight) + else: + subSampledData, subSampledLabels, subSampledWeights = data, labels, sample_weight + + self.decisionTree.fit(subSampledData, subSampledLabels, sample_weight=subSampledWeights) + + return self + + def fit_hdf5(self, data, labels, weights, metric): + metricModule = getattr(Metrics, metric[0]) + if metric[1] is not None: + metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) + else: + metricKWARGS = {} + if weights is None: + weights = np.ones(len(data))/len(data) + + # Check that X and y have correct shape + if self.subSampling != 1.0: + subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, self.subSampling, self.randomState, + weights=weights) + else: + subSampledData, subSampledLabels, subSampledWeights = data, labels, weights + # self.subSampledData = subSampledData + # self. + # self. + # Store the classes seen during fit + self.decisionTree.fit(subSampledData, subSampledLabels, sample_weight=subSampledWeights) + prediction = self.decisionTree.predict(data) + metricKWARGS = {"0":weights} + averageScore = metricModule.score(labels, prediction, **metricKWARGS) + if averageScore < 0.5: + isBad = True + else: + isBad = False + + # self.X_ = X + # self.y_ = y + # Return the classifier + # self.decisionTree, prediction, isBad, averageScore + return self.decisionTree, prediction, isBad, averageScore + + def predict(self, data): - return classifier, prediction, isBad, accuracy + # Check is fit had been called + # check_is_fitted(self, ['X_', 'y_']) + + # Input validation + # X = check_array(X) + predictedLabels = self.decisionTree.predict(data) + # closest = np.argmin(euclidean_distances(X, self.X_), axis=1) + return predictedLabels + + def get_params(self, deep=True): + # suppose this estimator has parameters "alpha" and "recursive" + return {"depth": self.depth, "criterion": self.criterion, "splitter": self.splitter, "subSampling": self.subSampling} + + def set_params(self, **parameters): + self.depth = parameters["depth"] + self.criterion = parameters["criterion"] + self.splitter = parameters["splitter"] + self.subSampling = parameters["subSampling"] + # for parameter, value in parameters.items(): + # print parameter, value + # self.setattr(parameter, value) + return self + +# def DecisionTree(data, labels, arg, weights, randomState): +# depth = int(arg[0]) +# subSampling = float(arg[1]) +# if subSampling != 1.0: +# subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, randomState, +# weights=weights) +# else: +# subSampledData, subSampledLabels, subSampledWeights = data, labels, weights +# isBad = False +# classifier = sklearn.tree.DecisionTreeClassifier(max_depth=depth) +# # classifier = OneVsRestClassifier(tree.DecisionTreeClassifier(max_depth=depth)) +# classifier.fit(subSampledData, subSampledLabels, sample_weight=subSampledWeights) +# prediction = classifier.predict(data) +# accuracy = accuracy_score(labels, prediction) +# if accuracy < 0.5: +# isBad = True +# +# return classifier, prediction, isBad, accuracy + + +def getKWARGS(argList, randomState): + kwargs = {"depth":int(argList[0]), "criterion":argList[1], "splitter":argList[2], "subSampling":float(argList[3]), "randomState":randomState} + return kwargs def getConfig(classifierConfig): - depth = classifierConfig[0] - subSampling = classifierConfig[1] - return 'with depth ' + str(depth) + ', 
' + ' sub-sampled at ' + str(subSampling) + ' ' - - -def hyperParamSearch(data, labels, randomState, metric="accuracy_score"): - minSubSampling = 1.0 / (len(labels) / 2) - bestSettings = [] - bestResults = [] - classifier = tree.DecisionTreeClassifier(max_depth=1) - preliminary_accuracies = np.zeros(50) - for i in range(50): - subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, 0.05, randomState) - classifier.fit(subSampledData, subSampledLabels) - prediction = classifier.predict(data) - preliminary_accuracies[i] = accuracy_score(labels, prediction) - preliminary_accuracy = np.mean(preliminary_accuracies) - if preliminary_accuracy < 0.50: - for max_depth in np.arange(10) + 1: - for subSampling in sorted((np.arange(20, dtype=float) + 1) / 20, reverse=True): - if subSampling > minSubSampling: - accuracies = np.zeros(50) - for i in range(50): - if subSampling != 1.0: - subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, - randomState) - else: - subSampledData, subSampledLabels, = data, labels - classifier = tree.DecisionTreeClassifier(max_depth=max_depth) - classifier.fit(subSampledData, subSampledLabels) - prediction = classifier.predict(data) - accuracies[i] = accuracy_score(labels, prediction) - accuracy = np.mean(accuracies) - if 0.5 < accuracy < 0.60: - bestSettings.append([max_depth, subSampling]) - bestResults.append(accuracy) + try: + depth = classifierConfig["depth"] + splitter = classifierConfig["splitter"] + criterion = classifierConfig["criterion"] + subSampling = classifierConfig["subSampling"] + return 'with depth ' + str(depth) + ', ' + \ + 'with splitter ' + splitter + ', ' + \ + 'with criterion ' + criterion + ', ' + \ + ' sub-sampled at ' + str(subSampling) + ' ' + except KeyError: + print classifierConfig + + + +def findClosest(scores, base=0.5): + diffToBase = 100.0 + bestSettingsIndex = 0 + for resultIndex, result in enumerate(scores): + if abs(base - result) < diffToBase: + diffToBase = abs(base - result) + bestResult = result + bestSettingsIndex = resultIndex + return bestSettingsIndex + + +def hyperParamSearch(data, labels, randomState, metric=["accuracy_score", None], nbSubSamplingTests=20): + metricModule = getattr(Metrics, metric[0]) + if metric[1] is not None: + metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: - preliminary_accuracies = np.zeros(50) - if minSubSampling < 0.01: - for i in range(50): - subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, 0.01, randomState) - classifier.fit(subSampledData, subSampledLabels) - prediction = classifier.predict(data) - preliminary_accuracies[i] = accuracy_score(labels, prediction) - preliminary_accuracy = np.mean(preliminary_accuracies) - if preliminary_accuracy < 0.50: - for subSampling in sorted((np.arange(19, dtype=float) + 1) / 200, reverse=True): - if minSubSampling < subSampling: - accuracies = np.zeros(50) - for i in range(50): - subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, - randomState) - classifier = tree.DecisionTreeClassifier(max_depth=1) - classifier.fit(subSampledData, subSampledLabels) - prediction = classifier.predict(data) - accuracies[i] = accuracy_score(labels, prediction) - accuracy = np.mean(accuracies) - if 0.5 < accuracy < 0.60: - bestSettings.append([1, subSampling]) - bestResults.append(accuracy) - else: - for subSampling in sorted((np.arange(19, dtype=float) + 1) / 2000, reverse=True): - accuracies = np.zeros(50) - 
for i in range(50): - subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, - randomState) - if minSubSampling < subSampling: - classifier1 = tree.DecisionTreeClassifier(max_depth=1) - classifier1.fit(subSampledData, subSampledLabels) - prediction = classifier1.predict(data) - accuracies[i] = accuracy_score(labels, prediction) - accuracy = np.mean(accuracies) - if 0.5 < accuracy < 0.60: - bestSettings.append([1, subSampling]) - bestResults.append(accuracy) - - assert bestResults != [], "No good settings found for Decision Tree!" - - return getBestSetting(bestSettings, bestResults) + metricKWARGS = {} + scorer = metricModule.get_scorer(**metricKWARGS) + subSamplingRatios = np.arange(nbSubSamplingTests, dtype=float)/nbSubSamplingTests + maxDepths = np.arange(1)+1 + criterions = ["gini", "entropy"] + splitters = ["best", "random"] + parameters = {"depth":maxDepths, "criterion":criterions, "splitter":splitters, "subSampling":subSamplingRatios} + classifier = DecisionTree() + grid = sklearn.model_selection.GridSearchCV(classifier, parameters, scoring=scorer) + grid.fit(data, labels) + GSSubSamplingRatios = grid.cv_results_["param_subSampling"] + GSMaxDepths = grid.cv_results_["param_depth"] + GSCriterions = grid.cv_results_["param_criterion"] + GSSplitters = grid.cv_results_["param_splitter"] + GSScores = grid.cv_results_["mean_test_score"] + configIndex = findClosest(GSScores) + return {"depth":GSMaxDepths[configIndex], "criterion":GSCriterions[configIndex], "splitter":GSSplitters[configIndex], "subSampling":GSSubSamplingRatios[configIndex], "randomState":randomState} + # bestSettings = [] + # bestResults = [] + # classifier = sklearn.tree.DecisionTreeClassifier(max_depth=1) + # subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, 0.05, randomState) + # classifier.fit(subSampledData, subSampledLabels) + # prediction = classifier.predict(data) + # preliminary_accuracy = accuracy_score(labels, prediction) + # if preliminary_accuracy < 0.50: + # for max_depth in np.arange(10) + 1: + # for subSampling in sorted((np.arange(20, dtype=float) + 1) / 20, reverse=True): + # if subSampling > minSubSampling: + # accuracies = np.zeros(50) + # for i in range(50): + # if subSampling != 1.0: + # subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, + # randomState) + # else: + # subSampledData, subSampledLabels, = data, labels + # classifier = tree.DecisionTreeClassifier(max_depth=max_depth) + # classifier.fit(subSampledData, subSampledLabels) + # prediction = classifier.predict(data) + # accuracies[i] = accuracy_score(labels, prediction) + # accuracy = np.mean(accuracies) + # if 0.5 < accuracy < 0.60: + # bestSettings.append([max_depth, subSampling]) + # bestResults.append(accuracy) + # else: + # preliminary_accuracies = np.zeros(50) + # if minSubSampling < 0.01: + # for i in range(50): + # subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, 0.01, randomState) + # classifier.fit(subSampledData, subSampledLabels) + # prediction = classifier.predict(data) + # preliminary_accuracies[i] = accuracy_score(labels, prediction) + # preliminary_accuracy = np.mean(preliminary_accuracies) + # if preliminary_accuracy < 0.50: + # for subSampling in sorted((np.arange(19, dtype=float) + 1) / 200, reverse=True): + # if minSubSampling < subSampling: + # accuracies = np.zeros(50) + # for i in range(50): + # subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, + # 
randomState) + # classifier = tree.DecisionTreeClassifier(max_depth=1) + # classifier.fit(subSampledData, subSampledLabels) + # prediction = classifier.predict(data) + # accuracies[i] = accuracy_score(labels, prediction) + # accuracy = np.mean(accuracies) + # if 0.5 < accuracy < 0.60: + # bestSettings.append([1, subSampling]) + # bestResults.append(accuracy) + # else: + # for subSampling in sorted((np.arange(19, dtype=float) + 1) / 2000, reverse=True): + # accuracies = np.zeros(50) + # for i in range(50): + # subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, + # randomState) + # if minSubSampling < subSampling: + # classifier1 = tree.DecisionTreeClassifier(max_depth=1) + # classifier1.fit(subSampledData, subSampledLabels) + # prediction = classifier1.predict(data) + # accuracies[i] = accuracy_score(labels, prediction) + # accuracy = np.mean(accuracies) + # if 0.5 < accuracy < 0.60: + # bestSettings.append([1, subSampling]) + # bestResults.append(accuracy) + # + # # assert bestResults != [], "No good settings found for Decision Tree!" + # if bestResults == []: + # bestSetting = None + # else: + # bestSetting = getBestSetting(bestSettings, bestResults) + # return bestSetting def getBestSetting(bestSettings, bestResults): diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py index d36f646fcf42e770942a3571c2546162f25d61c8..a6de51d69affff6fd3a78f64cbb7e7540d5c7c15 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py @@ -4,6 +4,7 @@ from joblib import Parallel, delayed import itertools from Classifiers import * import time +import Classifiers import pkgutil import logging from sklearn.metrics import accuracy_score @@ -29,19 +30,59 @@ def getBenchmark(benchmark, args=None): def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices): argumentsList = [] - - arguments = {"CL_type": "Mumbo", - "views": views, - "NB_VIEW": len(views), - "viewsIndices": viewsIndices, - "NB_CLASS": len(args.CL_classes), - "LABELS_NAMES": args.CL_classes, - "MumboKWARGS": {"classifiersNames": args.MU_types, - "maxIter": int(args.MU_iter[0]), "minIter": int(args.MU_iter[1]), - "threshold": args.MU_iter[2], - "classifiersConfigs": [map(float, argument.split(":")) for argument in - args.MU_config], "nbView": (len(viewsIndices))}} - argumentsList.append(arguments) + nbViews = len(views) + if args.MU_combination and args.MU_types != [""]: + classifiersCombinations = itertools.combinations_with_replacement(args.MU_types, nbViews) + for classifierCombination in classifiersCombinations: + arguments = {"CL_type": "Mumbo", + "views": views, + "NB_VIEW": len(views), + "viewsIndices": viewsIndices, + "NB_CLASS": len(args.CL_classes), + "LABELS_NAMES": args.CL_classes, + "MumboKWARGS": {"classifiersNames": classifierCombination, + "maxIter": int(args.MU_iter[0]), "minIter": int(args.MU_iter[1]), + "threshold": args.MU_iter[2], + "classifiersConfigs": [], + "nbView": (len(viewsIndices))}} + argumentsList.append(arguments) + else: + if len(args.MU_types) == nbViews: + pass + elif len(args.MU_types) < nbViews and args.MU_types != ['']: + while len(args.MU_types) < nbViews: + args.MU_types.append(args.MU_types[0]) + elif len(args.MU_types) > nbViews: + args.MU_types = args.MU_types[:nbViews] + else: + args.MU_types = ["DecisionTree" for _ in views] + classifiersModules = [getattr(Classifiers, 
classifierName) for classifierName in args.MU_types] + if args.MU_config != [""]: + arguments = {"CL_type": "Mumbo", + "views": views, + "NB_VIEW": len(views), + "viewsIndices": viewsIndices, + "NB_CLASS": len(args.CL_classes), + "LABELS_NAMES": args.CL_classes, + "MumboKWARGS": {"classifiersNames": args.MU_types, + "maxIter": int(args.MU_iter[0]), "minIter": int(args.MU_iter[1]), + "threshold": args.MU_iter[2], + "classifiersConfigs": [classifierModule.getKWARGS(argument.split(":"), randomState) for argument, classifierModule in + zip(args.MU_config, classifiersModules)], + "nbView": (len(viewsIndices))}} + else: + arguments = {"CL_type": "Mumbo", + "views": views, + "NB_VIEW": len(views), + "viewsIndices": viewsIndices, + "NB_CLASS": len(args.CL_classes), + "LABELS_NAMES": args.CL_classes, + "MumboKWARGS": {"classifiersNames": args.MU_types, + "maxIter": int(args.MU_iter[0]), "minIter": int(args.MU_iter[1]), + "threshold": args.MU_iter[2], + "classifiersConfigs": [], + "nbView": (len(viewsIndices))}} + argumentsList.append(arguments) return argumentsList @@ -56,28 +97,25 @@ def computeWeights(DATASET_LENGTH, iterIndex, viewIndice, CLASS_LABELS, costMatr for exampleIndice in range(DATASET_LENGTH)]) return weights - -def trainWeakClassifier(classifierName, monoviewDataset, CLASS_LABELS, - DATASET_LENGTH, viewIndice, classifier_config, iterIndex, costMatrices): - weights = computeWeights(DATASET_LENGTH, iterIndex, viewIndice, CLASS_LABELS, costMatrices) - classifierModule = globals()[classifierName] # Permet d'appeler une fonction avec une string - classifierMethod = getattr(classifierModule, classifierName) - classifier, classes, isBad, averageAccuracy = classifierMethod(monoviewDataset, CLASS_LABELS, classifier_config, - weights) - logging.debug("\t\t\tView " + str(viewIndice) + " : " + str(averageAccuracy)) - return classifier, classes, isBad, averageAccuracy +# +# def trainWeakClassifier(classifierName, monoviewDataset, CLASS_LABELS, +# DATASET_LENGTH, viewIndice, classifier_config, iterIndex, costMatrices): +# weights = computeWeights(DATASET_LENGTH, iterIndex, viewIndice, CLASS_LABELS, costMatrices) +# classifierModule = globals()[classifierName] # Permet d'appeler une fonction avec une string +# classifierMethod = getattr(classifierModule, classifierName) +# classifier, classes, isBad, averageAccuracy = classifierMethod(monoviewDataset, CLASS_LABELS, classifier_config, +# weights) +# logging.debug("\t\t\tView " + str(viewIndice) + " : " + str(averageAccuracy)) +# return classifier, classes, isBad, averageAccuracy -def trainWeakClassifier_hdf5(classifierName, monoviewDataset, CLASS_LABELS, DATASET_LENGTH, +def trainWeakClassifier_hdf5(classifier, classifierName, monoviewDataset, CLASS_LABELS, DATASET_LENGTH, viewIndice, classifier_config, viewName, iterIndex, costMatrices, classifierIndex, - randomState): + randomState, metric): weights = computeWeights(DATASET_LENGTH, iterIndex, classifierIndex, CLASS_LABELS, costMatrices) - classifierModule = globals()[classifierName] # Permet d'appeler une fonction avec une string - classifierMethod = getattr(classifierModule, classifierName) - classifier, classes, isBad, averageAccuracy = classifierMethod(monoviewDataset, CLASS_LABELS, classifier_config, - weights, randomState) - logging.debug("\t\t\tView " + str(viewIndice) + " : " + str(averageAccuracy)) - return classifier, classes, isBad, averageAccuracy + classifier, classes, isBad, averageScore = classifier.fit_hdf5(monoviewDataset, CLASS_LABELS, weights, metric) + 
logging.debug("\t\t\t"+viewName + " : " + str(averageScore)) + return classifier, classes, isBad, averageScore def gridSearch_hdf5(DATASET, viewIndices, classificationKWARGS, learningIndices, randomState, metric=None, nIter=None): @@ -92,7 +130,10 @@ def gridSearch_hdf5(DATASET, viewIndices, classificationKWARGS, learningIndices, DATASET.get("Labels").value[learningIndices], randomState, metric=metric)) logging.debug("\tDone:\t Gridsearch for " + classifierName) - return bestSettings, None + if None in bestSettings: + return None, None + else: + return bestSettings, None def getCLString(classificationKWARGS): @@ -104,6 +145,13 @@ class Mumbo: self.maxIter = kwargs["maxIter"] self.minIter = kwargs["minIter"] self.threshold = kwargs["threshold"] + classifiersClasses = [] + for classifierName in kwargs["classifiersNames"]: + classifierModule = getattr(Classifiers, classifierName) + classifiersClasses.append(getattr(classifierModule, classifierName)) + self.monoviewClassifiers = [classifierClass(**classifierConfig) + for classifierClass, classifierConfig + in zip(classifiersClasses, kwargs["classifiersConfigs"])] self.classifiersNames = kwargs["classifiersNames"] self.classifiersConfigs = kwargs["classifiersConfigs"] nbView = kwargs["nbView"] @@ -114,17 +162,17 @@ class Mumbo: self.generalAlphas = np.zeros(self.maxIter) self.bestClassifiers = [] self.bestViews = np.zeros(self.maxIter, dtype=int) - 1 - self.averageAccuracies = np.zeros((self.maxIter, nbView)) + self.averageScores = np.zeros((self.maxIter, nbView)) self.iterAccuracies = np.zeros(self.maxIter) self.randomState = randomState - def initDataDependant(self, datasetLength, nbView, nbClass, labels): + def initDataDependant(self, trainLength, nbView, nbClass, labels): self.edges = np.zeros((self.maxIter, nbView)) self.alphas = np.zeros((self.maxIter, nbView)) self.generalAlphas = np.zeros(self.maxIter) self.bestClassifiers = [] self.bestViews = np.zeros(self.maxIter, dtype=int) - 1 - self.averageAccuracies = np.zeros((self.maxIter, nbView)) + self.averageScores = np.zeros((self.maxIter, nbView)) self.costMatrices = np.array([ np.array([ np.array([ @@ -132,10 +180,10 @@ class Mumbo: else -(nbClass - 1) for classe in range(nbClass) ]) for exampleIndice in - range(datasetLength) - ]) for viewIndice in range(nbView)]) + range(trainLength) + ]) for _ in range(nbView)]) if iteration == 0 - else np.zeros((nbView, datasetLength, nbClass)) + else np.zeros((nbView, trainLength, nbClass)) for iteration in range(self.maxIter + 1) ]) self.generalCostMatrix = np.array([ @@ -143,71 +191,76 @@ class Mumbo: np.array([1 if labels[exampleIndice] != classe else -(nbClass - 1) for classe in range(nbClass) - ]) for exampleIndice in range(datasetLength) - ]) for iteration in range(self.maxIter) + ]) for exampleIndice in range(trainLength) + ]) for _ in range(self.maxIter) ]) - self.fs = np.zeros((self.maxIter, nbView, datasetLength, nbClass)) - self.ds = np.zeros((self.maxIter, nbView, datasetLength)) - self.predictions = np.zeros((self.maxIter, nbView, datasetLength)) - self.generalFs = np.zeros((self.maxIter, datasetLength, nbClass)) + self.fs = np.zeros((self.maxIter, nbView, trainLength, nbClass)) + self.ds = np.zeros((self.maxIter, nbView, trainLength)) + self.predictions = np.zeros((self.maxIter, nbView, trainLength)) + self.generalFs = np.zeros((self.maxIter, trainLength, nbClass)) - def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): + def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None, metric=["f1_score", None]): # 
Initialization - if trainIndices is None: - trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if type(viewsIndices) == type(None): - viewsIndices = range(DATASET.get("Metadata").attrs["nbView"]) - NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] - NB_VIEW = len(viewsIndices) - DATASET_LENGTH = len(trainIndices) - LABELS = DATASET.get("Labels").value[trainIndices] - self.initDataDependant(DATASET_LENGTH, NB_VIEW, NB_CLASS, LABELS) - # Learning - isStabilized = False - self.iterIndex = 0 - while not isStabilized and not self.iterIndex >= self.maxIter - 1: - if self.iterIndex > self.minIter: - coeffs = np.polyfit(np.log(np.arange(self.iterIndex) + 0.00001), self.iterAccuracies[:self.iterIndex], - 1) - if coeffs[0] / self.iterIndex < self.threshold: - isStabilized = True - - logging.debug('\t\tStart:\t Iteration ' + str(self.iterIndex + 1)) - classifiers, predictedLabels, areBad = self.trainWeakClassifiers_hdf5(DATASET, trainIndices, NB_CLASS, - DATASET_LENGTH, viewsIndices) - if areBad.all(): - logging.warning("\t\tWARNING:\tAll bad for iteration " + str(self.iterIndex)) - - self.predictions[self.iterIndex] = predictedLabels - - for viewFakeIndex in range(NB_VIEW): - self.computeEdge(viewFakeIndex, DATASET_LENGTH, LABELS) - if areBad[viewFakeIndex]: - self.alphas[self.iterIndex, viewFakeIndex] = 0. + if self.classifiersConfigs is None: + pass + else: + if trainIndices is None: + trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) + if type(viewsIndices) == type(None): + viewsIndices = range(DATASET.get("Metadata").attrs["nbView"]) + NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] + NB_VIEW = len(viewsIndices) + trainLength = len(trainIndices) + LABELS = DATASET.get("Labels").value[trainIndices] + self.initDataDependant(trainLength, NB_VIEW, NB_CLASS, LABELS) + # Learning + isStabilized = False + self.iterIndex = 0 + while not isStabilized and not self.iterIndex >= self.maxIter - 1: + if self.iterIndex > self.minIter: + coeffs = np.polyfit(np.log(np.arange(self.iterIndex) + 0.00001), self.iterAccuracies[:self.iterIndex], + 1) + if abs(coeffs[0]) / self.iterIndex < self.threshold: + isStabilized = True else: - self.alphas[self.iterIndex, viewFakeIndex] = self.computeAlpha( - self.edges[self.iterIndex, viewFakeIndex]) - - self.updateDs(LABELS, NB_VIEW, DATASET_LENGTH) - self.updateFs(NB_VIEW, DATASET_LENGTH, NB_CLASS) - - self.updateCostmatrices(NB_VIEW, DATASET_LENGTH, NB_CLASS, LABELS) - bestView, edge, bestFakeView = self.chooseView(viewsIndices, LABELS, DATASET_LENGTH) - self.bestViews[self.iterIndex] = bestView - logging.debug("\t\t\t Best view : \t\t" + DATASET.get("View" + str(bestView)).attrs["name"]) - if areBad.all(): - self.generalAlphas[self.iterIndex] = 0. 
- else: - self.generalAlphas[self.iterIndex] = self.computeAlpha(edge) - self.bestClassifiers.append(classifiers[bestFakeView]) - self.updateGeneralFs(DATASET_LENGTH, NB_CLASS, bestFakeView) - self.updateGeneralCostMatrix(DATASET_LENGTH, NB_CLASS, LABELS) - predictedLabels = self.predict_hdf5(DATASET, usedIndices=trainIndices, viewsIndices=viewsIndices) - accuracy = accuracy_score(DATASET.get("Labels").value[trainIndices], predictedLabels) - self.iterAccuracies[self.iterIndex] = accuracy + pass + + logging.debug('\t\tStart:\t Iteration ' + str(self.iterIndex + 1)) + classifiers, predictedLabels, areBad = self.trainWeakClassifiers_hdf5(DATASET, trainIndices, NB_CLASS, + trainLength, viewsIndices, metric) + if areBad.all(): + logging.warning("\t\tWARNING:\tAll bad for iteration " + str(self.iterIndex)) - self.iterIndex += 1 + self.predictions[self.iterIndex] = predictedLabels + + for viewFakeIndex in range(NB_VIEW): + self.computeEdge(viewFakeIndex, trainLength, LABELS) + if areBad[viewFakeIndex]: + self.alphas[self.iterIndex, viewFakeIndex] = 0. + else: + self.alphas[self.iterIndex, viewFakeIndex] = self.computeAlpha( + self.edges[self.iterIndex, viewFakeIndex]) + + self.updateDs(LABELS, NB_VIEW, trainLength) + self.updateFs(NB_VIEW, trainLength, NB_CLASS) + + self.updateCostmatrices(NB_VIEW, trainLength, NB_CLASS, LABELS) + bestView, edge, bestFakeView = self.chooseView(viewsIndices, LABELS, trainLength) + self.bestViews[self.iterIndex] = bestView + logging.debug("\t\t\t Best view : \t\t" + DATASET.get("View" + str(bestView)).attrs["name"]) + if areBad.all(): + self.generalAlphas[self.iterIndex] = 0. + else: + self.generalAlphas[self.iterIndex] = self.computeAlpha(edge) + self.bestClassifiers.append(classifiers[bestFakeView]) + self.updateGeneralFs(trainLength, NB_CLASS, bestFakeView) + self.updateGeneralCostMatrix(trainLength, NB_CLASS, LABELS) + predictedLabels = self.predict_hdf5(DATASET, usedIndices=trainIndices, viewsIndices=viewsIndices) + accuracy = accuracy_score(DATASET.get("Labels").value[trainIndices], predictedLabels) + self.iterAccuracies[self.iterIndex] = accuracy + + self.iterIndex += 1 def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] @@ -215,24 +268,26 @@ class Mumbo: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) if viewsIndices is None: viewsIndices = range(DATASET.get("Metadata").attrs["nbView"]) - - viewDict = dict((viewIndex, index) for index, viewIndex in enumerate(viewsIndices)) - if usedIndices is not None: - DATASET_LENGTH = len(usedIndices) - predictedLabels = np.zeros(DATASET_LENGTH) - - for labelIndex, exampleIndex in enumerate(usedIndices): - votes = np.zeros(NB_CLASS) - for classifier, alpha, view in zip(self.bestClassifiers, self.alphas, self.bestViews): - if view != -1: - data = getV(DATASET, int(view), int(exampleIndex)) - votes[int(classifier.predict(np.array([data])))] += alpha[viewDict[view]] - else: - pass - predictedLabels[labelIndex] = np.argmax(votes) + if self.classifiersConfigs is None: + return np.zeros(len(usedIndices), dtype=int) else: - predictedLabels = [] - return predictedLabels + viewDict = dict((viewIndex, index) for index, viewIndex in enumerate(viewsIndices)) + if usedIndices is not None: + DATASET_LENGTH = len(usedIndices) + predictedLabels = np.zeros(DATASET_LENGTH) + + for labelIndex, exampleIndex in enumerate(usedIndices): + votes = np.zeros(NB_CLASS) + for classifier, alpha, view in zip(self.bestClassifiers, self.alphas, self.bestViews): + 
if view != -1: + data = getV(DATASET, int(view), int(exampleIndex)) + votes[int(classifier.predict(np.array([data])))] += alpha[viewDict[view]] + else: + pass + predictedLabels[labelIndex] = np.argmax(votes) + else: + predictedLabels = [] + return predictedLabels def predict_proba_hdf5(self, DATASET, usedIndices=None): NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] @@ -240,41 +295,43 @@ class Mumbo: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) DATASET_LENGTH = len(usedIndices) predictedProbas = np.zeros((DATASET_LENGTH, NB_CLASS)) - - for labelIndex, exampleIndex in enumerate(usedIndices): - for classifier, alpha, view in zip(self.bestClassifiers, self.alphas, self.bestViews): - data = getV(DATASET, int(view), exampleIndex) - predictedProbas[labelIndex, int(classifier.predict(np.array([data])))] += alpha[view] - predictedProbas[labelIndex, :] = predictedProbas[labelIndex, :] / np.sum(predictedProbas[labelIndex, :]) - return predictedProbas - - def trainWeakClassifiers(self, DATASET, CLASS_LABELS, NB_CLASS, DATASET_LENGTH, NB_VIEW): - trainedClassifiers = [] - labelsMatrix = [] - areBad = [] - if self.nbCores > NB_VIEW: - NB_JOBS = NB_VIEW + if self.classifiersConfigs is None: + predictedProbas[:,0]=1.0 else: - NB_JOBS = self.nbCores - classifiersConfigs = self.classifiersConfigs - costMatrices = self.costMatrices - classifiersNames = self.classifiersNames - iterIndex = self.iterIndex - trainedClassifiersAndLabels = Parallel(n_jobs=NB_JOBS)( - delayed(trainWeakClassifier)(classifiersNames[viewIndice], DATASET[viewIndice], CLASS_LABELS, - DATASET_LENGTH, viewIndice, classifiersConfigs[viewIndice], iterIndex, - costMatrices) - for viewIndice in range(NB_VIEW)) + for labelIndex, exampleIndex in enumerate(usedIndices): + for classifier, alpha, view in zip(self.bestClassifiers, self.alphas, self.bestViews): + data = getV(DATASET, int(view), exampleIndex) + predictedProbas[labelIndex, int(classifier.predict(np.array([data])))] += alpha[view] + predictedProbas[labelIndex, :] = predictedProbas[labelIndex, :] / np.sum(predictedProbas[labelIndex, :]) + return predictedProbas - for viewIndex, (classifier, labelsArray, isBad, averageAccuracy) in enumerate(trainedClassifiersAndLabels): - self.averageAccuracies[self.iterIndex, viewIndex] = averageAccuracy - trainedClassifiers.append(classifier) - labelsMatrix.append(labelsArray) - areBad.append(isBad) - return np.array(trainedClassifiers), np.array(labelsMatrix), np.array(areBad) + # def trainWeakClassifiers(self, DATASET, CLASS_LABELS, NB_CLASS, DATASET_LENGTH, NB_VIEW): + # trainedClassifiers = [] + # labelsMatrix = [] + # areBad = [] + # if self.nbCores > NB_VIEW: + # NB_JOBS = NB_VIEW + # else: + # NB_JOBS = self.nbCores + # classifiersConfigs = self.classifiersConfigs + # costMatrices = self.costMatrices + # classifiersNames = self.classifiersNames + # iterIndex = self.iterIndex + # trainedClassifiersAndLabels = Parallel(n_jobs=NB_JOBS)( + # delayed(trainWeakClassifier)(classifiersNames[viewIndice], DATASET[viewIndice], CLASS_LABELS, + # DATASET_LENGTH, viewIndice, classifiersConfigs[viewIndice], iterIndex, + # costMatrices) + # for viewIndice in range(NB_VIEW)) + # + # for viewIndex, (classifier, labelsArray, isBad, averageAccuracy) in enumerate(trainedClassifiersAndLabels): + # self.averageScores[self.iterIndex, viewIndex] = averageAccuracy + # trainedClassifiers.append(classifier) + # labelsMatrix.append(labelsArray) + # areBad.append(isBad) + # return np.array(trainedClassifiers), np.array(labelsMatrix), 
np.array(areBad) def trainWeakClassifiers_hdf5(self, DATASET, trainIndices, NB_CLASS, - DATASET_LENGTH, viewIndices): + DATASET_LENGTH, viewIndices, metric): NB_VIEW = len(viewIndices) trainedClassifiers = [] labelsMatrix = [] @@ -286,19 +343,20 @@ class Mumbo: classifiersConfigs = self.classifiersConfigs costMatrices = self.costMatrices classifiersNames = self.classifiersNames + classifiers = self.monoviewClassifiers iterIndex = self.iterIndex trainedClassifiersAndLabels = Parallel(n_jobs=NB_JOBS)( - delayed(trainWeakClassifier_hdf5)(classifiersNames[classifierIndex], + delayed(trainWeakClassifier_hdf5)(classifiers[classifierIndex], classifiersNames[classifierIndex], getV(DATASET, viewIndex, trainIndices), DATASET.get("Labels").value[trainIndices], DATASET_LENGTH, viewIndex, classifiersConfigs[classifierIndex], DATASET.get("View" + str(viewIndex)).attrs["name"], iterIndex, - costMatrices, classifierIndex, self.randomState) + costMatrices, classifierIndex, self.randomState, metric) for classifierIndex, viewIndex in enumerate(viewIndices)) - for viewFakeIndex, (classifier, labelsArray, isBad, averageAccuracy) in enumerate(trainedClassifiersAndLabels): - self.averageAccuracies[self.iterIndex, viewFakeIndex] = averageAccuracy + for viewFakeIndex, (classifier, labelsArray, isBad, averageScore) in enumerate(trainedClassifiersAndLabels): + self.averageScores[self.iterIndex, viewFakeIndex] = averageScore trainedClassifiers.append(classifier) labelsMatrix.append(labelsArray) areBad.append(isBad) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py index ca3091911b40031d3df15e4670ede3c764c4825a..7a853245cb8d039dbcfc83e76a44a9885f51e12d 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/analyzeResults.py @@ -130,7 +130,7 @@ def getReport(classifier, CLASS_LABELS, classificationIndices, DATASET, trainLab testScore = metricModule.score(CLASS_LABELS[validationIndices], testLabels) mumboClassifier = classifier maxIter = mumboClassifier.iterIndex - meanAverageAccuracies = np.mean(mumboClassifier.averageAccuracies, axis=0) + meanAverageAccuracies = np.mean(mumboClassifier.averageScores, axis=0) viewsStats = np.array([float(list(mumboClassifier.bestViews).count(viewIndex)) / len(mumboClassifier.bestViews) for viewIndex in range(nbView)]) PredictedTrainLabelsByIter = mumboClassifier.classifyMumbobyIter_hdf5(DATASET, fakeViewsIndicesDict, @@ -230,41 +230,46 @@ def execute(classifier, trainLabels, hyperParamSearch, nIter, metrics, viewsIndices, randomState): learningIndices, validationIndices = classificationIndices - LEARNING_RATE = len(learningIndices) / (len(learningIndices) + len(validationIndices)) - nbFolds = KFolds.n_splits - - CLASS_LABELS = DATASET.get("Labels")[...] 
- - dbConfigurationString, viewNames = getDBConfig(DATASET, LEARNING_RATE, nbFolds, databaseName, validationIndices, - LABELS_DICTIONARY) - algoConfigurationString, classifierAnalysis = getAlgoConfig(classifier, classificationKWARGS, nbCores, viewNames, - hyperParamSearch, nIter, times) - - (totalScoreOnTrain, totalScoreOnTest, meanAverageAccuracies, viewsStats, scoresOnTainByIter, - scoresOnTestByIter) = getReport(classifier, CLASS_LABELS, classificationIndices, DATASET, - trainLabels, testLabels, viewsIndices, metrics[0]) - - stringAnalysis = "\t\tResult for Multiview classification with Mumbo with random state : " + str(randomState) + \ - "\n\nAverage " + metrics[0][0] + " :\n\t-On Train : " + str( - totalScoreOnTrain) + "\n\t-On Test : " + \ - str(totalScoreOnTest) - stringAnalysis += dbConfigurationString - stringAnalysis += algoConfigurationString - metricsScores = getMetricsScores(metrics, trainLabels, testLabels, - DATASET, validationIndices, learningIndices) - stringAnalysis += printMetricScore(metricsScores, metrics) - stringAnalysis += "Mean average scores and stats :" - for viewIndex, (meanAverageAccuracy, bestViewStat) in enumerate(zip(meanAverageAccuracies, viewsStats)): - stringAnalysis += "\n\t- On " + viewNames[viewIndex] + \ - " : \n\t\t- Mean average Accuracy : " + str(meanAverageAccuracy) + \ - "\n\t\t- Percentage of time chosen : " + str(bestViewStat) - stringAnalysis += "\n\n For each iteration : " - for iterIndex in range(len(scoresOnTainByIter)): - stringAnalysis += "\n\t- Iteration " + str(iterIndex + 1) - stringAnalysis += "\n\t\tScore on train : " + \ - str(scoresOnTainByIter[iterIndex]) + '\n\t\tScore on test : ' + \ - str(scoresOnTestByIter[iterIndex]) - - name, image = plotAccuracyByIter(scoresOnTainByIter, scoresOnTestByIter, views, classifierAnalysis) - imagesAnalysis = {name: image} - return stringAnalysis, imagesAnalysis, metricsScores + if classifier.classifiersConfigs is None: + metricsScores = getMetricsScores(metrics, trainLabels, testLabels, + DATASET, validationIndices, learningIndices) + return "No good setting for monoview classifier", None, metricsScores + else: + LEARNING_RATE = len(learningIndices) / (len(learningIndices) + len(validationIndices)) + nbFolds = KFolds.n_splits + + CLASS_LABELS = DATASET.get("Labels")[...] 
+ + dbConfigurationString, viewNames = getDBConfig(DATASET, LEARNING_RATE, nbFolds, databaseName, validationIndices, + LABELS_DICTIONARY) + algoConfigurationString, classifierAnalysis = getAlgoConfig(classifier, classificationKWARGS, nbCores, viewNames, + hyperParamSearch, nIter, times) + + (totalScoreOnTrain, totalScoreOnTest, meanAverageAccuracies, viewsStats, scoresOnTainByIter, + scoresOnTestByIter) = getReport(classifier, CLASS_LABELS, classificationIndices, DATASET, + trainLabels, testLabels, viewsIndices, metrics[0]) + + stringAnalysis = "\t\tResult for Multiview classification with Mumbo with random state : " + str(randomState) + \ + "\n\nAverage " + metrics[0][0] + " :\n\t-On Train : " + str( + totalScoreOnTrain) + "\n\t-On Test : " + \ + str(totalScoreOnTest) + stringAnalysis += dbConfigurationString + stringAnalysis += algoConfigurationString + metricsScores = getMetricsScores(metrics, trainLabels, testLabels, + DATASET, validationIndices, learningIndices) + stringAnalysis += printMetricScore(metricsScores, metrics) + stringAnalysis += "Mean average scores and stats :" + for viewIndex, (meanAverageAccuracy, bestViewStat) in enumerate(zip(meanAverageAccuracies, viewsStats)): + stringAnalysis += "\n\t- On " + viewNames[viewIndex] + \ + " : \n\t\t- Mean average Accuracy : " + str(meanAverageAccuracy) + \ + "\n\t\t- Percentage of time chosen : " + str(bestViewStat) + stringAnalysis += "\n\n For each iteration : " + for iterIndex in range(len(scoresOnTainByIter)): + stringAnalysis += "\n\t- Iteration " + str(iterIndex + 1) + stringAnalysis += "\n\t\tScore on train : " + \ + str(scoresOnTainByIter[iterIndex]) + '\n\t\tScore on test : ' + \ + str(scoresOnTestByIter[iterIndex]) + + name, image = plotAccuracyByIter(scoresOnTainByIter, scoresOnTestByIter, views, classifierAnalysis) + imagesAnalysis = {name: image} + return stringAnalysis, imagesAnalysis, metricsScores
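
Reviewer note on the DecisionTree refactor above: the patch turns Mumbo's DecisionTree into a scikit-learn style estimator and replaces the hand-rolled hyper-parameter loops with GridSearchCV plus findClosest(), which keeps the configuration whose cross-validated score is closest to 0.5 (Mumbo wants weak learners, not the strongest tree). Below is a minimal, self-contained sketch of that pattern, assuming plain scikit-learn pieces (make_scorer/accuracy_score, make_classification) in place of the repository's Metrics and SubSampling modules; the WeakTree class and the toy data are illustrative and are not code from this patch.

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import check_random_state


class WeakTree(BaseEstimator, ClassifierMixin):
    """Shallow decision tree trained on a random fraction of the examples (stand-in for the patched DecisionTree)."""

    def __init__(self, depth=1, criterion="gini", splitter="best", subSampling=1.0, randomState=None):
        # Only store the parameters here so that get_params()/set_params() work with GridSearchCV.
        self.depth = depth
        self.criterion = criterion
        self.splitter = splitter
        self.subSampling = subSampling
        self.randomState = randomState

    def fit(self, X, y):
        rs = check_random_state(self.randomState)
        if self.subSampling != 1.0:
            # Keep a random fraction of the training set (the role played by SubSampling.subSample in the repo).
            kept = rs.choice(len(y), size=max(1, int(self.subSampling * len(y))), replace=False)
            X, y = X[kept], y[kept]
        self.tree_ = DecisionTreeClassifier(max_depth=self.depth, criterion=self.criterion,
                                            splitter=self.splitter, random_state=rs)
        self.tree_.fit(X, y)
        return self

    def predict(self, X):
        return self.tree_.predict(X)


def findClosest(scores, base=0.5):
    """Index of the score closest to base: Mumbo wants learners just above chance, not the best one."""
    return int(np.argmin(np.abs(np.asarray(scores) - base)))


if __name__ == "__main__":
    X, y = make_classification(n_samples=200, n_features=10, random_state=42)
    parameters = {"depth": [1, 2], "criterion": ["gini", "entropy"],
                  "splitter": ["best", "random"], "subSampling": [0.1, 0.5, 1.0]}
    grid = GridSearchCV(WeakTree(), parameters, scoring=make_scorer(accuracy_score), cv=3)
    grid.fit(X, y)
    bestIndex = findClosest(grid.cv_results_["mean_test_score"])
    print(grid.cv_results_["params"][bestIndex])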
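
A second, smaller sketch (also illustrative, not part of the patch) of what the new --MU_combination flag enumerates in Mumbo's getArgs(): one Mumbo argument dictionary is built per combination-with-replacement of the --MU_types names over the views. The classifier names below are placeholders for whatever monoview classifiers exist in Multiview/Mumbo/Classifiers.

import itertools

MU_types = ["DecisionTree", "SomeOtherWeakClassifier"]  # placeholder names
nbViews = 3
for classifierCombination in itertools.combinations_with_replacement(MU_types, nbViews):
    print(classifierCombination)  # e.g. ('DecisionTree', 'DecisionTree', 'SomeOtherWeakClassifier')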