diff --git a/Code/MonoMultiViewClassifiers/ExecClassif.py b/Code/MonoMultiViewClassifiers/ExecClassif.py index 094dbcdfb7f08f73ad951042d655df6c336b08dd..6585a15c3391cb41e04d48f09791d8b1b10cbbba 100644 --- a/Code/MonoMultiViewClassifiers/ExecClassif.py +++ b/Code/MonoMultiViewClassifiers/ExecClassif.py @@ -69,9 +69,15 @@ def initBenchmark(args): def genViewsDictionnary(DATASET): datasetsNames = DATASET.keys() - viewsDictionary = dict((DATASET.get(datasetName).attrs["name"], int(datasetName[4:])) - for datasetName in datasetsNames - if datasetName[:4]=="View") + viewsDictionary = {} + for datasetName in datasetsNames: + if datasetName[:4]=="View": + viewName = DATASET.get(datasetName).attrs["name"] + if type(viewName)!=bytes: + viewsDictionary[viewName] = int(datasetName[4:]) + else: + viewsDictionary[viewName.decode("utf-8")] = int(datasetName[4:]) + return viewsDictionary @@ -264,8 +270,10 @@ def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, directory=Non logging.debug("Start:\t Monoview benchmark") for arguments in argumentDictionaries["Monoview"]: kwargs = arguments["args"] - views = [DATASET.get("View" + str(viewIndex)).attrs["name"] for viewIndex in - range(DATASET.get("Metadata").attrs["nbView"])] + views = [DATASET.get("View" + str(viewIndex)).attrs["name"] + if type(DATASET.get("View" + str(viewIndex)).attrs["name"])!=bytes + else DATASET.get("View" + str(viewIndex)).attrs["name"].decode("utf-8") + for viewIndex in range(DATASET.get("Metadata").attrs["nbView"])] neededViewIndex = views.index(kwargs["feat"]) X = DATASET.get("View" + str(neededViewIndex)) Y = labels @@ -371,7 +379,11 @@ def execClassif(arguments): viewsDictionary = genViewsDictionnary(DATASET) nbViews = DATASET.get("Metadata").attrs["nbView"] - views = [DATASET.get("View"+str(viewIndex)).attrs["name"] for viewIndex in range(nbViews)] + + views = [DATASET.get("View"+str(viewIndex)).attrs["name"] + if type(DATASET.get("View"+str(viewIndex)).attrs["name"])!=bytes + else DATASET.get("View"+str(viewIndex)).attrs["name"].decode("utf-8") + for viewIndex in range(nbViews)] NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] metrics = [metric.split(":") for metric in args.CL_metrics] diff --git a/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py index ee0005fa04b2547af259e737edc9b6a4798a7cf9..31b5e549f31bc6dd20ac153be5feb86bdf795576 100644 --- a/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py @@ -187,87 +187,88 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol if __name__ == '__main__': + import argparse pass - # parser = argparse.ArgumentParser( - # description='This methods permits to execute a multiclass classification with one single view. At this point the used classifier is a RandomForest. The GridSearch permits to vary the number of trees and CrossValidation with k-folds. 
The result will be a plot of the score per class and a CSV with the best classifier found by the GridSearch.', - # formatter_class=argparse.ArgumentDefaultsHelpFormatter) - # - # groupStandard = parser.add_argument_group('Standard arguments') - # groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console') - # groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of Dataset', default=".hdf5") - # groupStandard.add_argument('--name', metavar='STRING', action='store', - # help='Name of Database (default: %(default)s)', default='DB') - # groupStandard.add_argument('--feat', metavar='STRING', action='store', - # help='Name of Feature for Classification (default: %(default)s)', default='RGB') - # groupStandard.add_argument('--pathF', metavar='STRING', action='store', - # help='Path to the views (default: %(default)s)', default='Results-FeatExtr/') - # groupStandard.add_argument('--fileCL', metavar='STRING', action='store', - # help='Name of classLabels CSV-file (default: %(default)s)', default='classLabels.csv') - # groupStandard.add_argument('--fileCLD', metavar='STRING', action='store', - # help='Name of classLabels-Description CSV-file (default: %(default)s)', - # default='classLabels-Description.csv') - # groupStandard.add_argument('--fileFeat', metavar='STRING', action='store', - # help='Name of feature CSV-file (default: %(default)s)', default='feature.csv') - # - # groupClass = parser.add_argument_group('Classification arguments') - # groupClass.add_argument('--CL_type', metavar='STRING', action='store', help='Classifier to use', - # default="RandomForest") - # groupClass.add_argument('--CL_CV', metavar='INT', action='store', help='Number of k-folds for CV', type=int, - # default=10) - # groupClass.add_argument('--CL_Cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int, - # default=1) - # groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', help='Split ratio for train and test', - # type=float, default=0.9) - # groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', - # help='Determine which metrics to use, separate with ":" if multiple, if empty, considering all', - # default='') - # - # groupClassifier = parser.add_argument_group('Classifier Config') - # groupClassifier.add_argument('--CL_config', metavar='STRING', nargs="+", action='store', - # help='GridSearch: Determine the trees', default=['25:75:125:175']) - # - # args = parser.parse_args() - # - # classifierKWARGS = dict((key, value) for key, value in enumerate([arg.split(":") for arg in args.CL_config])) - # ### Main Programm - # - # - # # Configure Logger - # directory = os.path.dirname(os.path.abspath(__file__)) + "/Results-ClassMonoView/" - # logfilename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + args.name + "-" + args.feat + "-LOG" - # logfile = directory + logfilename - # if os.path.isfile(logfile + ".log"): - # for i in range(1, 20): - # testFileName = logfilename + "-" + str(i) + ".log" - # if not os.path.isfile(directory + testFileName): - # logfile = directory + testFileName - # break - # else: - # logfile += ".log" - # - # logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logfile, level=logging.DEBUG, - # filemode='w') - # - # if args.log: - # logging.getLogger().addHandler(logging.StreamHandler()) - # - # # Read the features - # logging.debug("Start:\t Read " + args.type + " Files") - # - # if args.type == ".csv": - # X = 
np.genfromtxt(args.pathF + args.fileFeat, delimiter=';') - # Y = np.genfromtxt(args.pathF + args.fileCL, delimiter=';') - # elif args.type == ".hdf5": - # dataset = h5py.File(args.pathF + args.name + ".hdf5", "r") - # viewsDict = dict((dataset.get("View" + str(viewIndex)).attrs["name"], viewIndex) for viewIndex in - # range(dataset.get("Metadata").attrs["nbView"])) - # X = dataset["View" + str(viewsDict[args.feat])][...] - # Y = dataset["Labels"][...] - # - # logging.debug("Info:\t Shape of Feature:" + str(X.shape) + ", Length of classLabels vector:" + str(Y.shape)) - # logging.debug("Done:\t Read CSV Files") - # - # arguments = {args.CL_type + "KWARGS": classifierKWARGS, "feat": args.feat, "fileFeat": args.fileFeat, - # "fileCL": args.fileCL, "fileCLD": args.fileCLD, "CL_type": args.CL_type} - # ExecMonoview(X, Y, args.name, args.CL_split, args.CL_CV, args.CL_Cores, args.type, args.pathF, - # metrics=args.CL_metrics, **arguments) + parser = argparse.ArgumentParser( + description='This methods permits to execute a multiclass classification with one single view. At this point the used classifier is a RandomForest. The GridSearch permits to vary the number of trees and CrossValidation with k-folds. The result will be a plot of the score per class and a CSV with the best classifier found by the GridSearch.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + groupStandard = parser.add_argument_group('Standard arguments') + groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console') + groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of Dataset', default=".hdf5") + groupStandard.add_argument('--name', metavar='STRING', action='store', + help='Name of Database (default: %(default)s)', default='DB') + groupStandard.add_argument('--feat', metavar='STRING', action='store', + help='Name of Feature for Classification (default: %(default)s)', default='RGB') + groupStandard.add_argument('--pathF', metavar='STRING', action='store', + help='Path to the views (default: %(default)s)', default='Results-FeatExtr/') + groupStandard.add_argument('--fileCL', metavar='STRING', action='store', + help='Name of classLabels CSV-file (default: %(default)s)', default='classLabels.csv') + groupStandard.add_argument('--fileCLD', metavar='STRING', action='store', + help='Name of classLabels-Description CSV-file (default: %(default)s)', + default='classLabels-Description.csv') + groupStandard.add_argument('--fileFeat', metavar='STRING', action='store', + help='Name of feature CSV-file (default: %(default)s)', default='feature.csv') + + groupClass = parser.add_argument_group('Classification arguments') + groupClass.add_argument('--CL_type', metavar='STRING', action='store', help='Classifier to use', + default="RandomForest") + groupClass.add_argument('--CL_CV', metavar='INT', action='store', help='Number of k-folds for CV', type=int, + default=10) + groupClass.add_argument('--CL_Cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int, + default=1) + groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', help='Split ratio for train and test', + type=float, default=0.9) + groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', + help='Determine which metrics to use, separate with ":" if multiple, if empty, considering all', + default='') + + groupClassifier = parser.add_argument_group('Classifier Config') + groupClassifier.add_argument('--CL_config', metavar='STRING', 
nargs="+", action='store', + help='GridSearch: Determine the trees', default=['25:75:125:175']) + + args = parser.parse_args() + + classifierKWARGS = dict((key, value) for key, value in enumerate([arg.split(":") for arg in args.CL_config])) + ### Main Programm + + + # Configure Logger + directory = os.path.dirname(os.path.abspath(__file__)) + "/Results-ClassMonoView/" + logfilename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + args.name + "-" + args.feat + "-LOG" + logfile = directory + logfilename + if os.path.isfile(logfile + ".log"): + for i in range(1, 20): + testFileName = logfilename + "-" + str(i) + ".log" + if not os.path.isfile(directory + testFileName): + logfile = directory + testFileName + break + else: + logfile += ".log" + + logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logfile, level=logging.DEBUG, + filemode='w') + + if args.log: + logging.getLogger().addHandler(logging.StreamHandler()) + + # Read the features + logging.debug("Start:\t Read " + args.type + " Files") + + if args.type == ".csv": + X = np.genfromtxt(args.pathF + args.fileFeat, delimiter=';') + Y = np.genfromtxt(args.pathF + args.fileCL, delimiter=';') + elif args.type == ".hdf5": + dataset = h5py.File(args.pathF + args.name + ".hdf5", "r") + viewsDict = dict((dataset.get("View" + str(viewIndex)).attrs["name"], viewIndex) for viewIndex in + range(dataset.get("Metadata").attrs["nbView"])) + X = dataset["View" + str(viewsDict[args.feat])][...] + Y = dataset["Labels"][...] + + logging.debug("Info:\t Shape of Feature:" + str(X.shape) + ", Length of classLabels vector:" + str(Y.shape)) + logging.debug("Done:\t Read CSV Files") + + arguments = {args.CL_type + "KWARGS": classifierKWARGS, "feat": args.feat, "fileFeat": args.fileFeat, + "fileCL": args.fileCL, "fileCLD": args.fileCLD, "CL_type": args.CL_type} + ExecMonoview(X, Y, args.name, args.CL_split, args.CL_CV, args.CL_Cores, args.type, args.pathF, + metrics=args.CL_metrics, **arguments) diff --git a/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Mumbo/MumboModule.py b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Mumbo/MumboModule.py index 50606d0e3435efc73d1b2b4d9418713bce52e811..fac4de3136af85ceaffb2b76f182fa1fe1adb3ba 100644 --- a/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Mumbo/MumboModule.py +++ b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Mumbo/MumboModule.py @@ -118,6 +118,8 @@ def trainWeakClassifier_hdf5(classifier, classifierName, monoviewDataset, CLASS_ randomState, metric): weights = computeWeights(DATASET_LENGTH, iterIndex, classifierIndex, CLASS_LABELS, costMatrices) classifier, classes, isBad, averageScore = classifier.fit_hdf5(monoviewDataset, CLASS_LABELS, weights, metric) + if type(viewName) == bytes: + viewName = viewName.decode("utf-8") logging.debug("\t\t\t"+viewName + " : " + str(averageScore)) return classifier, classes, isBad, averageScore @@ -126,8 +128,7 @@ def gridSearch_hdf5(DATASET, labels, viewIndices, classificationKWARGS, learning classifiersNames = classificationKWARGS["classifiersNames"] bestSettings = [] for classifierIndex, classifierName in enumerate(classifiersNames): - logging.debug("\tStart:\t Random search for " + classifierName + " on " + - DATASET.get("View" + str(viewIndices[classifierIndex])).attrs["name"]) + logging.debug("\tStart:\t Random search for " + classifierName + " on View" + str(viewIndices[classifierIndex])) classifierModule = getattr(Classifiers, classifierName) # Permet d'appeler une fonction avec une string classifierGridSearch = 
getattr(classifierModule, "hyperParamSearch") bestSettings.append(classifierGridSearch(getV(DATASET, viewIndices[classifierIndex], learningIndices), @@ -252,7 +253,7 @@ class MumboClass: self.updateCostmatrices(NB_VIEW, trainLength, NB_CLASS, LABELS) bestView, edge, bestFakeView = self.chooseView(viewsIndices, LABELS, trainLength) self.bestViews[self.iterIndex] = bestView - logging.debug("\t\t\t Best view : \t\t" + DATASET.get("View" + str(bestView)).attrs["name"]) + logging.debug("\t\t\t Best view : \t\t View" + str(bestView)) if areBad.all(): self.generalAlphas[self.iterIndex] = 0. else: diff --git a/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Mumbo/analyzeResults.py b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Mumbo/analyzeResults.py index 0f00d2ad9f474d710a5132de218a0ee7c800b218..15a2acd899043ea30766d9939940882b3465b4f8 100644 --- a/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Mumbo/analyzeResults.py +++ b/Code/MonoMultiViewClassifiers/MultiviewClassifiers/Mumbo/analyzeResults.py @@ -72,7 +72,10 @@ def error(testLabels, computedLabels): def getDBConfig(DATASET, LEARNING_RATE, nbFolds, databaseName, validationIndices, LABELS_DICTIONARY): nbView = DATASET.get("Metadata").attrs["nbView"] - viewNames = [DATASET.get("View" + str(viewIndex)).attrs["name"] for viewIndex in range(nbView)] + viewNames = [DATASET.get("View" + str(viewIndex)).attrs["name"] + if type(DATASET.get("View" + str(viewIndex)).attrs["name"]) != bytes + else DATASET.get("View" + str(viewIndex)).attrs["name"].decode("utf-8") + for viewIndex in range(nbView)] viewShapes = [getShape(DATASET, viewIndex) for viewIndex in range(nbView)] DBString = "Dataset info :\n\t-Dataset name : " + databaseName DBString += "\n\t-Labels : " + ', '.join(LABELS_DICTIONARY.values()) diff --git a/Code/MonoMultiViewClassifiers/utils/execution.py b/Code/MonoMultiViewClassifiers/utils/execution.py index 840f4da70533bfd06456330c04a43c0e36a7dd39..325b05a5478b9bdccfddc6a4fbd8ca94f46e3d91 100644 --- a/Code/MonoMultiViewClassifiers/utils/execution.py +++ b/Code/MonoMultiViewClassifiers/utils/execution.py @@ -274,11 +274,19 @@ def initViews(DATASET, args): NB_VIEW = DATASET.get("Metadata").attrs["nbView"] if args.views != [""]: allowedViews = args.views - allViews = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)] - views = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW) if - str(DATASET.get("View" + str(viewIndex)).attrs["name"]) in allowedViews] - viewsIndices = [viewIndex for viewIndex in range(NB_VIEW) if - str(DATASET.get("View" + str(viewIndex)).attrs["name"]) in allowedViews] + allViews = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) + if type(DATASET.get("View" + str(viewIndex)).attrs["name"])!=bytes + else DATASET.get("View" + str(viewIndex)).attrs["name"].decode("utf-8") + for viewIndex in range(NB_VIEW)] + views = [] + viewsIndices = [] + for viewIndex in range(NB_VIEW): + viewName = DATASET.get("View" + str(viewIndex)).attrs["name"] + if type(viewName) == bytes: + viewName = viewName.decode("utf-8") + if viewName in allowedViews: + views.append(viewName) + viewsIndices.append(viewsIndices) return views, viewsIndices, allViews else: views = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)]