diff --git a/Code/MonoMultiViewClassifiers/ExecClassif.py b/Code/MonoMultiViewClassifiers/ExecClassif.py
index 302eab9ec82bca582b2a558c838d9dc165683cc5..094dbcdfb7f08f73ad951042d655df6c336b08dd 100644
--- a/Code/MonoMultiViewClassifiers/ExecClassif.py
+++ b/Code/MonoMultiViewClassifiers/ExecClassif.py
@@ -356,7 +356,7 @@ def execClassif(arguments):
     getDatabase = getattr(DB, "get" + args.name + "DB" + args.type[1:])
 
     DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args.CL_nbClass,
-                                             args.CL_classes, randomState)
+                                             args.CL_classes, randomState, args.full)
 
     classificationIndices = execution.genSplits(DATASET.get("Labels").value, args.CL_split, statsIterRandomStates)
 
diff --git a/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py
index 23592ea154be2093b9a36f5f179745b1d6d30e0c..ee0005fa04b2547af259e737edc9b6a4798a7cf9 100644
--- a/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py
+++ b/Code/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py
@@ -29,12 +29,14 @@ def initConstants(args, X, classificationIndices, labelsNames, name, directory):
     except KeyError:
         kwargs = args
     t_start = time.time()
-    feat = X.attrs["name"]
+    if type(X.attrs["name"]) == bytes:
+        feat = X.attrs["name"].decode("utf-8")
+    else:
+        feat = X.attrs["name"]
     CL_type = kwargs["CL_type"]
     X = getValue(X)
     learningRate = float(len(classificationIndices[0])) / (len(classificationIndices[0]) +
                                                            len(classificationIndices[1]))
     labelsString = "-".join(labelsNames)
-    timestr = time.strftime("%Y%m%d-%H%M%S")
     CL_type_string = CL_type
     outputFileName = directory + CL_type_string + "/" + feat + "/" + "Results-" + CL_type_string + "-" + labelsString + \
diff --git a/Code/MonoMultiViewClassifiers/utils/GetMultiviewDb.py b/Code/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
index 1114d4b36e5d7448efb8587579b6f6ca28352a38..981533ecb033c9ba4f45096dfe96a4d71bfad182 100644
--- a/Code/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
+++ b/Code/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
@@ -52,7 +52,7 @@
     return noisyViewData
 
 
-def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", nbView=3,
+def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", randomState=None, full=True, nbView=3,
                        nbClass=2, datasetLength=347, randomStateInt=None):
     """Used to generate a plausible dataset to test the algorithms"""
     randomStateInt = 42
@@ -257,6 +257,9 @@ def filterLabels(labelsSet, askedLabelsNamesSet, fullLabels, availableLabelsName
 
 def filterViews(datasetFile, temp_dataset, views, usedIndices):
     newViewIndex = 0
+    if views == [""]:
+        for viewIndex in range(datasetFile.get("Metadata").attrs["nbView"]):
+            copyhdf5Dataset(datasetFile, temp_dataset, "View" + str(viewIndex), "View" + str(viewIndex), usedIndices)
     for askedViewName in views:
         for viewIndex in range(datasetFile.get("Metadata").attrs["nbView"]):
             viewName = datasetFile.get("View" + str(viewIndex)).attrs["name"]
@@ -286,32 +289,38 @@ def copyhdf5Dataset(sourceDataFile, destinationDataFile, sourceDatasetName, dest
             newDset.attrs[key] = value
 
 
-def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState):
+def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full=False):
     """Used to load a hdf5 database"""
-    askedLabelsNames = [askedLabelName.encode("utf8") for askedLabelName in askedLabelsNames]
-    datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
-    fullLabels = datasetFile.get("Labels").value
-    temp_dataset = h5py.File(pathF+nameDB+"_temp_view_label_select.hdf5", "w")
-    datasetFile.copy("Metadata", temp_dataset)
-    labelsSet = getClasses(fullLabels)
-    availableLabelsNames = list(datasetFile.get("Labels").attrs["names"])
-    askedLabelsNames, askedLabelsNamesSet = fillLabelNames(NB_CLASS, askedLabelsNames,
-                                                           randomState, availableLabelsNames)
+    if full:
+        datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
+        labelsDictionary = dict((labelIndex, labelName.decode("utf-8")) for labelIndex, labelName in
+                                enumerate(datasetFile.get("Labels").attrs["names"]))
+        return datasetFile, labelsDictionary
+    else:
+        askedLabelsNames = [askedLabelName.encode("utf8") for askedLabelName in askedLabelsNames]
+        datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
+        fullLabels = datasetFile.get("Labels").value
+        temp_dataset = h5py.File(pathF+nameDB+"_temp_view_label_select.hdf5", "w")
+        datasetFile.copy("Metadata", temp_dataset)
+        labelsSet = getClasses(fullLabels)
+        availableLabelsNames = list(datasetFile.get("Labels").attrs["names"])
+        askedLabelsNames, askedLabelsNamesSet = fillLabelNames(NB_CLASS, askedLabelsNames,
+                                                               randomState, availableLabelsNames)
 
-    newLabels, newLabelsNames, usedIndices = filterLabels(labelsSet, askedLabelsNamesSet, fullLabels,
-                                                          availableLabelsNames, askedLabelsNames)
-    temp_dataset.get("Metadata").attrs["datasetLength"] = len(usedIndices)
-    temp_dataset.get("Metadata").attrs["nbClass"] = NB_CLASS
-    temp_dataset.create_dataset("Labels", data=newLabels)
-    temp_dataset.get("Labels").attrs["names"] = newLabelsNames
-    filterViews(datasetFile, temp_dataset, views, usedIndices)
+        newLabels, newLabelsNames, usedIndices = filterLabels(labelsSet, askedLabelsNamesSet, fullLabels,
+                                                              availableLabelsNames, askedLabelsNames)
+        temp_dataset.get("Metadata").attrs["datasetLength"] = len(usedIndices)
+        temp_dataset.get("Metadata").attrs["nbClass"] = NB_CLASS
+        temp_dataset.create_dataset("Labels", data=newLabels)
+        temp_dataset.get("Labels").attrs["names"] = newLabelsNames
+        filterViews(datasetFile, temp_dataset, views, usedIndices)
 
-    labelsDictionary = dict((labelIndex, labelName.decode("utf-8")) for labelIndex, labelName in
-                            enumerate(temp_dataset.get("Labels").attrs["names"]))
-    return temp_dataset, labelsDictionary
+        labelsDictionary = dict((labelIndex, labelName.decode("utf-8")) for labelIndex, labelName in
+                                enumerate(temp_dataset.get("Labels").attrs["names"]))
+        return temp_dataset, labelsDictionary
 
 
-def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, delimiter=","):
+def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full=False, delimiter=","):
     # TODO : Update this one
     labelsNames = np.genfromtxt(pathF + nameDB + "-labels-names.csv", dtype='str', delimiter=delimiter)
     datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w")
@@ -333,7 +342,7 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomStat
     metaDataGrp.attrs["nbClass"] = len(labelsNames)
     metaDataGrp.attrs["datasetLength"] = len(labels)
     datasetFile.close()
-    datasetFile, labelsDictionary = getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState)
+    datasetFile, labelsDictionary = getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full)
     return datasetFile, labelsDictionary
 
 
diff --git a/Code/MonoMultiViewClassifiers/utils/Interpret.py b/Code/MonoMultiViewClassifiers/utils/Interpret.py
index e83b2e55b9d9f00adedb65cbd1b44cb8ebb29db1..03bf3c7d38ce7cb1a60da8ad1af4d36ba7817bce 100644
--- a/Code/MonoMultiViewClassifiers/utils/Interpret.py
+++ b/Code/MonoMultiViewClassifiers/utils/Interpret.py
@@ -16,7 +16,7 @@ def getFeatureImportance(classifier, directory, interpretString=""):
     featureImportancesSorted = featureImportances[sortedArgs][:50]
     featureIndicesSorted = sortedArgs[:50]
     fig, ax = plt.subplots()
-    x = np.arange(50)
+    x = np.arange(len(featureIndicesSorted))
     formatter = FuncFormatter(percent)
     ax.yaxis.set_major_formatter(formatter)
     plt.bar(x, featureImportancesSorted)
diff --git a/Code/MonoMultiViewClassifiers/utils/execution.py b/Code/MonoMultiViewClassifiers/utils/execution.py
index 206f2eea3c1cc366d3e27885d9231e74998ec031..840f4da70533bfd06456330c04a43c0e36a7dd39 100644
--- a/Code/MonoMultiViewClassifiers/utils/execution.py
+++ b/Code/MonoMultiViewClassifiers/utils/execution.py
@@ -38,6 +38,8 @@ def parseTheArgs(arguments):
                                type=int, default=2)
     groupStandard.add_argument('--machine', metavar='STRING', action='store',
                                help='Type of machine on which the script runs', default="PC")
+    groupStandard.add_argument('-full', action='store_true', help='Use option to use full dataset and no labels or view filtering')
+
 
     groupClass = parser.add_argument_group('Classification arguments')
     groupClass.add_argument('--CL_multiclassMethod', metavar='STRING', action='store',
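
For reviewers, a minimal usage sketch of the new full-dataset path. This is not part of the patch: the `Data/` path and `Plausible` dataset name are hypothetical placeholders, and the import assumes the repository root is on `sys.path`; the signature, `full` flag, and return values come from the `getClassicDBhdf5` hunk above.

```python
# Hypothetical usage sketch, not part of the patch. With the new -full flag,
# execClassif passes args.full through to the database getter; full=True makes
# getClassicDBhdf5 open the raw HDF5 file read-only and return it untouched,
# instead of writing the filtered *_temp_view_label_select.hdf5 copy.
from Code.MonoMultiViewClassifiers.utils.GetMultiviewDb import getClassicDBhdf5

datasetFile, labelsDictionary = getClassicDBhdf5(
    views=[""],           # ignored when full=True (no view filtering)
    pathF="Data/",        # hypothetical directory containing the .hdf5 file
    nameDB="Plausible",   # hypothetical dataset name -> Data/Plausible.hdf5
    NB_CLASS=2,           # ignored when full=True (no label filtering)
    askedLabelsNames=[],  # ignored when full=True
    randomState=None,     # ignored when full=True
    full=True)

# labelsDictionary maps each label index to its utf-8-decoded name, taken
# straight from the "names" attribute of the file's "Labels" dataset.
```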