Skip to content
Snippets Groups Projects
Commit 01af11ef authored by bbauvin's avatar bbauvin
Browse files

Using full dataset

parent 1b2d5a16
Branches
Tags
No related merge requests found
......@@ -356,7 +356,7 @@ def execClassif(arguments):
getDatabase = getattr(DB, "get" + args.name + "DB" + args.type[1:])
DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, args.CL_nbClass,
args.CL_classes, randomState)
args.CL_classes, randomState, args.full)
classificationIndices = execution.genSplits(DATASET.get("Labels").value, args.CL_split, statsIterRandomStates)
......
......@@ -29,12 +29,14 @@ def initConstants(args, X, classificationIndices, labelsNames, name, directory):
except KeyError:
kwargs = args
t_start = time.time()
if type(X.attrs["name"]) == bytes:
feat = X.attrs["name"].decode("utf-8")
else:
feat = X.attrs["name"]
CL_type = kwargs["CL_type"]
X = getValue(X)
learningRate = float(len(classificationIndices[0])) / (len(classificationIndices[0]) + len(classificationIndices[1]))
labelsString = "-".join(labelsNames)
timestr = time.strftime("%Y%m%d-%H%M%S")
CL_type_string = CL_type
outputFileName = directory + CL_type_string + "/" + feat + "/" + "Results-" + CL_type_string + "-" + labelsString + \
......
......@@ -52,7 +52,7 @@ def makeMeNoisy(viewData, randomState, percentage=15):
return noisyViewData
def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", nbView=3,
def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", randomState=None, full=True, nbView=3,
nbClass=2, datasetLength=347, randomStateInt=None):
"""Used to generate a plausible dataset to test the algorithms"""
randomStateInt = 42
......@@ -257,6 +257,9 @@ def filterLabels(labelsSet, askedLabelsNamesSet, fullLabels, availableLabelsName
def filterViews(datasetFile, temp_dataset, views, usedIndices):
newViewIndex = 0
if views == [""]:
for viewIndex in range(datasetFile.get("Metadata").attrs["nbView"]):
copyhdf5Dataset(datasetFile, temp_dataset, "View" + str(viewIndex), "View" + str(viewIndex), usedIndices)
for askedViewName in views:
for viewIndex in range(datasetFile.get("Metadata").attrs["nbView"]):
viewName = datasetFile.get("View" + str(viewIndex)).attrs["name"]
......@@ -286,8 +289,14 @@ def copyhdf5Dataset(sourceDataFile, destinationDataFile, sourceDatasetName, dest
newDset.attrs[key] = value
def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState):
def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full=False):
"""Used to load a hdf5 database"""
if full:
datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
labelsDictionary = dict((labelIndex, labelName.decode("utf-8")) for labelIndex, labelName in
enumerate(datasetFile.get("Labels").attrs["names"]))
return datasetFile, labelsDictionary
else:
askedLabelsNames = [askedLabelName.encode("utf8") for askedLabelName in askedLabelsNames]
datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
fullLabels = datasetFile.get("Labels").value
......@@ -311,7 +320,7 @@ def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomSta
return temp_dataset, labelsDictionary
def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, delimiter=","):
def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full=False, delimiter=","):
# TODO : Update this one
labelsNames = np.genfromtxt(pathF + nameDB + "-labels-names.csv", dtype='str', delimiter=delimiter)
datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w")
......@@ -333,7 +342,7 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomStat
metaDataGrp.attrs["nbClass"] = len(labelsNames)
metaDataGrp.attrs["datasetLength"] = len(labels)
datasetFile.close()
datasetFile, labelsDictionary = getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState)
datasetFile, labelsDictionary = getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full)
return datasetFile, labelsDictionary
......
......@@ -16,7 +16,7 @@ def getFeatureImportance(classifier, directory, interpretString=""):
featureImportancesSorted = featureImportances[sortedArgs][:50]
featureIndicesSorted = sortedArgs[:50]
fig, ax = plt.subplots()
x = np.arange(50)
x = np.arange(len(featureIndicesSorted))
formatter = FuncFormatter(percent)
ax.yaxis.set_major_formatter(formatter)
plt.bar(x, featureImportancesSorted)
......
......@@ -38,6 +38,8 @@ def parseTheArgs(arguments):
type=int, default=2)
groupStandard.add_argument('--machine', metavar='STRING', action='store',
help='Type of machine on which the script runs', default="PC")
groupStandard.add_argument('-full', action='store_true', help='Use option to use full dataset and no labels or view filtering')
groupClass = parser.add_argument_group('Classification arguments')
groupClass.add_argument('--CL_multiclassMethod', metavar='STRING', action='store',
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment