Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Supervised MultiModal Integration Tool
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Analyze
Contributor analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Baptiste Bauvin
Supervised MultiModal Integration Tool
Commits
b4d13a7a
Commit
b4d13a7a
authored
7 years ago
by
bbauvin
Browse files
Options
Downloads
Patches
Plain Diff
moved to the graal
parent
6a7c80e5
No related branches found
No related tags found
No related merge requests found
Changes
2
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
Code/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
+73
-58
73 additions, 58 deletions
Code/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
Code/Tests/Test_utils/test_GetMultiviewDB.py
+396
-290
396 additions, 290 deletions
Code/Tests/Test_utils/test_GetMultiviewDB.py
with
469 additions
and
348 deletions
Code/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
+
73
−
58
View file @
b4d13a7a
...
...
@@ -6,6 +6,7 @@ import logging
import
h5py
import
operator
import
errno
import
csv
# Author-Info
__author__
=
"
Baptiste Bauvin
"
...
...
@@ -77,11 +78,11 @@ def getPlausibleDBhdf5(features, pathF, name, NB_CLASS, LABELS_NAME, nbView=3,
viewDset
=
datasetFile
.
create_dataset
(
"
View
"
+
str
(
viewIndex
),
viewData
.
shape
,
data
=
viewData
.
astype
(
np
.
uint8
))
viewDset
.
attrs
[
"
name
"
]
=
"
View
"
+
str
(
viewIndex
)
viewDset
.
attrs
[
"
sparse
"
]
=
False
viewDset
.
attrs
[
"
binary
"
]
=
True
#
viewDset.attrs["binary"] = True
labelsDset
=
datasetFile
.
create_dataset
(
"
Labels
"
,
CLASS_LABELS
.
shape
)
labelsDset
[...]
=
CLASS_LABELS
labelsDset
.
attrs
[
"
name
"
]
=
"
Labels
"
labelsDset
.
attrs
[
"
names
"
]
=
[
"
No
"
,
"
Yes
"
]
labelsDset
.
attrs
[
"
names
"
]
=
[
"
No
"
.
encode
(),
"
Yes
"
.
encode
()
]
metaDataGrp
=
datasetFile
.
create_group
(
"
Metadata
"
)
metaDataGrp
.
attrs
[
"
nbView
"
]
=
nbView
metaDataGrp
.
attrs
[
"
nbClass
"
]
=
2
...
...
@@ -223,8 +224,9 @@ def filterLabels(labelsSet, askedLabelsNamesSet, fullLabels, availableLabelsName
def
filterViews
(
datasetFile
,
temp_dataset
,
views
,
usedIndices
):
newViewIndex
=
0
for
askedViewName
in
views
:
for
viewIndex
in
range
(
datasetFile
.
get
(
"
Metadata
"
).
attrs
[
"
nbView
"
]):
if
datasetFile
.
get
(
"
View
"
+
str
(
viewIndex
)).
attrs
[
"
name
"
]
in
views
:
if
datasetFile
.
get
(
"
View
"
+
str
(
viewIndex
)).
attrs
[
"
name
"
]
==
askedViewName
:
copyhdf5Dataset
(
datasetFile
,
temp_dataset
,
"
View
"
+
str
(
viewIndex
),
"
View
"
+
str
(
newViewIndex
),
usedIndices
)
newViewIndex
+=
1
else
:
...
...
@@ -232,6 +234,18 @@ def filterViews(datasetFile, temp_dataset, views, usedIndices):
temp_dataset
.
get
(
"
Metadata
"
).
attrs
[
"
nbView
"
]
=
len
(
views
)
def
copyhdf5Dataset
(
sourceDataFile
,
destinationDataFile
,
sourceDatasetName
,
destinationDatasetName
,
usedIndices
):
"""
Used to copy a view in a new dataset file using only the examples of usedIndices, and copying the args
"""
newDset
=
destinationDataFile
.
create_dataset
(
destinationDatasetName
,
data
=
sourceDataFile
.
get
(
sourceDatasetName
).
value
[
usedIndices
,:])
if
"
sparse
"
in
sourceDataFile
.
get
(
sourceDatasetName
).
attrs
.
keys
()
and
sourceDataFile
.
get
(
sourceDatasetName
).
attrs
[
"
sparse
"
]:
# TODO : Support sparse
pass
else
:
for
key
,
value
in
sourceDataFile
.
get
(
sourceDatasetName
).
attrs
.
items
():
newDset
.
attrs
[
key
]
=
value
def
getClassicDBhdf5
(
views
,
pathF
,
nameDB
,
NB_CLASS
,
askedLabelsNames
,
randomState
):
"""
Used to load a hdf5 database
"""
askedLabelsNames
=
[
askedLabelName
.
encode
(
"
utf8
"
)
for
askedLabelName
in
askedLabelsNames
]
...
...
@@ -247,27 +261,65 @@ def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomSta
newLabels
,
newLabelsNames
,
usedIndices
=
filterLabels
(
labelsSet
,
askedLabelsNamesSet
,
fullLabels
,
availableLabelsNames
,
askedLabelsNames
)
temp_dataset
.
get
(
"
Metadata
"
).
attrs
[
"
datasetLength
"
]
=
len
(
usedIndices
)
temp_dataset
.
get
(
"
Metadata
"
).
attrs
[
"
nbClass
"
]
=
NB_CLASS
temp_dataset
.
create_dataset
(
"
Labels
"
,
data
=
newLabels
)
temp_dataset
.
get
(
"
Labels
"
).
attrs
[
"
names
"
]
=
newLabelsNames
filterViews
(
datasetFile
,
temp_dataset
,
views
,
usedIndices
)
labelsDictionary
=
dict
((
labelIndex
,
labelName
)
for
labelIndex
,
labelName
in
labelsDictionary
=
dict
((
labelIndex
,
labelName
.
decode
(
"
utf-8
"
))
for
labelIndex
,
labelName
in
enumerate
(
temp_dataset
.
get
(
"
Labels
"
).
attrs
[
"
names
"
]))
return
dataset
File
,
labelsDictionary
return
temp_
dataset
,
labelsDictionary
def
copyhdf5Dataset
(
sourceDataFile
,
destinationDataFile
,
sourceDatasetName
,
destinationDatasetName
,
usedIndices
):
"""
Used to copy a view in a new dataset file using only the examples of usedIndices, and copying the args
"""
newDset
=
destinationDataFile
.
create_dataset
(
destinationDatasetName
,
data
=
sourceDataFile
.
get
(
sourceDatasetName
).
value
[
usedIndices
,:])
if
"
sparse
"
in
sourceDataFile
.
get
(
sourceDatasetName
).
attrs
.
keys
()
and
sourceDataFile
.
get
(
sourceDatasetName
).
attrs
[
"
sparse
"
]:
# TODO : Support sparse
pass
def
getClassicDBcsv
(
views
,
pathF
,
nameDB
,
NB_CLASS
,
askedLabelsNames
,
randomState
,
delimiter
=
"
,
"
):
# TODO : Update this one
labelsNames
=
np
.
genfromtxt
(
pathF
+
nameDB
+
"
-labels-names.csv
"
,
dtype
=
'
str
'
,
delimiter
=
delimiter
)
datasetFile
=
h5py
.
File
(
pathF
+
nameDB
+
"
.hdf5
"
,
"
w
"
)
labels
=
np
.
genfromtxt
(
pathF
+
nameDB
+
"
-labels.csv
"
,
delimiter
=
delimiter
)
labelsDset
=
datasetFile
.
create_dataset
(
"
Labels
"
,
labels
.
shape
,
data
=
labels
)
labelsDset
.
attrs
[
"
names
"
]
=
[
labelName
.
encode
()
for
labelName
in
labelsNames
]
viewFileNames
=
[
viewFileName
for
viewFileName
in
os
.
listdir
(
pathF
+
"
Views/
"
)]
# import pdb;pdb.set_trace()
for
viewIndex
,
viewFileName
in
enumerate
(
os
.
listdir
(
pathF
+
"
Views/
"
)):
viewFile
=
pathF
+
"
Views/
"
+
viewFileName
if
viewFileName
[
-
6
:]
!=
"
-s.csv
"
:
viewMatrix
=
np
.
genfromtxt
(
viewFile
,
delimiter
=
delimiter
)
viewDset
=
datasetFile
.
create_dataset
(
"
View
"
+
str
(
viewIndex
),
viewMatrix
.
shape
,
data
=
viewMatrix
)
viewDset
.
attrs
[
"
name
"
]
=
viewFileName
[:
-
4
]
viewDset
.
attrs
[
"
sparse
"
]
=
False
else
:
for
key
,
value
in
sourceDataFile
.
get
(
sourceDatasetName
).
attrs
.
items
():
newDset
.
attrs
[
key
]
=
value
pass
metaDataGrp
=
datasetFile
.
create_group
(
"
Metadata
"
)
metaDataGrp
.
attrs
[
"
nbView
"
]
=
len
(
viewFileNames
)
metaDataGrp
.
attrs
[
"
nbClass
"
]
=
len
(
labelsNames
)
metaDataGrp
.
attrs
[
"
datasetLength
"
]
=
len
(
labels
)
datasetFile
.
close
()
datasetFile
,
labelsDictionary
=
getClassicDBhdf5
(
views
,
pathF
,
nameDB
,
NB_CLASS
,
askedLabelsNames
,
randomState
)
# if len(askedLabelsNames) != NB_CLASS:
# nbLabelsAvailable = 0
# for l in labelsNamesFile:
# nbLabelsAvailable += 1
# askedLabelsNames = [line.strip().split(";")[1] for lineIdx, line in enumerate(labelsNamesFile) if
# lineIdx in randomState.randint(nbLabelsAvailable, size=NB_CLASS)]
# fullLabels = np.genfromtxt(pathF + nameDB + '-ClassLabels.csv', delimiter=',').astype(int)
# labelsDictionary = dict((labelIndex, labelName) for labelIndex, labelName in enumerate(labelsNames))
# if len(set(fullLabels)) > NB_CLASS:
# usedIndices = getPositions(labelsDictionary.keys(), fullLabels)
# else:
# usedIndices = range(len(fullLabels))
# for viewIndex, view in enumerate(views):
# viewFile = pathF + nameDB + "-" + view + '.csv'
# viewMatrix = np.array(np.genfromtxt(viewFile, delimiter=','))[usedIndices, :]
# viewDset = datasetFile.create_dataset("View" + str(viewIndex), viewMatrix.shape, data=viewMatrix)
# viewDset.attrs["name"] = view
# viewDset.attrs["sparse"] = False
# viewDset.attrs["binary"] = False
# labelsDset.attrs["labels_indices"] = [labelIndex for labelIndex, labelName in labelsDictionary.iteritems()]
# datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
return
datasetFile
,
labelsDictionary
# def getLabelSupports(CLASS_LABELS):
# """Used to get the number of example for each label"""
...
...
@@ -333,43 +385,6 @@ def copyhdf5Dataset(sourceDataFile, destinationDataFile, sourceDatasetName, dest
# return usedIndices
# def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, LABELS_NAMES, randomState):
# TODO : Update this one
# labelsNamesFile = open(pathF + nameDB + '-ClassLabels-Description.csv')
# datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w")
# if len(LABELS_NAMES) != NB_CLASS:
# nbLabelsAvailable = 0
# for l in labelsNamesFile:
# nbLabelsAvailable += 1
# LABELS_NAMES = [line.strip().split(";")[1] for lineIdx, line in enumerate(labelsNamesFile) if
# lineIdx in randomState.randint(nbLabelsAvailable, size=NB_CLASS)]
# fullLabels = np.genfromtxt(pathF + nameDB + '-ClassLabels.csv', delimiter=',').astype(int)
# labelsDictionary = dict((classIndex, labelName) for (classIndex, labelName) in
# [(int(line.strip().split(";")[0]), line.strip().split(";")[1]) for lineIndex, line in
# enumerate(labelsNamesFile) if line.strip().split(";")[0] in LABELS_NAMES])
# if len(set(fullLabels)) > NB_CLASS:
# usedIndices = getPositions(labelsDictionary.keys(), fullLabels)
# else:
# usedIndices = range(len(fullLabels))
# for viewIndex, view in enumerate(views):
# viewFile = pathF + nameDB + "-" + view + '.csv'
# viewMatrix = np.array(np.genfromtxt(viewFile, delimiter=','))[usedIndices, :]
# viewDset = datasetFile.create_dataset("View" + str(viewIndex), viewMatrix.shape, data=viewMatrix)
# viewDset.attrs["name"] = view
# viewDset.attrs["sparse"] = False
# viewDset.attrs["binary"] = False
#
# labelsDset = datasetFile.create_dataset("Labels", fullLabels[usedIndices].shape, data=fullLabels[usedIndices])
# labelsDset.attrs["labels"] = [labelName for index, labelName in labelsDictionary.iteritems()]
# labelsDset.attrs["labels_indices"] = [labelIndex for labelIndex, labelName in labelsDictionary.iteritems()]
#
# metaDataGrp = datasetFile.create_group("Metadata")
# metaDataGrp.attrs["nbView"] = len(views)
# metaDataGrp.attrs["nbClass"] = NB_CLASS
# metaDataGrp.attrs["datasetLength"] = len(fullLabels[usedIndices])
# datasetFile.close()
# datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
# return datasetFile, labelsDictionary
# def getCaltechDBcsv(views, pathF, nameDB, NB_CLASS, LABELS_NAMES, randomState):
# datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w")
...
...
This diff is collapsed.
Click to expand it.
Code/Tests/Test_utils/test_GetMultiviewDB.py
+
396
−
290
View file @
b4d13a7a
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment