Skip to content
Snippets Groups Projects
Commit 7fb6316a authored by bbauvin
Browse files

Added early dataset extraction and optimized the HDF5 dataset; need to update...

Added early dataset extraction and optimized the HDF5 dataset; the other dataset extraction methods still need to be updated
parent cc4aa121
No related branches found
No related tags found
No related merge requests found
Showing
with 88 additions and 40 deletions
......@@ -3,6 +3,7 @@ import pkgutil
import Multiview
from Multiview.ExecMultiview import ExecMultiview
from Monoview.ExecClassifMonoView import ExecMonoview
import Multiview.GetMultiviewDb as DB
import Monoview
import os
import time
......@@ -130,6 +131,14 @@ logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=lo
if args.log:
logging.getLogger().addHandler(logging.StreamHandler())
getDatabase = getattr(DB, "get" + args.name + "DB" + args.type[1:])
DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name, len(args.CL_classes), args.CL_classes)
datasetLength = DATASET.get("Metadata").attrs["datasetLength"]
NB_VIEW = DATASET.get("Metadata").attrs["nbView"]
views = [str(DATASET.get("View"+str(viewIndex)).attrs["name"]) for viewIndex in range(NB_VIEW)]
NB_CLASS = DATASET.get("Metadata").attrs["nbClass"]
logging.info("Begginging")
benchmark = {}
if args.CL_type.split(":")==["Benchmark"]:
......@@ -184,28 +193,30 @@ KNNKWARGS = {"classifier__n_neighbors": map(float,args.CL_KNN_neigh.split(":"))}
argumentDictionaries = {"Monoview":{}, "Multiview":[]}
if benchmark["Monoview"]:
for view in args.views.split(":"):
argumentDictionaries["Monoview"][str(view)] = []
for classifier in benchmark["Monoview"]:
arguments = {classifier+"KWARGS": globals()[classifier+"KWARGS"], "feat":view, "fileFeat": args.fileFeat,
"fileCL": args.fileCL, "fileCLD": args.fileCLD, "CL_type": classifier,
classifier+"KWARGS": globals()[classifier+"KWARGS"]}
argumentDictionaries["Monoview"][str(view)].append(arguments)
bestClassifiers = []
bestClassifiersConfigs = []
for viewArguments in argumentDictionaries["Monoview"].values():
resultsMonoview = Parallel(n_jobs=nbCores)(
delayed(ExecMonoview)(args.name, args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF, gridSearch=True,
**arguments)
for arguments in viewArguments)
accuracies = [result[1] for result in resultsMonoview]
classifiersNames = [result[0] for result in resultsMonoview]
classifiersConfigs = [result[2] for result in resultsMonoview]
bestClassifiers.append(classifiersNames[np.argmax(np.array(accuracies))])
bestClassifiersConfigs.append(classifiersConfigs[np.argmax(np.array(accuracies))])
# if benchmark["Monoview"]:
# for view in args.views.split(":"):
# argumentDictionaries["Monoview"][str(view)] = []
# for classifier in benchmark["Monoview"]:
# arguments = {classifier+"KWARGS": globals()[classifier+"KWARGS"], "feat":view, "fileFeat": args.fileFeat,
# "fileCL": args.fileCL, "fileCLD": args.fileCLD, "CL_type": classifier,
# classifier+"KWARGS": globals()[classifier+"KWARGS"]}
# argumentDictionaries["Monoview"][str(view)].append(arguments)
#
# bestClassifiers = []
# bestClassifiersConfigs = []
# for viewIndex, viewArguments in enumerate(argumentDictionaries["Monoview"].values()):
# resultsMonoview = Parallel(n_jobs=nbCores)(
# delayed(ExecMonoview)(DATASET.get("View"+str(viewIndex)).value, DATASET.get("labels").value, args.name,
# args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF, gridSearch=True,
# **arguments)
# for arguments in viewArguments)
# accuracies = [result[1] for result in resultsMonoview]
# classifiersNames = [result[0] for result in resultsMonoview]
# classifiersConfigs = [result[2] for result in resultsMonoview]
# bestClassifiers.append(classifiersNames[np.argmax(np.array(accuracies))])
# bestClassifiersConfigs.append(classifiersConfigs[np.argmax(np.array(accuracies))])
bestClassifiers = ["DecisionTree", "DecisionTree", "DecisionTree", "DecisionTree"]
bestClassifiersConfigs = [["1"],["1"],["1"],["1"]]
if benchmark["Multiview"]:
if benchmark["Multiview"]["Fusion"]:
if benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"] and benchmark["Multiview"]["Fusion"]["Classifiers"]:
......@@ -249,8 +260,8 @@ if benchmark["Multiview"]:
argumentDictionaries["Multiview"].append(arguments)
resultsMultiview = Parallel(n_jobs=nbCores)(
delayed(ExecMultiview)(args.name, args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF, gridSearch=True,
**arguments)
delayed(ExecMultiview)(DATASET, args.name, args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF,
LABELS_DICTIONARY, gridSearch=True, **arguments)
for arguments in argumentDictionaries["Multiview"])
# for classifierType, argumentsList in argumentDictionaries.iteritems():
......
......@@ -30,7 +30,7 @@ __date__ = 2016-03-25
### Argument Parser
def ExecMonoview(name, learningRate, nbFolds, nbCores, databaseType, path, gridSearch=True, **kwargs):
def ExecMonoview(X, Y, name, learningRate, nbFolds, nbCores, databaseType, path, gridSearch=True, **kwargs):
t_start = time.time()
directory = os.path.dirname(os.path.abspath(__file__)) + "/Results-ClassMonoView/"
feat = kwargs["feat"]
......@@ -44,20 +44,6 @@ def ExecMonoview(name, learningRate, nbFolds, nbCores, databaseType, path, gridS
logging.debug("### Main Programm for Classification MonoView")
logging.debug("### Classification - Database:" + str(name) + " Feature:" + str(feat) + " train_size:" + str(learningRate) + ", CrossValidation k-folds:" + str(nbFolds) + ", cores:" + str(nbCores)+", algorithm : "+CL_type)
# Read the features
logging.debug("Start:\t Read " + databaseType + " Files")
if databaseType == ".csv":
X = np.genfromtxt(path + fileFeat, delimiter=';')
Y = np.genfromtxt(path + fileCL, delimiter=';')
elif databaseType == ".hdf5":
dataset = h5py.File(path + name + ".hdf5", "r")
viewsDict = dict((dataset.get("/View"+str(viewIndex)+"/name").value, viewIndex) for viewIndex in range(dataset.get("nbView").value))
X = dataset["View"+str(viewsDict[feat])+"/matrix"][...]
Y = dataset["Labels/labelsArray"][...]
logging.debug("Info:\t Shape of Feature:" + str(X.shape) + ", Length of classLabels vector:" + str(Y.shape))
logging.debug("Done:\t Read CSV Files")
# Calculate Train/Test data
logging.debug("Start:\t Determine Train/Test split")
......@@ -204,7 +190,23 @@ if __name__=='__main__':
if(args.log):
logging.getLogger().addHandler(logging.StreamHandler())
# Read the features
logging.debug("Start:\t Read " + args.type + " Files")
if args.databaseType == ".csv":
X = np.genfromtxt(args.pathF + args.fileFeat, delimiter=';')
Y = np.genfromtxt(args.pathF + args.fileCL, delimiter=';')
elif args.type == ".hdf5":
dataset = h5py.File(args.pathF + args.name + ".hdf5", "r")
viewsDict = dict((dataset.get("View"+str(viewIndex)).attrs["name"], viewIndex) for viewIndex in range(dataset.get("Metadata").attrs["nbView"]))
X = dataset["View"+str(viewsDict[args.feat])][...]
Y = dataset["labels"][...]
logging.debug("Info:\t Shape of Feature:" + str(X.shape) + ", Length of classLabels vector:" + str(Y.shape))
logging.debug("Done:\t Read CSV Files")
arguments = {"RandomForestKWARGS": RandomForestKWARGS, "SVCKWARGS": SVCKWARGS,
"DecisionTreeKWARGS": DecisionTreeKWARGS, "SGDKWARGS": SGDKWARGS, "feat":args.feat,
"fileFeat": args.fileFeat, "fileCL": args.fileCL, "fileCLD": args.fileCLD, "CL_type": args.CL_type}
ExecMonoview(args.name, args.CL_split, args.CL_CV, args.CL_Cores, args.type, args.pathF, **arguments)
ExecMonoview(X, Y, args.name, args.CL_split, args.CL_CV, args.CL_Cores, args.type, args.pathF, **arguments)
;Non;Oui;All
Non;0.478260869565;0.0416666666667;0.328571428571
Oui;0.239130434783;0.0416666666667;0.171428571429
All;0.717391304348;0.0833333333333;0.5
;Non;Oui;All
Non;0.5;;0.314285714286
Oui;0.295454545455;;0.185714285714
All;0.795454545455;;0.5
;Non;Oui;All
Non;0.5;;0.342857142857
Oui;0.229166666667;;0.157142857143
All;0.729166666667;;0.5
;Non;Oui;All
Non;0.346153846154;0.444444444444;0.371428571429
Oui;0.0769230769231;0.277777777778;0.128571428571
All;0.423076923077;0.722222222222;0.5
;Non;Oui;All
Non;0.326923076923;0.5;0.371428571429
Oui;0.115384615385;0.166666666667;0.128571428571
All;0.442307692308;0.666666666667;0.5
Code/Monoview/Results-ClassMonoView/2016_08_23-CMV-MultiOmic-Clinic-ConfMatrixImg-1.png

20.3 KiB

Code/Monoview/Results-ClassMonoView/2016_08_23-CMV-MultiOmic-Clinic-ConfMatrixImg-2.png

19.2 KiB

Code/Monoview/Results-ClassMonoView/2016_08_23-CMV-MultiOmic-Clinic-ConfMatrixImg-3.png

20.1 KiB

Code/Monoview/Results-ClassMonoView/2016_08_23-CMV-MultiOmic-Clinic-ConfMatrixImg-4.png

21.2 KiB

Code/Monoview/Results-ClassMonoView/2016_08_23-CMV-MultiOmic-Clinic-ConfMatrixImg.png

19.1 KiB

;Precision;Recall;F1;Support
Non;0.666666666667;0.95652173913;0.785714285714;23.0
Oui;0.5;0.0833333333333;0.142857142857;12.0
;Precision;Recall;F1;Support
Non;0.628571428571;1.0;0.771929824561;22.0
Oui;0.0;0.0;0.0;13.0
;Precision;Recall;F1;Support
Non;0.685714285714;1.0;0.813559322034;24.0
Oui;0.0;0.0;0.0;11.0
;Precision;Recall;F1;Support
Non;0.818181818182;0.692307692308;0.75;26.0
Oui;0.384615384615;0.555555555556;0.454545454545;9.0
;Precision;Recall;F1;Support
Non;0.739130434783;0.653846153846;0.69387755102;26.0
Oui;0.25;0.333333333333;0.285714285714;9.0
Code/Monoview/Results-ClassMonoView/2016_08_23-CMV-MultiOmic-Clinic-Score-1.png

23.1 KiB

Code/Monoview/Results-ClassMonoView/2016_08_23-CMV-MultiOmic-Clinic-Score-2.png

23 KiB

Code/Monoview/Results-ClassMonoView/2016_08_23-CMV-MultiOmic-Clinic-Score-3.png

23 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment