Skip to content
Snippets Groups Projects
Commit abe5ba3b authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Added argument dictionaries and propagation; still need to add grid search and classifier config for each

parent 16b33769
Branches
Tags
No related merge requests found
Showing
with 280 additions and 78 deletions
import argparse
import pkgutil
import MultiView
import MonoView
import Multiview
from Multiview.ExecMultiview import ExecMultiview
from Monoview.ExecClassifMonoView import ExecMonoview
import Monoview
import os
import time
import logging
from joblib import Parallel, delayed
from ResultAnalysis import resultAnalysis
import numpy as np
parser = argparse.ArgumentParser(
description='This file is used to benchmark the accuracies fo multiple classification algorithm on multiview data.',
......@@ -12,73 +17,210 @@ parser = argparse.ArgumentParser(
groupStandard = parser.add_argument_group('Standard arguments')
groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console')
groupStandard.add_argument('--name', metavar='STRING', action='store', help='Name of Database (default: %(default)s)',default='Caltech')
groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of database : .hdf5 or .csv',default='.csv')
groupStandard.add_argument('--views', metavar='STRING', action='store',help='Name of the views selected for learning', default='RGB:HOG:SIFT')
groupStandard.add_argument('--pathF', metavar='STRING', action='store',help='Path to the views (default: %(default)s)',default='../FeatExtraction/Results-FeatExtr/')
groupStandard.add_argument('--fileCL', metavar='STRING', action='store', help='Name of classLabels CSV-file (default: %(default)s)', default='classLabels.csv')
groupStandard.add_argument('--fileCLD', metavar='STRING', action='store', help='Name of classLabels-Description CSV-file (default: %(default)s)', default='classLabels-Description.csv')
groupStandard.add_argument('--fileFeat', metavar='STRING', action='store', help='Name of feature CSV-file (default: %(default)s)', default='feature.csv')
groupStandard.add_argument('--name', metavar='STRING', action='store', help='Name of Database (default: %(default)s)',
default='Caltech')
groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of database : .hdf5 or .csv',
default='.csv')
groupStandard.add_argument('--views', metavar='STRING', action='store',help='Name of the views selected for learning',
default='RGB:HOG:SIFT:HOG:MHOG')
groupStandard.add_argument('--pathF', metavar='STRING', action='store',help='Path to the views (default: %(default)s)',
default='../FeatExtraction/Results-FeatExtr/')
groupStandard.add_argument('--fileCL', metavar='STRING', action='store',
help='Name of classLabels CSV-file (default: %(default)s)', default='classLabels.csv')
groupStandard.add_argument('--fileCLD', metavar='STRING', action='store',
help='Name of classLabels-Description CSV-file (default: %(default)s)',
default='classLabels-Description.csv')
groupStandard.add_argument('--fileFeat', metavar='STRING', action='store',
help='Name of feature CSV-file (default: %(default)s)', default='feature.csv')
groupClass = parser.add_argument_group('Classification arguments')
groupClass.add_argument('--CL_split', metavar='FLOAT', action='store',help='Determine the learning rate if > 1.0, number of fold for cross validation', type=float,default=0.9)
groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', help='Number of folds in cross validation',type=int, default=3)
groupClass.add_argument('--CL_nb_class', metavar='INT', action='store', help='Number of classes, -1 for all', type=int,default=4)
groupClass.add_argument('--CL_classes', metavar='STRING', action='store',help='Classes used in the dataset (names of the folders) if not filled, random classes will be selected ex. walrus:mole:leopard', default="")
groupClass.add_argument('--CL_type', metavar='STRING', action='store',help='Determine whether to use Multiview, Monoview or Benchmark', default='Benchmark')
groupClass.add_argument('--CL_algorithm', metavar='STRING', action='store',help='Determine which multiview classifier to use, if CL_type = Benchmark, list all needed algorithms separated with :', default='')
groupClass.add_argument('--CL_cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int,default=5)
groupClass.add_argument('--CL_split', metavar='FLOAT', action='store',
help='Determine the learning rate if > 1.0, number of fold for cross validation', type=float,
default=0.9)
groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', help='Number of folds in cross validation',
type=int, default=3)
groupClass.add_argument('--CL_nb_class', metavar='INT', action='store', help='Number of classes, -1 for all', type=int,
default=4)
groupClass.add_argument('--CL_classes', metavar='STRING', action='store',
help='Classes used in the dataset (names of the folders) if not filled, random classes will be '
'selected ex. walrus:mole:leopard', default="")
groupClass.add_argument('--CL_type', metavar='STRING', action='store',
help='Determine whether to use Multiview, Monoview, or Benchmark, separate with : if multiple',
default='Benchmark')
groupClass.add_argument('--CL_algorithm', metavar='STRING', action='store',
help='Determine which classifier to use, if CL_type = Benchmark, fill monoview and multiview '
'options', default='')
groupClass.add_argument('--CL_algos_monoview', metavar='STRING', action='store',
help='Determine which monoview classifier to use, separate with : if multiple', default='')
groupClass.add_argument('--CL_algos_multiview', metavar='STRING', action='store',
help='Determine which multiview classifier to use, separate with : if multiple', default='')
groupClass.add_argument('--CL_cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int,
default=5)
groupRF = parser.add_argument_group('Random Forest arguments')
groupRF.add_argument('--CL_RF_trees', metavar='STRING', action='store', help='GridSearch: Determine the trees', default='25 75 125 175')
groupRF.add_argument('--CL_RF_trees', metavar='STRING', action='store', help='GridSearch: Determine the trees',
default='25 75 125 175')
groupSVC = parser.add_argument_group('SVC arguments')
groupSVC.add_argument('--CL_SVC_kernel', metavar='STRING', action='store', help='GridSearch : Kernels used', default='linear')
groupSVC.add_argument('--CL_SVC_C', metavar='STRING', action='store', help='GridSearch : Penalty parameters used', default='1:10:100:1000')
groupSVC.add_argument('--CL_SVC_kernel', metavar='STRING', action='store', help='GridSearch : Kernels used',
default='linear')
groupSVC.add_argument('--CL_SVC_C', metavar='STRING', action='store', help='GridSearch : Penalty parameters used',
default='1:10:100:1000')
groupRF = parser.add_argument_group('Decision Trees arguments')
groupRF.add_argument('--CL_DT_depth', metavar='STRING', action='store', help='GridSearch: Determine max depth for Decision Trees', default='1:3:5:7')
groupRF.add_argument('--CL_DT_depth', metavar='STRING', action='store',
help='GridSearch: Determine max depth for Decision Trees', default='1:3:5:7')
groupSGD = parser.add_argument_group('SGD arguments')
groupSGD.add_argument('--CL_SGD_alpha', metavar='STRING', action='store', help='GridSearch: Determine alpha for SGDClassifier', default='0.1:0.2:0.5:0.9')
groupSGD.add_argument('--CL_SGD_loss', metavar='STRING', action='store', help='GridSearch: Determine loss for SGDClassifier', default='log')
groupSGD.add_argument('--CL_SGD_penalty', metavar='STRING', action='store', help='GridSearch: Determine penalty for SGDClassifier', default='l2')
groupSGD.add_argument('--CL_SGD_alpha', metavar='STRING', action='store',
help='GridSearch: Determine alpha for SGDClassifier', default='0.1:0.2:0.5:0.9')
groupSGD.add_argument('--CL_SGD_loss', metavar='STRING', action='store',
help='GridSearch: Determine loss for SGDClassifier', default='log')
groupSGD.add_argument('--CL_SGD_penalty', metavar='STRING', action='store',
help='GridSearch: Determine penalty for SGDClassifier', default='l2')
groupMumbo = parser.add_argument_group('Mumbo arguments')
groupMumbo.add_argument('--MU_type', metavar='STRING', action='store',help='Determine which monoview classifier to use with Mumbo',default='DecisionTree:DecisionTree:DecisionTree:DecisionTree')
groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', nargs='+',help='Configuration for the monoview classifier in Mumbo', default=['3:1.0', '3:1.0', '3:1.0','3:1.0'])
groupMumbo.add_argument('--MU_iter', metavar='INT', action='store',help='Number of iterations in Mumbos learning process', type=int, default=5)
groupMumbo.add_argument('--MU_types', metavar='STRING', action='store',
help='Determine which monoview classifier to use with Mumbo',default='DecisionTree')
groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', nargs='+',
help='Configuration for the monoview classifier in Mumbo',
default=['3:1.0', '3:1.0', '3:1.0','3:1.0'])
groupMumbo.add_argument('--MU_iter', metavar='INT', action='store',
help='Number of iterations in Mumbos learning process', type=int, default=5)
groupFusion = parser.add_argument_group('Fusion arguments')
groupFusion.add_argument('--FU_type', metavar='STRING', action='store',help='Determine which type of fusion to use', default='LateFusion')
groupFusion.add_argument('--FU_method', metavar='STRING', action='store',help='Determine which method of fusion to use', default='WeightedLinear')
groupFusion.add_argument('--FU_method_config', metavar='STRING', action='store', nargs='+',help='Configuration for the fusion method', default=['1:1:1:1'])
groupFusion.add_argument('--FU_cl_names', metavar='STRING', action='store',help='Names of the monoview classifiers used',default='RandomForest:SGD:SVC:DecisionTree')
groupFusion.add_argument('--FU_cl_config', metavar='STRING', action='store', nargs='+',help='Configuration for the monoview classifiers used', default=['3:4', 'log:l2', '10:linear','4'])
groupFusion.add_argument('--FU_types', metavar='STRING', action='store',
help='Determine which type of fusion to use, if multiple separate with :',
default='LateFusion')
groupFusion.add_argument('--FU_ealy_methods', metavar='STRING', action='store',
help='Determine which early fusion method of fusion to use, if multiple separate with :',
default='WeightedLinear')
groupFusion.add_argument('--FU_late_methods', metavar='STRING', action='store',
help='Determine which late fusion method of fusion to use, if multiple separate with :',
default='WeightedLinear')
groupFusion.add_argument('--FU_method_config', metavar='STRING', action='store', nargs='+',
help='Configuration for the fusion method', default=['1:1:1:1'])
groupFusion.add_argument('--FU_cl_names', metavar='STRING', action='store',
help='Names of the monoview classifiers used',default='RandomForest:SGD:SVC:DecisionTree')
groupFusion.add_argument('--FU_cl_config', metavar='STRING', action='store', nargs='+',
help='Configuration for the monoview classifiers used', default=['3:4', 'log:l2', '10:linear','4'])
args = parser.parse_args()
if args.CL_type=="Benchmark":
benchmark = {}
if args.CL_type.split(":")==["Benchmark"]:
if args.CL_algorithm=='':
fusionModulesNames = [name for _, name, isPackage in pkgutil.iter_modules(['MultiView/Fusion/Methods']) if not isPackage]
fusionModules = [getattr(MultiView.Fusion.Methods, fusionModulesName)
fusionModulesNames = [name for _, name, isPackage
in pkgutil.iter_modules(['Multiview/Fusion/Methods']) if not isPackage]
fusionModules = [getattr(Multiview.Fusion.Methods, fusionModulesName)
for fusionModulesName in fusionModulesNames]
fusionClasses = [getattr(fusionModule, fusionModulesName+"Classifier")
for fusionModulesName, fusionModule in zip(fusionModulesNames, fusionModules)]
fusionMethods = dict((fusionModulesName, [subclass.__name__ for subclass in fusionClasse.__subclasses__() ])
for fusionModulesName, fusionClasse in zip(fusionModulesNames, fusionClasses))
fusionMonoviewClassifiers = [name for _, name, isPackage in
pkgutil.iter_modules(['MultiView/Fusion/Methods/MonoviewClassifiers'])
if not isPackage and not name in ["SubSamplig", "ModifiedMulticlass"]]
pkgutil.iter_modules(['Multiview/Fusion/Methods/MonoviewClassifiers'])
if not isPackage ]
allFusionAlgos = {"Methods": fusionMethods, "Classifiers": fusionMonoviewClassifiers}
allMumboAlgos = [name for _, name, isPackage in
pkgutil.iter_modules(['MultiView/Mumbo/Classifiers'])
if not isPackage]
pkgutil.iter_modules(['Multiview/Mumbo/Classifiers'])
if not isPackage and not name in ["SubSampling", "ModifiedMulticlass", "Kover"]]
allMultiviewAlgos = {"Fusion": allFusionAlgos, "Mumbo": allMumboAlgos}
allMonoviewAlgos = [key[15:] for key in dir(MonoView.ClassifMonoView) if key[:15]=="MonoviewClassif"]
allMonoviewAlgos = [key[15:] for key in dir(Monoview.ClassifMonoView) if key[:15] == "MonoviewClassif"]
benchmark = {"Monoview": allMonoviewAlgos, "Multiview" : allMultiviewAlgos}
# Build the benchmark description from the command line when specific
# classifier families were requested (anything other than the plain
# "Benchmark" run handled above).
# CL_type is a ':'-separated list, so test membership on the split list rather
# than on the raw string.
requestedTypes = args.CL_type.split(":")
if "Multiview" in requestedTypes:
    benchmark["Multiview"] = {}
    requestedMultiviewAlgos = args.CL_algos_multiview.split(":")
    if "Mumbo" in requestedMultiviewAlgos:
        # Flat list of classifier names, the same shape as allMumboAlgos.
        benchmark["Multiview"]["Mumbo"] = args.MU_types.split(":")
    if "Fusion" in requestedMultiviewAlgos:
        requestedFusionTypes = args.FU_types.split(":")
        # Every requested fusion type gets an entry; the methods are stored
        # under "Methods" because that is where the argument-dictionary
        # builder below looks them up.
        requestedFusionMethods = dict((fusionType, []) for fusionType in requestedFusionTypes)
        if "LateFusion" in requestedFusionTypes:
            requestedFusionMethods["LateFusion"] = args.FU_late_methods.split(":")
        if "EarlyFusion" in requestedFusionTypes:
            # NOTE: the parser declares this option as --FU_ealy_methods (sic),
            # so that is the attribute name argparse actually creates.
            requestedFusionMethods["EarlyFusion"] = args.FU_ealy_methods.split(":")
        benchmark["Multiview"]["Fusion"] = {"Methods": requestedFusionMethods,
                                            "Classifiers": args.FU_cl_names.split(":")}
if "Monoview" in requestedTypes:
    benchmark["Monoview"] = args.CL_algos_monoview.split(":")
# Placeholder values: the per-classifier configurations are not wired in yet,
# so every multiview setting below currently carries the dummy string "a".
classifierTable = fusionClassifierConfig = fusionMethodConfig = "a"
mumboNB_ITER = mumboClassifierConfig = mumboclassifierNames = "a"

# Parameter grids for the monoview classifiers, parsed from the command line.
# The RF tree counts are whitespace-separated; every other option uses ':'.
RandomForestKWARGS = dict(classifier__n_estimators=list(map(int, args.CL_RF_trees.split())))
SVCKWARGS = dict(classifier__kernel=args.CL_SVC_kernel.split(":"),
                 classifier__C=list(map(int, args.CL_SVC_C.split(":"))))
DecisionTreeKWARGS = dict(classifier__max_depth=list(map(int, args.CL_DT_depth.split(":"))))
SGDKWARGS = dict(classifier__alpha=list(map(float, args.CL_SGD_alpha.split(":"))),
                 classifier__loss=args.CL_SGD_loss.split(":"),
                 classifier__penalty=args.CL_SGD_penalty.split(":"))
print benchmark
# One argument dictionary per (monoview classifier, view) pair. Each one holds
# the keyword arguments later forwarded to the monoview execution function.
argumentDictionaries = {"Monoview": [], "Multiview": []}
monoviewViews = args.views.split(":")
# .get() keeps a Multiview-only run from failing on the missing "Monoview" key.
for classifier in benchmark.get("Monoview") or []:
    # The grid for classifier X is the module-level variable named XKWARGS.
    kwargsName = classifier + "KWARGS"
    for view in monoviewViews:
        arguments = {kwargsName: globals()[kwargsName], "feat": view, "fileFeat": args.fileFeat,
                     "fileCL": args.fileCL, "fileCLD": args.fileCLD, "CL_type": classifier}
        argumentDictionaries["Monoview"].append(arguments)
# Build the argument dictionaries for every requested multiview experiment.
# All lookups go through .get() so that a benchmark restricted to Monoview, to
# Mumbo only, or to a single fusion type does not raise KeyError.
multiviewBenchmark = benchmark.get("Multiview") or {}
multiviewViews = args.views.split(":")
multiviewLabels = args.CL_classes.split(":")

fusionBenchmark = multiviewBenchmark.get("Fusion") or {}
fusionMethodsByType = fusionBenchmark.get("Methods") or {}
fusionClassifiers = fusionBenchmark.get("Classifiers") or []
if fusionClassifiers:
    # Late fusion: len(views) ** len(classifiers) entries per method. They are
    # identical for now because the classifier table is still a placeholder.
    nbLateCombinations = int(np.power(len(multiviewViews), len(fusionClassifiers)))
    for method in fusionMethodsByType.get("LateFusion") or []:
        for _ in range(nbLateCombinations):
            arguments = {"CL_type": "Fusion",
                         "views": list(multiviewViews),
                         "NB_VIEW": len(multiviewViews),
                         "NB_CLASS": len(multiviewLabels),
                         "LABELS_NAMES": list(multiviewLabels),
                         "FusionKWARGS": {"fusionType": "LateFusion", "fusionMethod": method,
                                          "monoviewClassifiersNames": classifierTable,
                                          "monoviewClassifiersConfigs": fusionClassifierConfig,
                                          'fusionMethodConfig': fusionMethodConfig}}
            argumentDictionaries["Multiview"].append(arguments)
    # Early fusion: one entry per (method, monoview classifier) pair.
    for method in fusionMethodsByType.get("EarlyFusion") or []:
        for classifier in fusionClassifiers:
            arguments = {"CL_type": "Fusion",
                         "views": list(multiviewViews),
                         "NB_VIEW": len(multiviewViews),
                         "NB_CLASS": len(multiviewLabels),
                         "LABELS_NAMES": list(multiviewLabels),
                         "FusionKWARGS": {"fusionType": "EarlyFusion", "fusionMethod": method,
                                          "monoviewClassifiersNames": classifier,
                                          "monoviewClassifiersConfigs": fusionClassifierConfig,
                                          'fusionMethodConfig': fusionMethodConfig}}
            argumentDictionaries["Multiview"].append(arguments)

mumboClassifiers = multiviewBenchmark.get("Mumbo") or []
if mumboClassifiers:
    # Mumbo: len(views) ** len(classifiers) entries, all carrying the same
    # placeholder configuration for now.
    for _ in range(int(np.power(len(multiviewViews), len(mumboClassifiers)))):
        arguments = {"CL_type": "Mumbo",
                     "views": list(multiviewViews),
                     "NB_VIEW": len(multiviewViews),
                     "NB_CLASS": len(multiviewLabels),
                     "LABELS_NAMES": list(multiviewLabels),
                     "MumboKWARGS": {"classifiersConfigs": mumboClassifierConfig, "NB_ITER": mumboNB_ITER,
                                     "classifiersNames": mumboclassifierNames}}
        argumentDictionaries["Multiview"].append(arguments)
# Results per classifier type. The parallel execution loop below is commented
# out, so this dictionary is still empty when it reaches resultAnalysis.
results = {}
# TODO: re-enable once the per-classifier configurations are filled in.
# for classifierType, argumentsList in argumentDictionaries.iteritems():
# executionMethod = globals()["Exec"+classifierType]
# results[classifierType] = Parallel(n_jobs=args.CL_cores)(delayed(executionMethod)
# (args.name, args.CL_split,args.CL_nbFolds, 1, args.type,
# args.pathF, **arguments)
# for arguments in argumentsList)
resultAnalysis(benchmark, results)
# Debug output: number of multiview and monoview experiments that were built.
print len(argumentDictionaries["Multiview"]), len(argumentDictionaries["Monoview"])
# views = args.views.split(":")
# dataBaseType = args.type
......
......@@ -13,7 +13,7 @@ import logging # To create Log-Files
# Import own modules
import DBCrawl # Functions to read Images from Database
import Code.MonoView.ExportResults # Functions to render results
import Code.Monoview.ExportResults # Functions to render results
import FeatExtraction # Functions to extract the views from Database
# Author-Info
......@@ -235,44 +235,44 @@ dir = os.path.dirname(os.path.abspath(__file__)) + "/Results-FeatExtr/"
### Classlabels and Description
OutputfileNameClassLabels = datetime.datetime.now().strftime("%Y_%m_%d") + "-FE-" + nameDB + "-ClassLabels"
Code.MonoView.ExportResults.exportNumpyToCSV(dfImages.classLabel, dir, OutputfileNameClassLabels, '%i')
Code.MonoView.ExportResults.exportNumpyToCSV(dfImages.classLabel, dir, nameDB+'-ClassLabels', '%i')
Code.Monoview.ExportResults.exportNumpyToCSV(dfImages.classLabel, dir, OutputfileNameClassLabels, '%i')
Code.Monoview.ExportResults.exportNumpyToCSV(dfImages.classLabel, dir, nameDB + '-ClassLabels', '%i')
fileNameClassLabels = datetime.datetime.now().strftime("%Y_%m_%d") + "-FE-" + nameDB + "-ClassLabels-Description"
Code.MonoView.ExportResults.exportPandasToCSV(sClassLabels, dir, fileNameClassLabels)
Code.MonoView.ExportResults.exportPandasToCSV(sClassLabels, dir, nameDB+'-ClassLabels-Description')
Code.Monoview.ExportResults.exportPandasToCSV(sClassLabels, dir, fileNameClassLabels)
Code.Monoview.ExportResults.exportPandasToCSV(sClassLabels, dir, nameDB + '-ClassLabels-Description')
format = '%1.30f'
### RGB
if(args.RGB):
fileName = datetime.datetime.now().strftime("%Y_%m_%d") + "-FE-" + rgb_feat_desc
Code.MonoView.ExportResults.exportNumpyToCSV(rgb_f_extr_res, dir, fileName, format)
Code.MonoView.ExportResults.exportNumpyToCSV(rgb_f_extr_res, dir, nameDB+"-RGB", format)
Code.Monoview.ExportResults.exportNumpyToCSV(rgb_f_extr_res, dir, fileName, format)
Code.Monoview.ExportResults.exportNumpyToCSV(rgb_f_extr_res, dir, nameDB + "-RGB", format)
### HSV
if(args.HSV):
fileName = datetime.datetime.now().strftime("%Y_%m_%d") + "-FE-" + hsv_feat_desc
Code.MonoView.ExportResults.exportNumpyToCSV(hsv_f_extr_res, dir, fileName, format)
Code.MonoView.ExportResults.exportNumpyToCSV(hsv_f_extr_res, dir, nameDB+"-HSV", format)
Code.Monoview.ExportResults.exportNumpyToCSV(hsv_f_extr_res, dir, fileName, format)
Code.Monoview.ExportResults.exportNumpyToCSV(hsv_f_extr_res, dir, nameDB + "-HSV", format)
### SIFT
if(args.SIFT):
fileName = datetime.datetime.now().strftime("%Y_%m_%d") + "-FE-" + sift_feat_desc
Code.MonoView.ExportResults.exportNumpyToCSV(sift_f_extr_res, dir, fileName, format)
Code.MonoView.ExportResults.exportNumpyToCSV(sift_f_extr_res, dir, nameDB+"-SIFT", format)
Code.Monoview.ExportResults.exportNumpyToCSV(sift_f_extr_res, dir, fileName, format)
Code.Monoview.ExportResults.exportNumpyToCSV(sift_f_extr_res, dir, nameDB + "-SIFT", format)
### SURF
if(args.SURF):
fileName = datetime.datetime.now().strftime("%Y_%m_%d") + "-FE-" + surf_feat_desc
Code.MonoView.ExportResults.exportNumpyToCSV(surf_f_extr_res, dir, fileName, format)
Code.MonoView.ExportResults.exportNumpyToCSV(surf_f_extr_res, dir, nameDB+"-SURF", format)
Code.Monoview.ExportResults.exportNumpyToCSV(surf_f_extr_res, dir, fileName, format)
Code.Monoview.ExportResults.exportNumpyToCSV(surf_f_extr_res, dir, nameDB + "-SURF", format)
### HOG
if(args.HOG):
fileName = datetime.datetime.now().strftime("%Y_%m_%d") + "-FE-" + hog_feat_desc
Code.MonoView.ExportResults.exportNumpyToCSV(hog_f_extr_res, dir, fileName, format)
Code.MonoView.ExportResults.exportNumpyToCSV(hog_f_extr_res, dir, nameDB+"-HOG", format)
Code.Monoview.ExportResults.exportNumpyToCSV(hog_f_extr_res, dir, fileName, format)
Code.Monoview.ExportResults.exportNumpyToCSV(hog_f_extr_res, dir, nameDB + "-HOG", format)
logging.debug("Done:\t Save Features to CSV Databases")
\ No newline at end of file
File moved
......@@ -30,21 +30,30 @@ __date__ = 2016-03-25
### Argument Parser
def ExecMonoview(args, RandomForestKWARGS, SVCKWARGS, DecisionTreeKWARGS, SGDKWARGS):
def ExecMonoview(name, learningRate, nbFolds, nbCores, databaseType, path, **kwargs):
RandomForestKWARGS = kwargs["RandomForestKWARGS"]
SVCKWARGS = kwargs["SVCKWARGS"]
DecisionTreeKWARGS = kwargs["DecisionTreeKWARGS"]
SGDKWARGS = kwargs["SGDKWARGS"]
feat = kwargs["feat"]
fileFeat = kwargs["fileFeat"]
fileCL = kwargs["fileCL"]
fileCLD = kwargs["fileCLD"]
CL_type = kwargs["CL_type"]
# Determine the Database to extract features
logging.debug("### Main Programm for Classification MonoView")
logging.debug("### Classification - Database:" + str(args.name) + " Feature:" + str(args.feat) + " train_size:" + str(args.CL_split) + ", GridSearch of Trees:" + args.CL_RF_trees + ", CrossValidation k-folds:" + str(args.CL_CV) + ", cores:" + str(args.CL_Cores))
logging.debug("### Classification - Database:" + str(name) + " Feature:" + str(feat) + " train_size:" + str(learningRate) + ", CrossValidation k-folds:" + str(nbFolds) + ", cores:" + str(nbCores))
# Read the features
logging.debug("Start:\t Read "+args.type+" Files")
logging.debug("Start:\t Read " + databaseType + " Files")
if args.type == "csv":
X = np.genfromtxt(args.pathF + args.fileFeat, delimiter=';')
Y = np.genfromtxt(args.pathF + args.fileCL, delimiter=';')
elif args.type == "hdf5":
dataset = h5py.File(args.pathF + args.name + ".hdf5", "r")
if databaseType == "csv":
X = np.genfromtxt(path + fileFeat, delimiter=';')
Y = np.genfromtxt(path + fileCL, delimiter=';')
elif databaseType == "hdf5":
dataset = h5py.File(path + name + ".hdf5", "r")
viewsDict = dict((dataset.get("/View"+str(viewIndex)+"/name").value, viewIndex) for viewIndex in range(dataset.get("nbView").value))
X = dataset["View"+str(viewsDict[args.feat])+"/matrix"][...]
X = dataset["View"+str(viewsDict[feat])+"/matrix"][...]
# X_ = dataset["View"+str(viewsDict[args.feat])+"/matrix"][...]
# X = np.zeros((dataset.get("datasetLength/").value, dataset["View"+str(viewsDict[args.feat])+"/shape"][1]), dtype=int)
# for exampleindice, exampleArray in enumerate(X_):
......@@ -60,7 +69,7 @@ def ExecMonoview(args, RandomForestKWARGS, SVCKWARGS, DecisionTreeKWARGS, SGDKWA
# Calculate Train/Test data
logging.debug("Start:\t Determine Train/Test split")
X_train, X_test, y_train, y_test = ClassifMonoView.calcTrainTest(X, Y, args.CL_split)
X_train, X_test, y_train, y_test = ClassifMonoView.calcTrainTest(X, Y, learningRate)
logging.debug("Info:\t Shape X_train:" + str(X_train.shape) + ", Length of y_train:" + str(len(y_train)))
logging.debug("Info:\t Shape X_test:" + str(X_test.shape) + ", Length of y_test:" + str(len(y_test)))
......@@ -70,10 +79,10 @@ def ExecMonoview(args, RandomForestKWARGS, SVCKWARGS, DecisionTreeKWARGS, SGDKWA
logging.debug("Start:\t Classification")
classifierFunction = getattr(ClassifMonoView, "MonoviewClassif"+args.CL_type)
classifierKWARGS = globals()[args.CL_type+"KWARGS"]
classifierFunction = getattr(ClassifMonoView, "MonoviewClassif"+CL_type)
classifierKWARGS = globals()[CL_type+"KWARGS"]
cl_desc, cl_res = classifierFunction(X_train, y_train, nbFolds=args.CL_CV, nbCores = args.CL_Cores,
cl_desc, cl_res = classifierFunction(X_train, y_train, nbFolds=nbFolds, nbCores=nbCores,
**classifierKWARGS)
t_end = time.time() - t_start
......@@ -88,13 +97,13 @@ def ExecMonoview(args, RandomForestKWARGS, SVCKWARGS, DecisionTreeKWARGS, SGDKWA
# CSV Export
logging.debug("Start:\t Exporting to CSV")
dir = os.path.dirname(os.path.abspath(__file__)) + "/Results-ClassMonoView/"
filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + args.name + "-" + args.feat
filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + name + "-" + feat
ExportResults.exportPandasToCSV(df_class_res, dir, filename)
logging.debug("Done:\t Exporting to CSV")
# Stats Result
y_test_pred = cl_res.predict(X_test)
classLabelsDesc = pd.read_csv(args.pathF + args.fileCLD, sep=";", names=['label', 'name'])
classLabelsDesc = pd.read_csv(path + fileCLD, sep=";", names=['label', 'name'])
classLabelsNames = classLabelsDesc.name
#logging.debug("" + str(classLabelsNames))
classLabelsNamesList = classLabelsNames.values.tolist()
......@@ -107,21 +116,21 @@ def ExecMonoview(args, RandomForestKWARGS, SVCKWARGS, DecisionTreeKWARGS, SGDKWA
# Classification Report with Precision, Recall, F1 , Support
logging.debug("Info:\t Classification report:")
filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + args.name + "-" + args.feat + "-Report"
filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + name + "-" + feat + "-Report"
logging.debug("\n" + str(metrics.classification_report(y_test, y_test_pred, labels = range(0,len(classLabelsDesc.name)), target_names=classLabelsNamesList)))
scores_df = ExportResults.classification_report_df(dir, filename, y_test, y_test_pred, range(0, len(classLabelsDesc.name)), classLabelsNamesList)
# Create some useful statistcs
logging.debug("Info:\t Statistics:")
filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + args.name + "-" + args.feat + "-Stats"
filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + name + "-" + feat + "-Stats"
stats_df = ExportResults.classification_stats(dir, filename, scores_df, accuracy_score)
logging.debug("\n" + stats_df.to_string())
# Confusion Matrix
logging.debug("Info:\t Calculate Confusionmatrix")
filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + args.name + "-" + args.feat + "-ConfMatrix"
filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + name + "-" + feat + "-ConfMatrix"
df_conf_norm = ExportResults.confusion_matrix_df(dir, filename, y_test, y_test_pred, classLabelsNamesList)
filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + args.name + "-" + args.feat + "-ConfMatrixImg"
filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + name + "-" + feat + "-ConfMatrixImg"
ExportResults.plot_confusion_matrix(dir, filename, df_conf_norm)
logging.debug("Done:\t Statistic Results")
......@@ -131,8 +140,8 @@ def ExecMonoview(args, RandomForestKWARGS, SVCKWARGS, DecisionTreeKWARGS, SGDKWA
logging.debug("Start:\t Plot Result")
np_score = ExportResults.calcScorePerClass(y_test, cl_res.predict(X_test).astype(int))
### dir and filename the same as CSV Export
filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + args.name + "-" + args.feat + "-Score"
ExportResults.showResults(dir, filename, args.name, args.feat, np_score)
filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + name + "-" + feat + "-Score"
ExportResults.showResults(dir, filename, name, feat, np_score)
logging.debug("Done:\t Plot Result")
......@@ -201,3 +210,8 @@ if __name__=='__main__':
if(args.log):
logging.getLogger().addHandler(logging.StreamHandler())
arguments = {"RandomForestKWARGS": RandomForestKWARGS, "SVCKWARGS": SVCKWARGS,
"DecisionTreeKWARGS": DecisionTreeKWARGS, "SGDKWARGS": SGDKWARGS, "feat":args.feat,
"fileFeat": args.fileFeat, "fileCL": args.fileCL, "fileCLD": args.fileCLD, "CL_type": args.CL_type}
ExecMonoview(args.name, args.CL_split, args.CL_CV, args.CL_Cores, args.type, args.pathF, **arguments)
File moved
File moved
2016-08-19 20:16:59,329 DEBUG: ### Main Programm for Classification MonoView
2016-08-19 20:16:59,330 DEBUG: ### Classification - Database:MultiOmicDataset Feature:RNASeq train_size:0.5, CrossValidation k-folds:5, cores:4
2016-08-19 20:17:45,654 DEBUG: ### Main Programm for Classification MonoView
2016-08-19 20:17:45,655 DEBUG: ### Classification - Database:MultiOmicDataset Feature:RNASeq train_size:0.5, CrossValidation k-folds:5, cores:4
2016-08-19 20:18:13,897 DEBUG: ### Main Programm for Classification MonoView
2016-08-19 20:18:13,897 DEBUG: ### Classification - Database:MultiOmicDataset Feature:RNASeq train_size:0.5, CrossValidation k-folds:5, cores:4
2016-08-19 20:18:13,897 DEBUG: Start: Read hdf5 Files
2016-08-19 20:19:25,358 DEBUG: ### Main Programm for Classification MonoView
2016-08-19 20:19:25,358 DEBUG: ### Classification - Database:MultiOmicDataset Feature:RNASeq train_size:0.5, CrossValidation k-folds:5, cores:4
2016-08-19 20:19:25,359 DEBUG: Start: Read hdf5 Files
2016-08-19 20:19:51,424 DEBUG: ### Main Programm for Classification MonoView
2016-08-19 20:19:51,424 DEBUG: ### Classification - Database:MultiOmicDataset Feature:RNASeq train_size:0.5, CrossValidation k-folds:5, cores:4
2016-08-19 20:19:51,424 DEBUG: Start: Read hdf5 Files
2016-08-19 20:20:30,839 DEBUG: ### Main Programm for Classification MonoView
2016-08-19 20:20:30,840 DEBUG: ### Classification - Database:MultiOmicDataset Feature:RNASeq train_size:0.5, CrossValidation k-folds:5, cores:4
2016-08-19 20:20:30,840 DEBUG: Start: Read hdf5 Files
2016-08-19 20:20:32,791 DEBUG: Info: Shape of Feature:(347, 73599), Length of classLabels vector:(347,)
2016-08-19 20:20:32,791 DEBUG: Done: Read CSV Files
2016-08-19 20:20:32,791 DEBUG: Start: Determine Train/Test split
2016-08-19 20:20:32,882 DEBUG: Info: Shape X_train:(173, 73599), Length of y_train:173
2016-08-19 20:20:32,883 DEBUG: Info: Shape X_test:(174, 73599), Length of y_test:174
2016-08-19 20:20:32,883 DEBUG: Done: Determine Train/Test split
2016-08-19 20:20:32,883 DEBUG: Start: Classification
2016-08-19 20:20:50,330 DEBUG: Info: Time for Classification: 19.4069910049[s]
2016-08-19 20:20:50,330 DEBUG: Done: Classification
2016-08-19 20:20:50,330 DEBUG: Start: Exporting to CSV
2016-08-19 20:20:50,334 DEBUG: Done: Exporting to CSV
2016-08-19 20:23:50,436 DEBUG: ### Main Programm for Classification MonoView
2016-08-19 20:23:50,437 DEBUG: ### Classification - Database:MultiOmicDataset Feature:RNASeq train_size:0.5, CrossValidation k-folds:5, cores:4
2016-08-19 20:23:50,437 DEBUG: Start: Read hdf5 Files
2016-08-19 20:23:50,527 DEBUG: Info: Shape of Feature:(347, 73599), Length of classLabels vector:(347,)
2016-08-19 20:23:50,528 DEBUG: Done: Read CSV Files
2016-08-19 20:23:50,528 DEBUG: Start: Determine Train/Test split
2016-08-19 20:23:50,608 DEBUG: Info: Shape X_train:(173, 73599), Length of y_train:173
2016-08-19 20:23:50,608 DEBUG: Info: Shape X_test:(174, 73599), Length of y_test:174
2016-08-19 20:23:50,608 DEBUG: Done: Determine Train/Test split
2016-08-19 20:23:50,608 DEBUG: Start: Classification
;a_class_time;b_cl_desc;c_cl_res;d_cl_score
0;19.4069910049;Classif_DT-CV_5-;"GridSearchCV(cv=5, error_score='raise',
estimator=Pipeline(steps=[('classifier', DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
min_samples_split=2, min_weight_fraction_leaf=0.0,
presort=False, random_state=None, splitter='best'))]),
fit_params={}, iid=True, n_jobs=4,
param_grid={'classifier__max_depth': [1, 3, 5, 7]},
pre_dispatch='2*n_jobs', refit=True, scoring='accuracy', verbose=0)";0.618497109827
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment