Skip to content
Snippets Groups Projects
Commit 8f1c9a3e authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Refactored

parent c024ec3f
No related branches found
No related tags found
No related merge requests found
......@@ -24,29 +24,10 @@ __author__ = "Nikolas Huelsmann, Baptiste BAUVIN"
__status__ = "Prototype" # Production, Development, Prototype
# __date__ = 2016 - 03 - 25
def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, KFolds, datasetFileIndex, databaseType,
                           path, randomState, hyperParamSearch="randomizedSearch",
                           metrics=None, nIter=30, **args):
    """Open a per-worker HDF5 copy of the dataset and run ExecMonoview on one view.

    The file ``path + name + str(datasetFileIndex) + ".hdf5"`` is opened read-only,
    the view whose ``name`` attribute equals ``args["args"]["feat"]`` is selected,
    and the work is delegated to ``ExecMonoview`` with ``nbCores`` fixed to 1.

    Parameters
    ----------
    directory, name, labelsNames, classificationIndices, KFolds, databaseType,
    path, randomState, hyperParamSearch, nIter :
        Forwarded unchanged to ``ExecMonoview``.
    datasetFileIndex :
        Suffix appended to ``name`` to build the HDF5 file name.
    metrics : list or None
        Metrics specification; ``None`` means ``[["accuracy_score", None]]``.
    **args :
        Must contain ``"args"``, a mapping with a ``"feat"`` key; forwarded
        as-is to ``ExecMonoview``.

    Returns
    -------
    Whatever ``ExecMonoview`` returns.

    Raises
    ------
    KeyError
        If ``"args"`` or ``"feat"`` is missing.
    ValueError
        If no view in the file is named ``args["args"]["feat"]``.
    """
    # Build the default per call so the list is never shared between calls.
    if metrics is None:
        metrics = [["accuracy_score", None]]
    kwargs = args["args"]
    # The context manager closes the HDF5 handle even if training raises.
    # NOTE(review): assumes ExecMonoview's result holds no live reference to
    # the h5py dataset X once it returns - confirm against initConstants.
    with h5py.File(path + name + str(datasetFileIndex) + ".hdf5", "r") as DATASET:
        nbView = DATASET.get("Metadata").attrs["nbView"]
        views = [DATASET.get("View" + str(viewIndex)).attrs["name"] for viewIndex in range(nbView)]
        neededViewIndex = views.index(kwargs["feat"])
        X = DATASET.get("View" + str(neededViewIndex))
        # `Dataset.value` was removed in h5py 3; `[...]` reads the full array.
        Y = DATASET.get("Labels")[...]
        return ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, 1, databaseType, path,
                            randomState, hyperParamSearch=hyperParamSearch,
                            metrics=metrics, nIter=nIter, **args)
def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, nbCores, databaseType, path,
randomState, hyperParamSearch="randomizedSearch",
metrics=[["accuracy_score", None]], nIter=30, **args):
logging.debug("Start:\t Loading data")
def initConstants(args, X, classificationIndices, labelsNames, name, directory):
try:
kwargs = args["args"]
except:
except KeyError:
kwargs = args
t_start = time.time()
feat = X.attrs["name"]
......@@ -56,6 +37,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol
labelsString = "-".join(labelsNames)
timestr = time.strftime("%Y%m%d-%H%M%S")
CL_type_string = CL_type
outputFileName = directory + "/" + CL_type_string + "/" + "/" + feat + "/" + timestr + "Results-" + CL_type_string + "-" + labelsString + \
'-learnRate' + str(learningRate) + '-' + name + "-" + feat + "-"
if not os.path.exists(os.path.dirname(outputFileName)):
......@@ -64,26 +46,18 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
logging.debug("Done:\t Loading data")
# Determine the Database to extract features
logging.debug("Info:\t Classification - Database:" + str(name) + " Feature:" + str(feat) + " train ratio:"
+ str(learningRate) + ", CrossValidation k-folds: " + str(KFolds.n_splits) + ", cores:"
+ str(nbCores) + ", algorithm : " + CL_type)
return kwargs, t_start, feat, CL_type, X, learningRate, labelsString, timestr, outputFileName
def initTrainTest(X, Y, classificationIndices):
    """Split a view and its labels into train and test subsets.

    Parameters
    ----------
    X :
        View data; passed to ``extractSubset`` together with each index set.
    Y :
        Labels; indexed directly with each index set.
    classificationIndices :
        Two-element sequence ``(trainIndices, testIndices)``.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``.
    """
    # No "Start: Determine Train/Test split" log here: ExecMonoview already
    # emits it right before calling this function, so it was logged twice.
    trainIndices, testIndices = classificationIndices
    X_train = extractSubset(X, trainIndices)
    X_test = extractSubset(X, testIndices)
    y_train = Y[trainIndices]
    y_test = Y[testIndices]
    return X_train, y_train, X_test, y_test
logging.debug("Info:\t Shape X_train:" + str(X_train.shape) + ", Length of y_train:" + str(len(y_train)))
logging.debug("Info:\t Shape X_test:" + str(X_test.shape) + ", Length of y_test:" + str(len(y_test)))
logging.debug("Done:\t Determine Train/Test split")
classifierModule = getattr(MonoviewClassifiers, CL_type)
def getKWARGS(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train, randomState,
outputFileName, KFolds, nbCores, metrics, kwargs):
if hyperParamSearch != "None":
classifierHPSearch = getattr(classifierModule, hyperParamSearch)
logging.debug("Start:\t RandomSearch best settings with " + str(nIter) + " iterations for " + CL_type)
......@@ -92,42 +66,15 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol
clKWARGS = dict((str(index), desc) for index, desc in enumerate(cl_desc))
logging.debug("Done:\t RandomSearch best settings")
else:
clKWARGS = kwargs[kwargs["CL_type"] + "KWARGS"]
logging.debug("Start:\t Training")
cl_res = classifierModule.fit(X_train, y_train, randomState, NB_CORES=nbCores, **clKWARGS)
logging.debug("Done:\t Training")
logging.debug("Start:\t Predicting")
# Stats Result
y_train_pred = cl_res.predict(X_train)
y_test_pred = cl_res.predict(X_test)
full_labels = cl_res.predict(X)
logging.debug("Done:\t Predicting")
t_end = time.time() - t_start
logging.debug("Info:\t Time for training and predicting: " + str(t_end) + "[s]")
clKWARGS = kwargs[CL_type + "KWARGS"]
return clKWARGS
logging.debug("Start:\t Getting Results")
stringAnalysis, imagesAnalysis, metricsScores = execute(name, classificationIndices, KFolds, nbCores,
hyperParamSearch, metrics, nIter, feat, CL_type,
clKWARGS, labelsNames, X.shape,
y_train, y_train_pred, y_test, y_test_pred, t_end,
randomState, cl_res, outputFileName)
cl_desc = [value for key, value in sorted(clKWARGS.items())]
logging.debug("Done:\t Getting Results")
def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, y_train, imagesAnalysis):
logging.info(stringAnalysis)
# labelsString = "-".join(labelsNames)
# timestr = time.strftime("%Y%m%d-%H%M%S")
# CL_type_string = CL_type
# outputFileName = directory + "/"+CL_type_string+"/"+"/"+feat+"/"+timestr +"Results-" + CL_type_string + "-" + labelsString + \
# '-learnRate' + str(learningRate) + '-' + name + "-" + feat + "-"
outputTextFile = open(outputFileName + '.txt', 'w')
outputTextFile.write(stringAnalysis)
outputTextFile.close()
np.savetxt(outputFileName + "full_pred.csv", full_labels.astype(np.int16), delimiter=",")
np.savetxt(outputFileName + "full_pred.csv", full_labels_pred.astype(np.int16), delimiter=",")
np.savetxt(outputFileName + "train_pred.csv", y_train_pred.astype(np.int16), delimiter=",")
np.savetxt(outputFileName + "train_labels.csv", y_train.astype(np.int16), delimiter=",")
......@@ -142,92 +89,170 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol
imagesAnalysis[imageName].savefig(outputFileName + imageName + '.png')
logging.info("Done:\t Result Analysis")
def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, KFolds, datasetFileIndex, databaseType,
                           path, randomState, hyperParamSearch="randomizedSearch",
                           metrics=None, nIter=30, **args):
    """Open a per-worker HDF5 copy of the dataset and run ExecMonoview on one view.

    The file ``path + name + str(datasetFileIndex) + ".hdf5"`` is opened read-only,
    the view whose ``name`` attribute equals ``args["args"]["feat"]`` is selected,
    and the work is delegated to ``ExecMonoview`` with ``nbCores`` fixed to 1.

    Parameters
    ----------
    directory, name, labelsNames, classificationIndices, KFolds, databaseType,
    path, randomState, hyperParamSearch, nIter :
        Forwarded unchanged to ``ExecMonoview``.
    datasetFileIndex :
        Suffix appended to ``name`` to build the HDF5 file name.
    metrics : list or None
        Metrics specification; ``None`` means ``[["accuracy_score", None]]``.
    **args :
        Must contain ``"args"``, a mapping with a ``"feat"`` key; forwarded
        as-is to ``ExecMonoview``.

    Returns
    -------
    Whatever ``ExecMonoview`` returns.

    Raises
    ------
    KeyError
        If ``"args"`` or ``"feat"`` is missing.
    ValueError
        If no view in the file is named ``args["args"]["feat"]``.
    """
    # Build the default per call so the list is never shared between calls.
    if metrics is None:
        metrics = [["accuracy_score", None]]
    kwargs = args["args"]
    # The context manager closes the HDF5 handle even if training raises.
    # NOTE(review): assumes ExecMonoview's result holds no live reference to
    # the h5py dataset X once it returns - confirm against initConstants.
    with h5py.File(path + name + str(datasetFileIndex) + ".hdf5", "r") as DATASET:
        nbView = DATASET.get("Metadata").attrs["nbView"]
        views = [DATASET.get("View" + str(viewIndex)).attrs["name"] for viewIndex in range(nbView)]
        neededViewIndex = views.index(kwargs["feat"])
        X = DATASET.get("View" + str(neededViewIndex))
        # `Dataset.value` was removed in h5py 3; `[...]` reads the full array.
        Y = DATASET.get("Labels")[...]
        return ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, 1, databaseType, path,
                            randomState, hyperParamSearch=hyperParamSearch,
                            metrics=metrics, nIter=nIter, **args)
def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, nbCores, databaseType, path,
                 randomState, hyperParamSearch="randomizedSearch",
                 metrics=None, nIter=30, **args):
    """Train one monoview classifier on a single view, analyse it and save the results.

    The steps are: initialise constants (``initConstants``), split train/test
    (``initTrainTest``), obtain the classifier keyword arguments (``getKWARGS``),
    fit, predict on the whole view, compute the analysis (``execute``) and write
    it to disk (``saveResults``).

    Parameters
    ----------
    directory :
        Forwarded to ``initConstants``.
    X :
        View data; must expose ``.shape`` and be accepted by the classifier's
        ``predict``. It is rebound to the value returned by ``initConstants``.
    Y :
        Labels, forwarded to ``initTrainTest``.
    name, labelsNames :
        Forwarded to ``initConstants`` and ``execute``.
    classificationIndices :
        Indexable pair; element 0 selects the train predictions and element 1
        the test predictions out of the full prediction vector.
    KFolds :
        Object with an ``n_splits`` attribute; forwarded to ``getKWARGS`` and
        ``execute``.
    nbCores :
        Passed to the classifier's ``fit`` as ``NB_CORES``.
    databaseType, path :
        Accepted for interface compatibility; not referenced in this body.
    randomState :
        Forwarded to ``getKWARGS``, ``fit`` and ``execute``.
    hyperParamSearch, nIter :
        Forwarded to ``getKWARGS`` and ``execute``.
    metrics : list or None
        Metrics specification; ``None`` means ``[["accuracy_score", None]]``.
    **args :
        Forwarded to ``initConstants``; must contain ``"viewIndex"``.

    Returns
    -------
    tuple
        ``(viewIndex, [CL_type, cl_desc + [feat], metricsScores,
        full_labels_pred, clKWARGS])`` where ``cl_desc`` holds the values of
        ``clKWARGS`` ordered by key.

    Raises
    ------
    KeyError
        If ``"viewIndex"`` is missing from ``args``.
    """
    # Build the default per call so the list is never shared between calls.
    if metrics is None:
        metrics = [["accuracy_score", None]]

    logging.debug("Start:\t Loading data")
    (kwargs, t_start, feat, CL_type, X, learningRate, labelsString, timestr,
     outputFileName) = initConstants(args, X, classificationIndices, labelsNames, name, directory)
    logging.debug("Done:\t Loading data")

    logging.debug("Info:\t Classification - Database:" + str(name) + " Feature:" + str(feat) + " train ratio:"
                  + str(learningRate) + ", CrossValidation k-folds: " + str(KFolds.n_splits) + ", cores:"
                  + str(nbCores) + ", algorithm : " + CL_type)

    logging.debug("Start:\t Determine Train/Test split")
    X_train, y_train, X_test, y_test = initTrainTest(X, Y, classificationIndices)
    logging.debug("Info:\t Shape X_train:" + str(X_train.shape) + ", Length of y_train:" + str(len(y_train)))
    logging.debug("Info:\t Shape X_test:" + str(X_test.shape) + ", Length of y_test:" + str(len(y_test)))
    logging.debug("Done:\t Determine Train/Test split")

    logging.debug("Start:\t Generate classifier args")
    classifierModule = getattr(MonoviewClassifiers, CL_type)
    clKWARGS = getKWARGS(classifierModule, hyperParamSearch,
                         nIter, CL_type, X_train, y_train,
                         randomState, outputFileName,
                         KFolds, nbCores, metrics, kwargs)
    logging.debug("Done:\t Generate classifier args")

    logging.debug("Start:\t Training")
    cl_res = classifierModule.fit(X_train, y_train, randomState, NB_CORES=nbCores, **clKWARGS)
    logging.debug("Done:\t Training")

    logging.debug("Start:\t Predicting")
    # Predict once on the full view, then slice out the train and test parts.
    full_labels_pred = cl_res.predict(X)
    y_train_pred = full_labels_pred[classificationIndices[0]]
    y_test_pred = full_labels_pred[classificationIndices[1]]
    logging.debug("Done:\t Predicting")

    # Despite its name, t_end is the elapsed duration since t_start.
    t_end = time.time() - t_start
    logging.debug("Info:\t Time for training and predicting: " + str(t_end) + "[s]")

    logging.debug("Start:\t Getting Results")
    stringAnalysis, imagesAnalysis, metricsScores = execute(name, classificationIndices, KFolds, nbCores,
                                                            hyperParamSearch, metrics, nIter, feat, CL_type,
                                                            clKWARGS, labelsNames, X.shape,
                                                            y_train, y_train_pred, y_test, y_test_pred, t_end,
                                                            randomState, cl_res, outputFileName)
    cl_desc = [value for key, value in sorted(clKWARGS.items())]
    logging.debug("Done:\t Getting Results")

    logging.debug("Start:\t Saving results")
    saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, y_train, imagesAnalysis)
    logging.info("Done:\t Saving Results")

    viewIndex = args["viewIndex"]
    # The earlier return used `full_labels`, a name that no longer exists here;
    # only the `full_labels_pred` form is valid.
    return viewIndex, [CL_type, cl_desc + [feat], metricsScores, full_labels_pred, clKWARGS]
if __name__ == '__main__':
# Command-line entry point: parse the arguments, configure logging, load one
# view and its labels, then call ExecMonoview.
parser = argparse.ArgumentParser(
description='This methods permits to execute a multiclass classification with one single view. At this point the used classifier is a RandomForest. The GridSearch permits to vary the number of trees and CrossValidation with k-folds. The result will be a plot of the score per class and a CSV with the best classifier found by the GridSearch.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# Dataset location and naming options.
groupStandard = parser.add_argument_group('Standard arguments')
groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console')
groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of Dataset', default=".hdf5")
groupStandard.add_argument('--name', metavar='STRING', action='store',
help='Name of Database (default: %(default)s)', default='DB')
groupStandard.add_argument('--feat', metavar='STRING', action='store',
help='Name of Feature for Classification (default: %(default)s)', default='RGB')
groupStandard.add_argument('--pathF', metavar='STRING', action='store',
help='Path to the views (default: %(default)s)', default='Results-FeatExtr/')
groupStandard.add_argument('--fileCL', metavar='STRING', action='store',
help='Name of classLabels CSV-file (default: %(default)s)', default='classLabels.csv')
groupStandard.add_argument('--fileCLD', metavar='STRING', action='store',
help='Name of classLabels-Description CSV-file (default: %(default)s)',
default='classLabels-Description.csv')
groupStandard.add_argument('--fileFeat', metavar='STRING', action='store',
help='Name of feature CSV-file (default: %(default)s)', default='feature.csv')
# Classifier choice, cross-validation, core count, split ratio and metrics.
groupClass = parser.add_argument_group('Classification arguments')
groupClass.add_argument('--CL_type', metavar='STRING', action='store', help='Classifier to use',
default="RandomForest")
groupClass.add_argument('--CL_CV', metavar='INT', action='store', help='Number of k-folds for CV', type=int,
default=10)
groupClass.add_argument('--CL_Cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int,
default=1)
groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', help='Split ratio for train and test',
type=float, default=0.9)
groupClass.add_argument('--CL_metrics', metavar='STRING', action='store',
help='Determine which metrics to use, separate with ":" if multiple, if empty, considering all',
default='')
# Classifier-specific configuration values.
groupClassifier = parser.add_argument_group('Classifier Config')
groupClassifier.add_argument('--CL_config', metavar='STRING', nargs="+", action='store',
help='GridSearch: Determine the trees', default=['25:75:125:175'])
args = parser.parse_args()
# Each --CL_config entry is split on ":"; the resulting lists are keyed by
# their position in the argument list (0, 1, ...).
classifierKWARGS = dict((key, value) for key, value in enumerate([arg.split(":") for arg in args.CL_config]))
### Main Programm
# Configure Logger
# The log lives in "Results-ClassMonoView/" next to this file; when today's
# log already exists, numbered "-1" .. "-19" suffixes are tried.
directory = os.path.dirname(os.path.abspath(__file__)) + "/Results-ClassMonoView/"
logfilename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + args.name + "-" + args.feat + "-LOG"
logfile = directory + logfilename
if os.path.isfile(logfile + ".log"):
for i in range(1, 20):
testFileName = logfilename + "-" + str(i) + ".log"
if not os.path.isfile(directory + testFileName):
logfile = directory + testFileName
break
else:
logfile += ".log"
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logfile, level=logging.DEBUG,
filemode='w')
# With -log, records are additionally echoed through a StreamHandler.
if args.log:
logging.getLogger().addHandler(logging.StreamHandler())
# Read the features
logging.debug("Start:\t Read " + args.type + " Files")
# X and Y come either from ";"-delimited CSV files or from the HDF5 view whose
# "name" attribute equals --feat.
# NOTE(review): any other --type value leaves X and Y unbound, so the shape
# log below would raise NameError.
if args.type == ".csv":
X = np.genfromtxt(args.pathF + args.fileFeat, delimiter=';')
Y = np.genfromtxt(args.pathF + args.fileCL, delimiter=';')
elif args.type == ".hdf5":
dataset = h5py.File(args.pathF + args.name + ".hdf5", "r")
viewsDict = dict((dataset.get("View" + str(viewIndex)).attrs["name"], viewIndex) for viewIndex in
range(dataset.get("Metadata").attrs["nbView"]))
X = dataset["View" + str(viewsDict[args.feat])][...]
Y = dataset["Labels"][...]
logging.debug("Info:\t Shape of Feature:" + str(X.shape) + ", Length of classLabels vector:" + str(Y.shape))
# NOTE(review): this message says "CSV" even when the HDF5 branch was taken.
logging.debug("Done:\t Read CSV Files")
arguments = {args.CL_type + "KWARGS": classifierKWARGS, "feat": args.feat, "fileFeat": args.fileFeat,
"fileCL": args.fileCL, "fileCLD": args.fileCLD, "CL_type": args.CL_type}
# NOTE(review): this call passes 8 positional arguments starting with X, while
# ExecMonoview is declared as (directory, X, Y, name, labelsNames,
# classificationIndices, KFolds, nbCores, databaseType, path, randomState, ...).
# The arguments do not line up with that signature - confirm before relying
# on this entry point.
ExecMonoview(X, Y, args.name, args.CL_split, args.CL_CV, args.CL_Cores, args.type, args.pathF,
metrics=args.CL_metrics, **arguments)
pass
# parser = argparse.ArgumentParser(
# description='This methods permits to execute a multiclass classification with one single view. At this point the used classifier is a RandomForest. The GridSearch permits to vary the number of trees and CrossValidation with k-folds. The result will be a plot of the score per class and a CSV with the best classifier found by the GridSearch.',
# formatter_class=argparse.ArgumentDefaultsHelpFormatter)
#
# groupStandard = parser.add_argument_group('Standard arguments')
# groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console')
# groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of Dataset', default=".hdf5")
# groupStandard.add_argument('--name', metavar='STRING', action='store',
# help='Name of Database (default: %(default)s)', default='DB')
# groupStandard.add_argument('--feat', metavar='STRING', action='store',
# help='Name of Feature for Classification (default: %(default)s)', default='RGB')
# groupStandard.add_argument('--pathF', metavar='STRING', action='store',
# help='Path to the views (default: %(default)s)', default='Results-FeatExtr/')
# groupStandard.add_argument('--fileCL', metavar='STRING', action='store',
# help='Name of classLabels CSV-file (default: %(default)s)', default='classLabels.csv')
# groupStandard.add_argument('--fileCLD', metavar='STRING', action='store',
# help='Name of classLabels-Description CSV-file (default: %(default)s)',
# default='classLabels-Description.csv')
# groupStandard.add_argument('--fileFeat', metavar='STRING', action='store',
# help='Name of feature CSV-file (default: %(default)s)', default='feature.csv')
#
# groupClass = parser.add_argument_group('Classification arguments')
# groupClass.add_argument('--CL_type', metavar='STRING', action='store', help='Classifier to use',
# default="RandomForest")
# groupClass.add_argument('--CL_CV', metavar='INT', action='store', help='Number of k-folds for CV', type=int,
# default=10)
# groupClass.add_argument('--CL_Cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int,
# default=1)
# groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', help='Split ratio for train and test',
# type=float, default=0.9)
# groupClass.add_argument('--CL_metrics', metavar='STRING', action='store',
# help='Determine which metrics to use, separate with ":" if multiple, if empty, considering all',
# default='')
#
# groupClassifier = parser.add_argument_group('Classifier Config')
# groupClassifier.add_argument('--CL_config', metavar='STRING', nargs="+", action='store',
# help='GridSearch: Determine the trees', default=['25:75:125:175'])
#
# args = parser.parse_args()
#
# classifierKWARGS = dict((key, value) for key, value in enumerate([arg.split(":") for arg in args.CL_config]))
# ### Main Programm
#
#
# # Configure Logger
# directory = os.path.dirname(os.path.abspath(__file__)) + "/Results-ClassMonoView/"
# logfilename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + args.name + "-" + args.feat + "-LOG"
# logfile = directory + logfilename
# if os.path.isfile(logfile + ".log"):
# for i in range(1, 20):
# testFileName = logfilename + "-" + str(i) + ".log"
# if not os.path.isfile(directory + testFileName):
# logfile = directory + testFileName
# break
# else:
# logfile += ".log"
#
# logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logfile, level=logging.DEBUG,
# filemode='w')
#
# if args.log:
# logging.getLogger().addHandler(logging.StreamHandler())
#
# # Read the features
# logging.debug("Start:\t Read " + args.type + " Files")
#
# if args.type == ".csv":
# X = np.genfromtxt(args.pathF + args.fileFeat, delimiter=';')
# Y = np.genfromtxt(args.pathF + args.fileCL, delimiter=';')
# elif args.type == ".hdf5":
# dataset = h5py.File(args.pathF + args.name + ".hdf5", "r")
# viewsDict = dict((dataset.get("View" + str(viewIndex)).attrs["name"], viewIndex) for viewIndex in
# range(dataset.get("Metadata").attrs["nbView"]))
# X = dataset["View" + str(viewsDict[args.feat])][...]
# Y = dataset["Labels"][...]
#
# logging.debug("Info:\t Shape of Feature:" + str(X.shape) + ", Length of classLabels vector:" + str(Y.shape))
# logging.debug("Done:\t Read CSV Files")
#
# arguments = {args.CL_type + "KWARGS": classifierKWARGS, "feat": args.feat, "fileFeat": args.fileFeat,
# "fileCL": args.fileCL, "fileCLD": args.fileCLD, "CL_type": args.CL_type}
# ExecMonoview(X, Y, args.name, args.CL_split, args.CL_CV, args.CL_Cores, args.type, args.pathF,
# metrics=args.CL_metrics, **arguments)
#!/usr/bin/env python
""" Script to render versions of modules used """
# Import built-in modules
# Import 3rd party modules
# Import own modules
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
# __date__ = 2016 - 03 - 25
def testVersions():
isUpToDate = True
toInstall = []
try:
import sys
# print("Python-V.: " + sys.version)
except:
# print "Please install Python 2.7"
except ImportError:
raise
try:
import pyscm
except:
# print "Please install pyscm"
except ImportError:
isUpToDate = False
toInstall.append("pyscm")
try:
import numpy
# print("Numpy-V.: " + numpy.version.version)
except:
# print "Please install numpy module"
except ImportError:
isUpToDate = False
toInstall.append("numpy")
try:
import scipy
# print("Scipy-V.: " + scipy.__version__)
except:
# print "Please install scipy module"
except ImportError:
isUpToDate = False
toInstall.append("scipy")
try:
import matplotlib
# print("Matplotlib-V.: " + matplotlib.__version__)
except:
# print "Please install matplotlib module"
except ImportError:
isUpToDate = False
toInstall.append("matplotlib")
try:
import sklearn
# print("Sklearn-V.: " + sklearn.__version__)
except:
# print "Please install sklearn module"
except ImportError:
isUpToDate = False
toInstall.append("sklearn")
try:
import logging # To create Log-Files
# print("Logging: " + logging.__version__)
except:
# print "Please install logging module"
import logging
except ImportError:
isUpToDate = False
toInstall.append("logging")
try:
import joblib
# print("joblib: " + joblib.__version__)
except:
# print "Pease install joblib module"
except ImportError:
isUpToDate = False
toInstall.append("joblib")
try:
import argparse
# print("argparse: " + argparse.__version__)
except:
# print "Pease install argparse module"
except ImportError:
isUpToDate = False
toInstall.append("argparse")
try:
import h5py #
# print("h5py: " + h5py.__version__)
except:
# print "Pease install h5py module"
except ImportError:
isUpToDate = False
toInstall.append("h5py")
try:
import graphviz #
except:
except ImportError:
isUpToDate = False
toInstall.append("graphviz")
try:
import pickle #
except:
except ImportError:
isUpToDate = False
toInstall.append("pickle")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment