Commit 7c66af33 authored by bbauvin

Resolved randomized search for multicore issue

parent 640f1d7b
Showing 1631 additions and 38 deletions
@@ -160,6 +160,7 @@ groupFusion.add_argument('--FU_cl_config', metavar='STRING', action='store', nar
 args = parser.parse_args()
 os.nice(args.nice)
 nbCores = args.CL_cores
+start = time.time()
 if args.name not in ["MultiOmic", "ModifiedMultiOmic", "Caltech", "Fake"]:
     getDatabase = getattr(DB, "getClassicDB" + args.type[1:])
 else:
@@ -265,8 +266,9 @@ SGDKWARGS = {"2": map(float, args.CL_SGD_alpha.split(":"))[0], "1": args.CL_SGD_
 KNNKWARGS = {"0": map(float, args.CL_KNN_neigh.split(":"))[0]}
 AdaboostKWARGS = {"0": args.CL_Ada_n_est.split(":")[0], "1": args.CL_Ada_b_est.split(":")[0]}
+dataBaseTime = time.time()-start
 argumentDictionaries = {"Monoview": {}, "Multiview": []}
+print benchmark
 try:
     if benchmark["Monoview"]:
         argumentDictionaries["Monoview"] = []
@@ -308,6 +310,7 @@ else:
 for viewIndex, view in enumerate(views):
     bestClassifiers.append(classifiersNames[viewIndex][np.argmax(np.array(accuracies[viewIndex]))])
     bestClassifiersConfigs.append(classifiersConfigs[viewIndex][np.argmax(np.array(accuracies[viewIndex]))])
+monoviewTime = time.time()-dataBaseTime
 try:
     if benchmark["Multiview"]:
         try:
@@ -376,12 +379,13 @@ else:
     resultsMultiview = [ExecMultiview(DATASET, args.name, args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF,
                                       LABELS_DICTIONARY, gridSearch=gridSearch,
                                       metrics=metrics, **arguments) for arguments in argumentDictionaries["Multiview"]]
+multiviewTime = time.time()-monoviewTime
 if nbCores>1:
     logging.debug("Start:\t Deleting "+str(nbCores)+" temporary datasets for multiprocessing")
     datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores)
     logging.debug("Start:\t Deleting datasets for multiprocessing")
+times = [dataBaseTime, monoviewTime, multiviewTime]
 results = (resultsMonoview, resultsMultiview)
 resultAnalysis(benchmark, results, args.name)
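For reference on the new timing values: dataBaseTime is an elapsed duration, but monoviewTime is computed as time.time() minus that duration, which yields something close to an absolute timestamp rather than the monoview phase's duration, and multiviewTime inherits the same offset. Per-phase elapsed times are usually taken from absolute checkpoints, as in this minimal sketch (illustrative only, not the committed code):

import time

databaseStart = time.time()
# ... build or load the database ...
dataBaseTime = time.time() - databaseStart      # elapsed seconds for this phase

monoviewStart = time.time()
# ... run the monoview benchmark ...
monoviewTime = time.time() - monoviewStart      # elapsed seconds for this phase

times = [dataBaseTime, monoviewTime]
print(times)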
...
@@ -105,7 +105,7 @@ def ExecMonoview(X, Y, name, learningRate, nbFolds, nbCores, databaseType, path,
     logging.debug("Start:\t Getting Results")
     #Accuracy classification score
-    stringAnalysis, imagesAnalysis, train, ham, test = execute(name, learningRate, nbFolds, nbCores, gridSearch, metrics, nIter, feat, CL_type,
+    stringAnalysis, imagesAnalysis, metricsScores = execute(name, learningRate, nbFolds, nbCores, gridSearch, metrics, nIter, feat, CL_type,
                                                         clKWARGS, classLabelsNames, X.shape,
                                                         y_train, y_train_pred, y_test, y_test_pred, t_end)
     cl_desc = [value for key, value in sorted(clKWARGS.iteritems())]
@@ -134,7 +134,7 @@ def ExecMonoview(X, Y, name, learningRate, nbFolds, nbCores, databaseType, path,
     logging.info("Done:\t Result Analysis")
     viewIndex = args["viewIndex"]
-    return viewIndex, [CL_type, test, cl_desc, feat]
+    return viewIndex, [CL_type, cl_desc.append(feat), metricsScores]
     # # Classification Report with Precision, Recall, F1 , Support
     # logging.debug("Info:\t Classification report:")
     # filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + name + "-" + feat + "-Report"
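One caveat on the new return value: list.append mutates in place and returns None, so cl_desc.append(feat) puts None in the returned triple even though cl_desc itself gains the feature name. If downstream code (for example the name building in resultAnalysis, which reads res[1][1][-1]) expects the description list, the usual non-mutating form is concatenation, as in this small sketch (toy values):

cl_desc = ["config"]
feat = "View1"
print(cl_desc.append(feat))    # -> None: append mutates cl_desc and returns None
print(cl_desc)                 # -> ['config', 'View1']
print(["config"] + [feat])     # -> ['config', 'View1']: concatenation returns the new list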
...
@@ -38,6 +38,7 @@ def getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred):
 def execute(name, learningRate, nbFolds, nbCores, gridSearch, metrics, nIter, feat, CL_type, clKWARGS, classLabelsNames,
             shape, y_train, y_train_pred, y_test, y_test_pred, time):
+    metricsScores = {}
     metricModule = getattr(Metrics, metrics[0][0])
     train = metricModule.score(y_train, y_train_pred)
     val = metricModule.score(y_test, y_test_pred)
@@ -47,7 +48,13 @@ def execute(name, learningRate, nbFolds, nbCores, gridSearch, metrics, nIter, fe
     stringAnalysis += getClassifierConfigString(CL_type, gridSearch, nbCores, nIter, clKWARGS)
     for metric in metrics:
         stringAnalysis+=getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred)
+        if metric[1]!=None:
+            metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
+        else:
+            metricKWARGS = {}
+        metricsScores[metric[0]] = [getattr(Metrics, metric[0]).score(y_train, y_train_pred, **metricKWARGS), "",
+                                    getattr(Metrics, metric[0]).score(y_test, y_test_pred, **metricKWARGS)]
     stringAnalysis += "\n\n Classification took "+ str(hms(seconds=int(time)))
     imageAnalysis = {}
-    return stringAnalysis, imageAnalysis, train, "", val
+    return stringAnalysis, imageAnalysis, metricsScores
\ No newline at end of file
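The dictionary built here maps each metric name to [train score, "", test score], with any metric configuration in metric[1] turned into keyword arguments by position. Two remarks: None comparisons are conventionally written with is not, and ** unpacking requires string keys, so the enumerate-based dict with integer keys would raise a TypeError if metric[1] is ever non-None (elsewhere in this commit positional configs use string keys such as "0" and "1"). A small self-contained sketch of the intended structure, with string keys and sklearn's accuracy_score standing in for a Metrics module:

from sklearn.metrics import accuracy_score

def score(y_true, y_pred, **kwargs):
    # stand-in for a Metrics module's score(); extra configs are ignored here
    return accuracy_score(y_true, y_pred)

metric = ["accuracy_score", None]                 # [metric name, optional config list]
if metric[1] is not None:
    # string keys so the dict can be splatted with ** at call time
    metricKWARGS = dict((str(index), config) for index, config in enumerate(metric[1]))
else:
    metricKWARGS = {}

y_train, y_train_pred = [0, 1, 1, 0], [0, 1, 0, 0]
y_test, y_test_pred = [1, 0, 1], [1, 1, 1]
metricsScores = {metric[0]: [score(y_train, y_train_pred, **metricKWARGS), "",
                             score(y_test, y_test_pred, **metricKWARGS)]}
print(metricsScores)   # {'accuracy_score': [0.75, '', 0.666...]}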
@@ -26,7 +26,7 @@ def ExecMultiview_multicore(coreIndex, name, learningRate, nbFolds, databaseType
                             gridSearch=False, nbCores=1, metrics=None, nIter=30, **arguments):
     DATASET = h5py.File(path+name+str(coreIndex)+".hdf5", "r")
     return ExecMultiview(DATASET, name, learningRate, nbFolds, 1, databaseType, path, LABELS_DICTIONARY,
-                         gridSearch=False, metrics=None, nIter=30, **arguments)
+                         gridSearch=gridSearch, metrics=None, nIter=30, **arguments)

def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, path, LABELS_DICTIONARY,
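This one-word change matches the commit title: each worker core opens its own copy of the HDF5 dataset, but the wrapper previously passed gridSearch=False regardless of the caller's flag, so randomized search was skipped on multicore runs; it now forwards the flag. A minimal sketch of the forwarding pattern (hypothetical names, not the project's API):

def run(dataset, gridSearch=False):
    # stand-in for ExecMultiview
    return "random search" if gridSearch else "default settings"

def run_one_core(coreIndex, gridSearch=False):
    dataset = {"core": coreIndex}                  # stands in for the per-core HDF5 copy
    return run(dataset, gridSearch=gridSearch)     # forward the caller's flag, do not hardcode it

print(run_one_core(0, gridSearch=True))   # -> "random search"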
@@ -59,15 +59,15 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p
     logging.info("Start:\t Determine validation split for ratio " + str(learningRate))
     validationIndices = DB.splitDataset(DATASET, learningRate, datasetLength)
     learningIndices = [index for index in range(datasetLength) if index not in validationIndices]
-    datasetLength = len(learningIndices)
+    classificationSetLength = len(learningIndices)
     logging.info("Done:\t Determine validation split")
     logging.info("Start:\t Determine "+str(nbFolds)+" folds")
     if nbFolds != 1:
         kFolds = DB.getKFoldIndices(nbFolds, DATASET.get("labels")[...], NB_CLASS, learningIndices)
     else:
-        kFolds = [[], range(datasetLength)]
+        kFolds = [[], range(classificationSetLength)]
-    logging.info("Info:\t Length of Learning Sets: " + str(datasetLength - len(kFolds[0])))
+    logging.info("Info:\t Length of Learning Sets: " + str(classificationSetLength - len(kFolds[0])))
     logging.info("Info:\t Length of Testing Sets: " + str(len(kFolds[0])))
     logging.info("Info:\t Length of Validation Set: " + str(len(validationIndices)))
     logging.info("Done:\t Determine folds")
@@ -89,7 +89,7 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p
     kFoldPredictionTime = []
     kFoldClassifier = []
-    gridSearch=True
     if gridSearch:
         logging.info("Start:\t Randomsearching best settings for monoview classifiers")
         bestSettings, fusionConfig = classifierGridSearch(DATASET, classificationKWARGS, learningIndices
@@ -104,10 +104,10 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p
     logging.info("Start:\t Classification")
     # Begin Classification
     for foldIdx, fold in enumerate(kFolds):
-        if fold != range(datasetLength):
+        if fold != range(classificationSetLength):
             fold.sort()
             logging.info("\tStart:\t Fold number " + str(foldIdx + 1))
-            trainIndices = [index for index in range(datasetLength) if index not in fold]
+            trainIndices = [index for index in range(datasetLength) if (index not in fold) and (index not in validationIndices)]
             DATASET_LENGTH = len(trainIndices)
             classifier = classifierClass(NB_VIEW, DATASET_LENGTH, DATASET.get("labels").value, NB_CORES=nbCores, **classificationKWARGS)
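The changed comprehension makes each fold's training indices exclude both the fold's own test indices and the held-out validation indices; before, only the fold was excluded, so validation samples could end up in fold training. A self-contained illustration with toy indices:

datasetLength = 10
validationIndices = [8, 9]
learningIndices = [i for i in range(datasetLength) if i not in validationIndices]
kFolds = [[0, 1], [2, 3], [4, 5], [6, 7]]       # per-fold test indices, drawn from learningIndices

for fold in kFolds:
    trainIndices = [i for i in range(datasetLength)
                    if (i not in fold) and (i not in validationIndices)]
    # training indices are exactly the learning indices minus the current fold
    assert set(trainIndices) == set(learningIndices) - set(fold)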
@@ -132,7 +132,7 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p
     times = (extractionTime, kFoldLearningTime, kFoldPredictionTime, classificationTime)
-    stringAnalysis, imagesAnalysis, train, test, val = analysisModule.execute(kFoldClassifier, kFoldPredictedTrainLabels,
+    stringAnalysis, imagesAnalysis, metricsScores = analysisModule.execute(kFoldClassifier, kFoldPredictedTrainLabels,
                                                                            kFoldPredictedTestLabels, kFoldPredictedValidationLabels,
                                                                            DATASET, classificationKWARGS, learningRate, LABELS_DICTIONARY,
                                                                            views, nbCores, times, kFolds, name, nbFolds,
@@ -166,7 +166,7 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p
             imagesAnalysis[imageName].savefig(outputFileName + imageName + '.png')
     logging.info("Done:\t Result Analysis")
-    return CL_type, classificationKWARGS, train, test, val
+    return CL_type, classificationKWARGS, metricsScores

if __name__=='__main__':
...
@@ -44,6 +44,7 @@ def gridSearch_hdf5(DATASET, classificationKWARGS, learningIndices, metric=None,
                                           nIter=nIter))
         logging.debug("\tDone:\t Random search for "+classifierName)
     classificationKWARGS["classifiersConfigs"] = bestSettings
+    print bestSettings
     fusionMethodConfig = fusionMethodModule.gridSearch(DATASET, classificationKWARGS, learningIndices, nIter=nIter)
     return bestSettings, fusionMethodConfig
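gridSearch_hdf5 runs a random search per monoview classifier, stores the winning settings in classificationKWARGS, and then searches the fusion method's own configuration. For orientation, a generic randomized-search loop (illustrative only, not the project's implementation):

import random

def random_search(evaluate, sample_config, nIter=30):
    # draw nIter random configurations and keep the best-scoring one
    best_config, best_score = None, float("-inf")
    for _ in range(nIter):
        config = sample_config()
        score = evaluate(config)
        if score > best_score:
            best_config, best_score = config, score
    return best_config, best_score

# toy search space: a single integer hyperparameter whose quality peaks at 10
sample = lambda: {"max_depth": random.randint(1, 30)}
evaluate = lambda config: -abs(config["max_depth"] - 10)
print(random_search(evaluate, sample, nIter=30))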
...
@@ -51,6 +51,5 @@ class BayesianInference(LateFusionClassifier):
                        "\n\t-With monoview classifiers : "
         for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames):
             monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName)
-            print monoviewClassifierConfig
             configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig)
         return configString
\ No newline at end of file
@@ -32,6 +32,31 @@ def getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred):
     metricScoreString += "\n"
     return metricScoreString

+def getTotalMetricScores(metric, kFoldPredictedTrainLabels, kFoldPredictedTestLabels,
+                         kFoldPredictedValidationLabels, DATASET, validationIndices, kFolds):
+    labels = DATASET.get("labels").value
+    metricModule = getattr(Metrics, metric[0])
+    if metric[1]!=None:
+        metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
+    else:
+        metricKWARGS = {}
+    trainScore = np.mean(np.array([metricModule.score([label for index, label in enumerate(labels) if index not in fold+validationIndices], predictedLabels, **metricKWARGS) for fold, predictedLabels in zip(kFolds, kFoldPredictedTrainLabels)]))
+    testScore = np.mean(np.array([metricModule.score(labels[fold], predictedLabels, **metricKWARGS) for fold, predictedLabels in zip(kFolds, kFoldPredictedTestLabels)]))
+    validationScore = np.mean(np.array([metricModule.score(labels[validationIndices], predictedLabels, **metricKWARGS) for predictedLabels in kFoldPredictedValidationLabels]))
+    return [trainScore, testScore, validationScore]
+
+def getMetricsScores(metrics, kFoldPredictedTrainLabels, kFoldPredictedTestLabels,
+                     kFoldPredictedValidationLabels, DATASET, validationIndices, kFolds):
+    metricsScores = {}
+    for metric in metrics:
+        metricsScores[metric[0]] = getTotalMetricScores(metric, kFoldPredictedTrainLabels, kFoldPredictedTestLabels,
+                                                        kFoldPredictedValidationLabels, DATASET, validationIndices, kFolds)
+    return metricsScores
+
 def execute(kFoldClassifier, kFoldPredictedTrainLabels,
             kFoldPredictedTestLabels, kFoldPredictedValidationLabels,
@@ -95,4 +120,6 @@ def execute(kFoldClassifier, kFoldPredictedTrainLabels,
                                 str(hms(seconds=int(sum(kFoldPredictionTime))))])
     stringAnalysis += "\n\tSo a total classification time of " + str(hms(seconds=int(classificationTime))) + ".\n\n"
     imagesAnalysis = {}
-    return stringAnalysis, imagesAnalysis, totalAccuracyOnTrain, totalAccuracyOnTest, totalAccuracyOnValidation
+    metricsScores = getMetricsScores(metrics, kFoldPredictedTrainLabels, kFoldPredictedTestLabels,
+                                     kFoldPredictedValidationLabels, DATASET, validationIndices, kFolds)
+    return stringAnalysis, imagesAnalysis, metricsScores
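getTotalMetricScores averages one metric over the k folds, separately for the per-fold train, test, and validation predictions, and getMetricsScores collects those [train, test, validation] triples under each metric's name. A small self-contained example of the fold-averaging, with sklearn's accuracy_score standing in for a Metrics module:

import numpy as np
from sklearn.metrics import accuracy_score

labels = np.array([0, 1, 1, 0, 1, 0])
kFolds = [[0, 1], [2, 3]]                       # per-fold test indices
kFoldPredictedTestLabels = [[0, 1], [1, 1]]     # one prediction array per fold

# mean of the metric over the folds, as in getTotalMetricScores
testScore = np.mean(np.array([accuracy_score(labels[fold], predictedLabels)
                              for fold, predictedLabels in zip(kFolds, kFoldPredictedTestLabels)]))
print(testScore)   # (1.0 + 0.5) / 2 = 0.75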
@@ -131,7 +131,7 @@ def getAlgoConfig(initKWARGS, NB_CORES, viewNames, gridSearch, nIter, times):
 def getClassificationReport(kFolds, kFoldClassifier, CLASS_LABELS, validationIndices, DATASET,
                             kFoldPredictedTrainLabels, kFoldPredictedTestLabels, kFoldPredictedValidationLabels):
-    DATASET_LENGTH = DATASET.get("Metadata").attrs["datasetLength"]-len(validationIndices)
+    DATASET_LENGTH = DATASET.get("Metadata").attrs["datasetLength"]
     nbView = DATASET.get("Metadata").attrs["nbView"]
     NB_CLASS = DATASET.get("Metadata").attrs["nbClass"]
     kFoldPredictedTrainLabelsByIter = []
@@ -152,7 +152,7 @@ def getClassificationReport(kFolds, kFoldClassifier, CLASS_LABELS, validationInd
         mumboClassifier = kFoldClassifier[foldIdx]
         meanAverageAccuracies = np.mean(mumboClassifier.averageAccuracies, axis=0)
         kFoldMeanAverageAccuracies.append(meanAverageAccuracies)
-        trainIndices = [index for index in range(DATASET_LENGTH) if index not in fold]
+        trainIndices = [index for index in range(DATASET_LENGTH) if (index not in fold) and (index not in validationIndices)]
         testLabels = CLASS_LABELS[fold]
         trainLabels = CLASS_LABELS[trainIndices]
         validationLabels = CLASS_LABELS[validationIndices]
@@ -232,6 +232,31 @@ def getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred):
     metricScoreString += "\n"
     return metricScoreString

+def getTotalMetricScores(metric, kFoldPredictedTrainLabels, kFoldPredictedTestLabels,
+                         kFoldPredictedValidationLabels, DATASET, validationIndices, kFolds):
+    labels = DATASET.get("labels").value
+    metricModule = getattr(Metrics, metric[0])
+    if metric[1]!=None:
+        metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
+    else:
+        metricKWARGS = {}
+    trainScore = np.mean(np.array([metricModule.score([label for index, label in enumerate(labels) if (index not in fold) and (index not in validationIndices)], predictedLabels, **metricKWARGS) for fold, predictedLabels in zip(kFolds, kFoldPredictedTrainLabels)]))
+    testScore = np.mean(np.array([metricModule.score(labels[fold], predictedLabels, **metricKWARGS) for fold, predictedLabels in zip(kFolds, kFoldPredictedTestLabels)]))
+    validationScore = np.mean(np.array([metricModule.score(labels[validationIndices], predictedLabels, **metricKWARGS) for predictedLabels in kFoldPredictedValidationLabels]))
+    return [trainScore, testScore, validationScore]
+
+def getMetricsScores(metrics, kFoldPredictedTrainLabels, kFoldPredictedTestLabels,
+                     kFoldPredictedValidationLabels, DATASET, validationIndices, kFolds,):
+    metricsScores = {}
+    for metric in metrics:
+        metricsScores[metric[0]] = getTotalMetricScores(metric, kFoldPredictedTrainLabels, kFoldPredictedTestLabels,
+                                                        kFoldPredictedValidationLabels, DATASET, validationIndices, kFolds)
+    return metricsScores
+
 def execute(kFoldClassifier, kFoldPredictedTrainLabels, kFoldPredictedTestLabels, kFoldPredictedValidationLabels,
             DATASET, initKWARGS, LEARNING_RATE, LABELS_DICTIONARY, views, NB_CORES, times, kFolds, databaseName,
             nbFolds, validationIndices, gridSearch, nIter, metrics):
@@ -280,7 +305,6 @@ def execute(kFoldClassifier, kFoldPredictedTrainLabels, kFoldPredictedTestLabels
                            " : \n\t\t\t- Mean average Accuracy : "+str(meanAverageAccuracy)+\
                            "\n\t\t\t- Percentage of time chosen : "+str(bestViewStat)
     stringAnalysis += "\n\n For each iteration : "
-    print iterRelevant(0, kFoldClassifier)
     for iterIndex in range(maxIter):
         if iterRelevant(iterIndex, kFoldClassifier).any():
             stringAnalysis += "\n\t- Iteration " + str(iterIndex + 1)
@@ -294,9 +318,9 @@ def execute(kFoldClassifier, kFoldPredictedTrainLabels, kFoldPredictedTestLabels
     trainAccuracyByIter = list(formatedAccuracies["Train"].mean(axis=0))+modifiedMean(surplusAccuracies["Train"])
     testAccuracyByIter = list(formatedAccuracies["Test"].mean(axis=0))+modifiedMean(surplusAccuracies["Test"])
     validationAccuracyByIter = list(formatedAccuracies["Validation"].mean(axis=0))+modifiedMean(surplusAccuracies["Validation"])
-    print nbMaxIter
     name, image = plotAccuracyByIter(trainAccuracyByIter, testAccuracyByIter, validationAccuracyByIter, nbMaxIter,
                                      bestViews, views, classifierAnalysis)
     imagesAnalysis = {name: image}
-    return stringAnalysis, imagesAnalysis, totalAccuracyOnTrain, totalAccuracyOnTest, totalAccuracyOnValidation
+    metricsScores = getMetricsScores(metrics, kFoldPredictedTrainLabels, kFoldPredictedTestLabels,
+                                     kFoldPredictedValidationLabels, DATASET, validationIndices, kFolds)
+    return stringAnalysis, imagesAnalysis, metricsScores
 # Import built-in modules
 import time
 import pylab
+import logging

 # Import third party modules
 import matplotlib
@@ -12,22 +13,24 @@ __author__ = "Baptiste Bauvin"
 __status__ = "Prototype"           # Production, Development, Prototype

-def resultAnalysis(benchmark, results, name):
+def resultAnalysis(benchmark, results, name, times, metrics):
+    for metric in metrics:
     mono, multi = results
-    names = [res[1][0]+res[1][3] for res in mono]
+    names = [res[1][0]+"-"+res[1][1][-1] for res in mono]
-    names+=[type_ if type_ != "Fusion" else a["fusionType"]+a["fusionMethod"] for type_, a, b, c, d in multi]
+    names+=[type_ if type_ != "Fusion" else a["fusionType"]+"-"+a["fusionMethod"] for type_, a, b in multi]
     nbResults = len(mono)+len(multi)
-    accuracies = [100*float(res[1][1]) for res in mono]
+    validationScores = [float(res[1][2][metric[0]][2]) for res in mono]
-    accuracies += [float(accuracy) for a, b, c, d, accuracy in multi]
+    validationScores += [float(scores[metric[0]][2]) for a, b, scores in multi]
     f = pylab.figure(figsize=(40, 30))
     fig = plt.gcf()
     fig.subplots_adjust(bottom=105.0, top=105.01)
     ax = f.add_axes([0.1, 0.1, 0.8, 0.8])
-    ax.set_title("Accuracies on validation set for each classifier")
+    ax.set_title(metric[0]+" on validation set for each classifier")
-    ax.bar(range(nbResults), accuracies, align='center')
+    ax.bar(range(nbResults), validationScores, align='center')
     ax.set_xticks(range(nbResults))
     ax.set_xticklabels(names, rotation="vertical")
-    f.savefig("Results/"+name+time.strftime("%Y%m%d-%H%M%S")+".png")
+    f.savefig("Results/"+name+"-"+metric[0]+"-"+time.strftime("%Y%m%d-%H%M%S")+".png")
+    logging.info("Extraction time : "+str(times[0])+"s, Monoview time : "+str(times[1])+"s, Multiview Time : "+str(times[2])+"s")
2016-09-06 09:25:57,714 INFO: Start: Finding all available mono- & multiview algorithms
2016-09-06 09:25:57,717 DEBUG: ### Main Programm for Classification MonoView
2016-09-06 09:25:57,717 DEBUG: ### Classification - Database:Fake Feature:View0 train_size:0.7, CrossValidation k-folds:5, cores:1, algorithm : Adaboost
2016-09-06 09:25:57,717 DEBUG: Start: Determine Train/Test split
2016-09-06 09:25:57,717 DEBUG: Info: Shape X_train:(210, 10), Length of y_train:210
2016-09-06 09:25:57,717 DEBUG: Info: Shape X_test:(90, 10), Length of y_test:90
2016-09-06 09:25:57,717 DEBUG: Done: Determine Train/Test split
2016-09-06 09:25:57,717 DEBUG: Start: RandomSearch best settings with 30 iterations
2016-09-06 10:06:22,879 INFO: Start: Finding all available mono- & multiview algorithms
2016-09-06 10:06:22,881 DEBUG: ### Main Programm for Classification MonoView
2016-09-06 10:06:22,881 DEBUG: ### Classification - Database:Fake Feature:View0 train_size:0.7, CrossValidation k-folds:5, cores:1, algorithm : Adaboost
2016-09-06 10:06:22,881 DEBUG: Start: Determine Train/Test split
2016-09-06 10:06:22,882 DEBUG: Info: Shape X_train:(210, 18), Length of y_train:210
2016-09-06 10:06:22,882 DEBUG: Info: Shape X_test:(90, 18), Length of y_test:90
2016-09-06 10:06:22,882 DEBUG: Done: Determine Train/Test split
2016-09-06 10:06:22,882 DEBUG: Start: RandomSearch best settings with 1 iterations
2016-09-06 10:06:22,965 DEBUG: Done: RandomSearch best settings
2016-09-06 10:06:22,965 DEBUG: Start: Training
2016-09-06 10:06:22,970 DEBUG: Info: Time for Training: 0.0897569656372[s]
2016-09-06 10:06:22,971 DEBUG: Done: Training
2016-09-06 10:06:22,971 DEBUG: Start: Predicting
2016-09-06 10:06:22,984 DEBUG: Done: Predicting
2016-09-06 10:06:22,984 DEBUG: Start: Getting Results
Classification on Fake database for View1 with Adaboost
accuracy_score on train : 1.0
accuracy_score on test : 0.555555555556
Database configuration :
- Database name : Fake
- View name : View1 View shape : (300, 8)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- Adaboost with num_esimators : 13, base_estimators : DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
min_samples_split=2, min_weight_fraction_leaf=0.0,
presort=False, random_state=None, splitter='best')
- Executed on 1 core(s)
- Got configuration using randomized search with 1 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 1.0
- Score on test : 0.555555555556
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View1 with DecisionTree
accuracy_score on train : 1.0
accuracy_score on test : 0.544444444444
Database configuration :
- Database name : Fake
- View name : View1 View shape : (300, 8)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- Decision Tree with max_depth : 21
- Executed on 1 core(s)
- Got configuration using randomized search with 1 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 1.0
- Score on test : 0.544444444444
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View1 with KNN
accuracy_score on train : 0.619047619048
accuracy_score on test : 0.588888888889
Database configuration :
- Database name : Fake
- View name : View1 View shape : (300, 8)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- K nearest Neighbors with n_neighbors: 26
- Executed on 1 core(s)
- Got configuration using randomized search with 1 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 0.619047619048
- Score on test : 0.588888888889
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View0 with RandomForest
accuracy_score on train : 0.766666666667
accuracy_score on test : 0.433333333333
Database configuration :
- Database name : Fake
- View name : View0 View shape : (300, 11)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- Random Forest with num_esimators : 18, max_depth : 3
- Executed on 1 core(s)
- Got configuration using randomized search with 1 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 0.766666666667
- Score on test : 0.433333333333
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View0 with SGD
accuracy_score on train : 0.62380952381
accuracy_score on test : 0.477777777778
Database configuration :
- Database name : Fake
- View name : View0 View shape : (300, 11)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- SGDClassifier with loss : modified_huber, penalty : elasticnet
- Executed on 1 core(s)
- Got configuration using randomized search with 1 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 0.62380952381
- Score on test : 0.477777777778
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View0 with SVMLinear
accuracy_score on train : 0.52380952381
accuracy_score on test : 0.444444444444
Database configuration :
- Database name : Fake
- View name : View0 View shape : (300, 11)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- SVM Linear with C : 491
- Executed on 1 core(s)
- Got configuration using randomized search with 1 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 0.52380952381
- Score on test : 0.444444444444
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View0 with SVMPoly
accuracy_score on train : 1.0
accuracy_score on test : 0.522222222222
Database configuration :
- Database name : Fake
- View name : View0 View shape : (300, 11)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- SVM Linear with C : 2405
- Executed on 1 core(s)
- Got configuration using randomized search with 1 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 1.0
- Score on test : 0.522222222222
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View0 with SVMRBF
accuracy_score on train : 1.0
accuracy_score on test : 0.411111111111
Database configuration :
- Database name : Fake
- View name : View0 View shape : (300, 11)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- SVM Linear with C : 9676
- Executed on 1 core(s)
- Got configuration using randomized search with 1 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 1.0
- Score on test : 0.411111111111
Classification took 0:00:00
\ No newline at end of file