Skip to content
Snippets Groups Projects
Commit 334314f6 authored by bbauvin's avatar bbauvin
Browse files

I leave

parent d8f03841
Branches
Tags
No related merge requests found
Showing
with 57 additions and 1771 deletions
...@@ -25,6 +25,11 @@ from ResultAnalysis import resultAnalysis ...@@ -25,6 +25,11 @@ from ResultAnalysis import resultAnalysis
from Versions import testVersions from Versions import testVersions
import MonoviewClassifiers import MonoviewClassifiers
import matplotlib.pyplot as plt
from matplotlib import cm
from numpy.random import randint
import random
# Author-Info # Author-Info
__author__ = "Baptiste Bauvin" __author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype __status__ = "Prototype" # Production, Development, Prototype
...@@ -104,7 +109,7 @@ def initBenchmark(args): ...@@ -104,7 +109,7 @@ def initBenchmark(args):
allMumboAlgos = [name for _, name, isPackage in allMumboAlgos = [name for _, name, isPackage in
pkgutil.iter_modules(['Multiview/Mumbo/Classifiers']) pkgutil.iter_modules(['Multiview/Mumbo/Classifiers'])
if not isPackage and not name in ["SubSampling", "ModifiedMulticlass", "Kover"]] if not isPackage and not name in ["SubSampling", "ModifiedMulticlass", "Kover"]]
allMultiviewAlgos = {"Fusion": allFusionAlgos, "Mumbo": allMumboAlgos} allMultiviewAlgos = {"Fusion": allFusionAlgos}#, "Mumbo": allMumboAlgos}
benchmark = {"Monoview": allMonoviewAlgos, "Multiview": allMultiviewAlgos} benchmark = {"Monoview": allMonoviewAlgos, "Multiview": allMultiviewAlgos}
if "Multiview" in args.CL_type.strip(":"): if "Multiview" in args.CL_type.strip(":"):
...@@ -290,6 +295,24 @@ def initMultiviewArguments(args, benchmark, views, viewsIndices, accuracies, cla ...@@ -290,6 +295,24 @@ def initMultiviewArguments(args, benchmark, views, viewsIndices, accuracies, cla
pass pass
return argumentDictionaries return argumentDictionaries
def analyzeLabels(labelsArrays, realLabels, classifiersNames):
    """Save a heat map of which examples each classifier labelled correctly.

    The image has one row per example. Each classifier gets a band of
    identical columns (so the bands stay visible when there are far more
    examples than classifiers); a cell is 1 where the prediction matches
    the true label and 0 where it does not. The figure is written to
    "test.png" in the current working directory.

    Parameters
    ----------
    labelsArrays : array-like, (nbExamples, nbClassifiers)
        Predicted labels, one column per classifier. A 1-D input is
        treated as a single classifier.
    realLabels : array-like, (nbExamples,)
        True labels.
    classifiersNames : sequence
        Only its length is used: it sets how many classifier columns of
        ``labelsArrays`` are drawn.

    Raises
    ------
    ZeroDivisionError
        If ``classifiersNames`` is empty.
    """
    labelsArrays = np.asarray(labelsArrays)
    realLabels = np.asarray(realLabels)
    if labelsArrays.ndim == 1:
        # A bare prediction vector is a single classifier column.
        labelsArrays = labelsArrays.reshape(-1, 1)
    nbClassifiers = len(classifiersNames)
    nbExamples = realLabels.shape[0]
    # Floor division keeps the band width an integer on Python 3 as well
    # as Python 2; at least one column per classifier so the image is
    # never empty when there are more classifiers than examples.
    nbIter = max(1, nbExamples // nbClassifiers)
    # Compare every classifier column against the true labels. realLabels
    # is reshaped to a column so the comparison broadcasts across
    # classifiers instead of comparing a per-example row with the whole
    # label vector.
    tempData = (labelsArrays == realLabels.reshape(-1, 1)).astype(int)
    # Repeat each classifier column nbIter times to build its band.
    data = np.repeat(tempData[:, :nbClassifiers], nbIter, axis=1)
    fig, ax = plt.subplots()
    cax = ax.imshow(data, interpolation='nearest', cmap=cm.coolwarm)
    ax.set_title('Correct and wrong predictions per classifier')
    cbar = fig.colorbar(cax, ticks=[0, 1])
    cbar.ax.set_yticklabels(['Wrong', ' Right'])
    fig.savefig("test.png")
    # Release the figure; pyplot otherwise keeps it alive for the whole run.
    plt.close(fig)
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description='This file is used to benchmark the accuracies fo multiple classification algorithm on multiview data.', description='This file is used to benchmark the accuracies fo multiple classification algorithm on multiview data.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter) formatter_class=argparse.ArgumentDefaultsHelpFormatter)
...@@ -480,6 +503,7 @@ if nbCores>1: ...@@ -480,6 +503,7 @@ if nbCores>1:
accuracies = [[result[1][1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)] accuracies = [[result[1][1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
classifiersNames = [[result[1][0] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)] classifiersNames = [[result[1][0] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
classifiersConfigs = [[result[1][1][:-1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)] classifiersConfigs = [[result[1][1][:-1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
else: else:
resultsMonoview+=([ExecMonoview(DATASET.get("View"+str(arguments["viewIndex"])), resultsMonoview+=([ExecMonoview(DATASET.get("View"+str(arguments["viewIndex"])),
DATASET.get("Labels").value, args.name, labelsNames, DATASET.get("Labels").value, args.name, labelsNames,
...@@ -513,7 +537,9 @@ if nbCores>1: ...@@ -513,7 +537,9 @@ if nbCores>1:
logging.debug("Start:\t Deleting "+str(nbCores)+" temporary datasets for multiprocessing") logging.debug("Start:\t Deleting "+str(nbCores)+" temporary datasets for multiprocessing")
datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores) datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores)
logging.debug("Start:\t Deleting datasets for multiprocessing") logging.debug("Start:\t Deleting datasets for multiprocessing")
# Gather the full-dataset predictions returned by every monoview and
# multiview run (index 3 of each result payload) and transpose so that rows
# are examples and columns are classifiers.
labels = np.array([resultMonoview[1][3] for resultMonoview in resultsMonoview]+[resultMultiview[3] for resultMultiview in resultsMultiview]).transpose()
trueLabels = DATASET.get("Labels").value
# analyzeLabels only uses the length of the names list, so pass one
# placeholder per classifier column. The previous `["" in range(...)]`
# evaluated a membership test and produced a single-element list.
analyzeLabels(labels, trueLabels, ["" for _ in range(labels.shape[1])])
times = [dataBaseTime, monoviewTime, multiviewTime] times = [dataBaseTime, monoviewTime, multiviewTime]
# times=[] # times=[]
results = (resultsMonoview, resultsMultiview) results = (resultsMonoview, resultsMultiview)
......
...@@ -103,6 +103,7 @@ def ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databa ...@@ -103,6 +103,7 @@ def ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databa
y_train_preds.append(y_train_pred) y_train_preds.append(y_train_pred)
y_tests.append(y_test) y_tests.append(y_test)
y_test_preds.append(y_test_pred) y_test_preds.append(y_test_pred)
full_labels = cl_res.predict(X)
logging.debug("Done:\t Predicting") logging.debug("Done:\t Predicting")
t_end = time.time() - t_start t_end = time.time() - t_start
logging.debug("Info:\t Time for training and predicting: " + str(t_end) + "[s]") logging.debug("Info:\t Time for training and predicting: " + str(t_end) + "[s]")
...@@ -138,7 +139,7 @@ def ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databa ...@@ -138,7 +139,7 @@ def ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databa
logging.info("Done:\t Result Analysis") logging.info("Done:\t Result Analysis")
viewIndex = args["viewIndex"] viewIndex = args["viewIndex"]
return viewIndex, [CL_type, cl_desc+[feat], metricsScores] return viewIndex, [CL_type, cl_desc+[feat], metricsScores, full_labels]
# # Classification Report with Precision, Recall, F1 , Support # # Classification Report with Precision, Recall, F1 , Support
# logging.debug("Info:\t Classification report:") # logging.debug("Info:\t Classification report:")
# filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + name + "-" + feat + "-Report" # filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + name + "-" + feat + "-Report"
......
...@@ -95,6 +95,7 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p ...@@ -95,6 +95,7 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p
classifier.fit_hdf5(DATASET, trainIndices=learningIndices, viewsIndices=viewsIndices) classifier.fit_hdf5(DATASET, trainIndices=learningIndices, viewsIndices=viewsIndices)
trainLabels = classifier.predict_hdf5(DATASET, usedIndices=learningIndices, viewsIndices=viewsIndices) trainLabels = classifier.predict_hdf5(DATASET, usedIndices=learningIndices, viewsIndices=viewsIndices)
testLabels = classifier.predict_hdf5(DATASET, usedIndices=validationIndices, viewsIndices=viewsIndices) testLabels = classifier.predict_hdf5(DATASET, usedIndices=validationIndices, viewsIndices=viewsIndices)
fullLabels = classifier.predict_hdf5(DATASET, viewsIndices=viewsIndices)
trainLabelsIterations.append(trainLabels) trainLabelsIterations.append(trainLabels)
testLabelsIterations.append(testLabels) testLabelsIterations.append(testLabels)
ivalidationIndices.append(validationIndices) ivalidationIndices.append(validationIndices)
...@@ -144,7 +145,7 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p ...@@ -144,7 +145,7 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p
imagesAnalysis[imageName].savefig(outputFileName + imageName + '.png') imagesAnalysis[imageName].savefig(outputFileName + imageName + '.png')
logging.info("Done:\t Result Analysis") logging.info("Done:\t Result Analysis")
return CL_type, classificationKWARGS, metricsScores return CL_type, classificationKWARGS, metricsScores, fullLabels
if __name__=='__main__': if __name__=='__main__':
......
...@@ -10,6 +10,7 @@ import Methods.LateFusion ...@@ -10,6 +10,7 @@ import Methods.LateFusion
import Metrics import Metrics
# Author-Info # Author-Info
__author__ = "Baptiste Bauvin" __author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype __status__ = "Prototype" # Production, Development, Prototype
...@@ -20,6 +21,7 @@ def error(testLabels, computedLabels): ...@@ -20,6 +21,7 @@ def error(testLabels, computedLabels):
return float(error) * 100 / len(computedLabels) return float(error) * 100 / len(computedLabels)
def printMetricScore(metricScores, metrics): def printMetricScore(metricScores, metrics):
metricScoreString = "\n\n" metricScoreString = "\n\n"
for metric in metrics: for metric in metrics:
......
...@@ -8,9 +8,11 @@ import matplotlib ...@@ -8,9 +8,11 @@ import matplotlib
matplotlib.use('Agg') matplotlib.use('Agg')
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
from matplotlib import cm
#Import own Modules #Import own Modules
import Metrics import Metrics
from utils.Transformations import signLabels
# Author-Info # Author-Info
__author__ = "Baptiste Bauvin" __author__ = "Baptiste Bauvin"
...@@ -26,8 +28,10 @@ def autolabel(rects, ax): ...@@ -26,8 +28,10 @@ def autolabel(rects, ax):
def resultAnalysis(benchmark, results, name, times, metrics): def resultAnalysis(benchmark, results, name, times, metrics):
for metric in metrics:
mono, multi = results mono, multi = results
labelsByView = np.array([res[0][3] for res in mono]+[res[3] for res in multi])
makeColorMap(labelsByView, name)
for metric in metrics:
names = [res[1][0]+"-"+res[1][1][-1] for res in mono] names = [res[1][0]+"-"+res[1][1][-1] for res in mono]
names+=[type_ for type_, a, b in multi if type_ != "Fusion"] names+=[type_ for type_, a, b in multi if type_ != "Fusion"]
names+=[ "Late-"+str(a["fusionMethod"]) for type_, a, b in multi if type_ == "Fusion" and a["fusionType"] != "EarlyFusion"] names+=[ "Late-"+str(a["fusionMethod"]) for type_, a, b in multi if type_ == "Fusion" and a["fusionType"] != "EarlyFusion"]
...@@ -63,3 +67,21 @@ def resultAnalysis(benchmark, results, name, times, metrics): ...@@ -63,3 +67,21 @@ def resultAnalysis(benchmark, results, name, times, metrics):
logging.info("Extraction time : "+str(times[0])+"s, Monoview time : "+str(times[1])+"s, Multiview Time : "+str(times[2])+"s") logging.info("Extraction time : "+str(times[0])+"s, Monoview time : "+str(times[1])+"s, Multiview Time : "+str(times[2])+"s")
def makeColorMap(labelsByView, name):
    """Save a heat map of the signed labels predicted for each view.

    Rows are examples. Each view gets a band of identical columns whose
    width is ``nbExamples // nb_view`` (at least 1), so the image is
    roughly square. The figure is written to
    ``Results/<timestamp>-<name>-labels.png``.

    Parameters
    ----------
    labelsByView : numpy.ndarray, (nbExamples, nb_view)
        Labels per example and per view; each row is passed through
        ``signLabels`` before plotting.
    name : str
        Dataset name inserted in the output file name.

    Raises
    ------
    ZeroDivisionError
        If ``labelsByView`` has no columns.
    """
    # Function-scope import: `time` is needed for the file name and is not
    # among the imports visible next to this function.
    import time

    nbExamples = labelsByView.shape[0]
    nb_view = labelsByView.shape[1]
    # NOTE(review): assumes signLabels keeps the length of each row and
    # yields values in {-1, 1}, as the colorbar tick labels imply - confirm.
    datap = np.array([signLabels(labels) for labels in labelsByView])
    # Floor division keeps the band width an integer on Python 3 too.
    nbRepet = max(1, nbExamples // nb_view)
    # Column j*nbRepet + i holds the labels of view j. The previous
    # hard-coded stride of 50 made bands overlap or left columns unfilled
    # whenever nbRepet differed from 50.
    data = np.repeat(datap, nbRepet, axis=1).astype(int)
    # Make plot with vertical (default) colorbar
    fig, ax = plt.subplots()
    cax = ax.imshow(data, interpolation='nearest', cmap=cm.coolwarm)
    ax.set_title('Labels per view')
    # Ticks sit on the plotted values so the '-1' / '1' captions line up.
    cbar = fig.colorbar(cax, ticks=[-1, 1])
    cbar.ax.set_yticklabels(['-1', ' 1'])  # vertically oriented colorbar
    # No plt.show(): this module selects the non-interactive Agg backend,
    # so the figure is only ever written to disk.
    fig.savefig("Results/"+time.strftime("%Y%m%d-%H%M%S")+"-"+name+"-labels.png")
    # Release the figure; pyplot otherwise keeps it alive for the whole run.
    plt.close(fig)
Classification on Fake database for View0 with Adaboost
accuracy_score on train : 1.0
accuracy_score on test : 0.533333333333
Database configuration :
- Database name : Fake
- View name : View0 View shape : (300, 17)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- Adaboost with num_esimators : 9, base_estimators : DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
min_samples_split=2, min_weight_fraction_leaf=0.0,
presort=False, random_state=None, splitter='best')
- Executed on 1 core(s)
- Got configuration using randomized search with 2 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 1.0
- Score on test : 0.533333333333
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View0 with DecisionTree
accuracy_score on train : 1.0
accuracy_score on test : 0.533333333333
Database configuration :
- Database name : Fake
- View name : View0 View shape : (300, 17)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- Decision Tree with max_depth : 25
- Executed on 1 core(s)
- Got configuration using randomized search with 2 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 1.0
- Score on test : 0.533333333333
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View0 with KNN
accuracy_score on train : 0.595238095238
accuracy_score on test : 0.622222222222
Database configuration :
- Database name : Fake
- View name : View0 View shape : (300, 17)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- K nearest Neighbors with n_neighbors: 31
- Executed on 1 core(s)
- Got configuration using randomized search with 2 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 0.595238095238
- Score on test : 0.622222222222
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View0 with RandomForest
accuracy_score on train : 1.0
accuracy_score on test : 0.5
Database configuration :
- Database name : Fake
- View name : View0 View shape : (300, 17)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- Random Forest with num_esimators : 26, max_depth : 23
- Executed on 1 core(s)
- Got configuration using randomized search with 2 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 1.0
- Score on test : 0.5
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View0 with SVMLinear
accuracy_score on train : 0.428571428571
accuracy_score on test : 0.477777777778
Database configuration :
- Database name : Fake
- View name : View0 View shape : (300, 17)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- SVM Linear with C : 6231
- Executed on 1 core(s)
- Got configuration using randomized search with 2 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 0.428571428571
- Score on test : 0.477777777778
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View0 with SGD
accuracy_score on train : 0.571428571429
accuracy_score on test : 0.422222222222
Database configuration :
- Database name : Fake
- View name : View0 View shape : (300, 17)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- SGDClassifier with loss : perceptron, penalty : l2
- Executed on 1 core(s)
- Got configuration using randomized search with 2 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 0.571428571429
- Score on test : 0.422222222222
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View0 with SVMPoly
accuracy_score on train : 1.0
accuracy_score on test : 0.533333333333
Database configuration :
- Database name : Fake
- View name : View0 View shape : (300, 17)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- SVM Linear with C : 6464
- Executed on 1 core(s)
- Got configuration using randomized search with 2 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 1.0
- Score on test : 0.533333333333
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View0 with SVMRBF
accuracy_score on train : 1.0
accuracy_score on test : 0.488888888889
Database configuration :
- Database name : Fake
- View name : View0 View shape : (300, 17)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- SVM Linear with C : 6231
- Executed on 1 core(s)
- Got configuration using randomized search with 2 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 1.0
- Score on test : 0.488888888889
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View1 with Adaboost
accuracy_score on train : 1.0
accuracy_score on test : 0.577777777778
Database configuration :
- Database name : Fake
- View name : View1 View shape : (300, 14)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- Adaboost with num_esimators : 7, base_estimators : DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
min_samples_split=2, min_weight_fraction_leaf=0.0,
presort=False, random_state=None, splitter='best')
- Executed on 1 core(s)
- Got configuration using randomized search with 2 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 1.0
- Score on test : 0.577777777778
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View1 with DecisionTree
accuracy_score on train : 1.0
accuracy_score on test : 0.533333333333
Database configuration :
- Database name : Fake
- View name : View1 View shape : (300, 14)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- Decision Tree with max_depth : 25
- Executed on 1 core(s)
- Got configuration using randomized search with 2 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 1.0
- Score on test : 0.533333333333
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View1 with KNN
accuracy_score on train : 0.552380952381
accuracy_score on test : 0.522222222222
Database configuration :
- Database name : Fake
- View name : View1 View shape : (300, 14)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- K nearest Neighbors with n_neighbors: 31
- Executed on 1 core(s)
- Got configuration using randomized search with 2 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 0.552380952381
- Score on test : 0.522222222222
Classification took 0:00:00
\ No newline at end of file
Classification on Fake database for View2 with DecisionTree
accuracy_score on train : 1.0
accuracy_score on test : 0.511111111111
Database configuration :
- Database name : Fake
- View name : View2 View shape : (300, 11)
- Learning Rate : 0.7
- Labels used : Non, Oui
- Number of cross validation folds : 5
Classifier configuration :
- Decision Tree with max_depth : 25
- Executed on 1 core(s)
- Got configuration using randomized search with 2 iterations
For Accuracy score using None as sample_weights (higher is better) :
- Score on train : 1.0
- Score on test : 0.511111111111
Classification took 0:00:00
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment