I leave

334314f6 · bbauvin · d8f03841 · 334314f6 · 334314f6 · 334314f6
Commit 334314f6 authored 8 years ago by bbauvin
--- a/Code/MonoMutliViewClassifiers/ExecClassif.py
+++ b/Code/MonoMutliViewClassifiers/ExecClassif.py
@@ -25,6 +25,11 @@ from ResultAnalysis import resultAnalysis
 from Versions import testVersions
 import MonoviewClassifiers
+import matplotlib.pyplot as plt
+from matplotlib import cm
+from numpy.random import randint
+import random
 # Author-Info
 __author__ 	= "Baptiste Bauvin"
 __status__ 	= "Prototype"                           # Production, Development, Prototype
@@ -104,7 +109,7 @@ def initBenchmark(args):
        allMumboAlgos = [name for _, name, isPackage in
                         pkgutil.iter_modules(['Multiview/Mumbo/Classifiers'])
                         if not isPackage and not name in ["SubSampling", "ModifiedMulticlass", "Kover"]]
-        allMultiviewAlgos = {"Fusion": allFusionAlgos, "Mumbo": allMumboAlgos}
+        allMultiviewAlgos = {"Fusion": allFusionAlgos}#, "Mumbo": allMumboAlgos}
        benchmark = {"Monoview": allMonoviewAlgos, "Multiview": allMultiviewAlgos}
    if "Multiview" in args.CL_type.strip(":"):
@@ -290,6 +295,24 @@ def initMultiviewArguments(args, benchmark, views, viewsIndices, accuracies, cla
            pass
    return argumentDictionaries
+def analyzeLabels(labelsArrays, realLabels, classifiersNames):
+    """Save a heat map of which examples each classifier labelled correctly.
+
+    The figure is written to "test.png"; nothing is returned.
+
+    labelsArrays     -- 2D array of predicted labels, indexed [example, classifier]
+    realLabels       -- 1D array of reference labels, one per example
+    classifiersNames -- one entry per classifier; only its length is used here
+    """
+    nbClassifiers = len(classifiersNames)
+    nbExamples = realLabels.shape[0]
+    # Width (in columns) of the stripe drawn for each classifier.
+    # Floor division keeps it an integer on both Python 2 and Python 3.
+    nbIter = nbExamples//nbClassifiers
+    data = np.zeros((nbExamples, nbClassifiers*nbIter))
+    # Compare every prediction of an example with that example's real label.
+    # realLabels is turned into a column so it broadcasts across classifiers,
+    # giving a (nbExamples, nbClassifiers) matrix of 1 (right) / 0 (wrong).
+    tempData = (np.asarray(labelsArrays) == np.asarray(realLabels)[:, np.newaxis]).astype(int)
+    for classifierIndex in range(nbClassifiers):
+        firstColumn = classifierIndex*nbIter
+        # Repeat the classifier's column nbIter times to make a visible stripe
+        data[:, firstColumn:firstColumn+nbIter] = tempData[:, classifierIndex:classifierIndex+1]
+    fig, ax = plt.subplots()
+    cax = ax.imshow(data, interpolation='nearest', cmap=cm.coolwarm)
+    # NOTE(review): this title does not describe the plot; it looks like a
+    # leftover from an example snippet -- confirm before renaming.
+    ax.set_title('Gaussian noise with vertical colorbar')
+    cbar = fig.colorbar(cax, ticks=[0, 1])
+    cbar.ax.set_yticklabels(['Wrong', ' Right'])
+    fig.savefig("test.png")
 parser = argparse.ArgumentParser(
    description='This file is used to benchmark the accuracies fo multiple classification algorithm on multiview data.',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
@@ -480,6 +503,7 @@ if nbCores>1:
    accuracies = [[result[1][1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
    classifiersNames = [[result[1][0] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
    classifiersConfigs = [[result[1][1][:-1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
 else:
    resultsMonoview+=([ExecMonoview(DATASET.get("View"+str(arguments["viewIndex"])),
                                    DATASET.get("Labels").value, args.name, labelsNames,
@@ -513,7 +537,9 @@ if nbCores>1:
    logging.debug("Start:\t Deleting "+str(nbCores)+" temporary datasets for multiprocessing")
    datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores)
    logging.debug("Start:\t Deleting datasets for multiprocessing")
+# Stack the full-dataset predictions of every monoview and multiview run,
+# then transpose so that each classifier becomes one column.
+labels = np.array([resultMonoview[1][3] for resultMonoview in resultsMonoview]+[resultMultiview[3] for resultMultiview in resultsMultiview]).transpose()
+trueLabels = DATASET.get("Labels").value
+# Pass one (empty) name per classifier column: the previous expression
+# `["" in range(...)]` built a single-element list holding a boolean.
+analyzeLabels(labels, trueLabels, ["" for _ in range(labels.shape[1])])
 times = [dataBaseTime, monoviewTime, multiviewTime]
 # times=[]
 results = (resultsMonoview, resultsMultiview)

--- a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py
+++ b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py
@@ -103,6 +103,7 @@ def ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databa
        y_train_preds.append(y_train_pred)
        y_tests.append(y_test)
        y_test_preds.append(y_test_pred)
+        full_labels = cl_res.predict(X)
        logging.debug("Done:\t Predicting")
    t_end  = time.time() - t_start
    logging.debug("Info:\t Time for training and predicting: " + str(t_end) + "[s]")
@@ -138,7 +139,7 @@ def ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databa
    logging.info("Done:\t Result Analysis")
    viewIndex = args["viewIndex"]
-    return viewIndex, [CL_type, cl_desc+[feat], metricsScores]
+    return viewIndex, [CL_type, cl_desc+[feat], metricsScores, full_labels]
    # # Classification Report with Precision, Recall, F1 , Support
    # logging.debug("Info:\t Classification report:")
    # filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-CMV-" + name + "-" + feat + "-Report"

--- a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py
+++ b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py
@@ -95,6 +95,7 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p
        classifier.fit_hdf5(DATASET, trainIndices=learningIndices, viewsIndices=viewsIndices)
        trainLabels = classifier.predict_hdf5(DATASET, usedIndices=learningIndices, viewsIndices=viewsIndices)
        testLabels = classifier.predict_hdf5(DATASET, usedIndices=validationIndices, viewsIndices=viewsIndices)
+        fullLabels = classifier.predict_hdf5(DATASET, viewsIndices=viewsIndices)
        trainLabelsIterations.append(trainLabels)
        testLabelsIterations.append(testLabels)
        ivalidationIndices.append(validationIndices)
@@ -144,7 +145,7 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p
            imagesAnalysis[imageName].savefig(outputFileName + imageName + '.png')
    logging.info("Done:\t Result Analysis")
-    return CL_type, classificationKWARGS, metricsScores
+    return CL_type, classificationKWARGS, metricsScores, fullLabels
 if __name__=='__main__':

--- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py
+++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py
@@ -10,6 +10,7 @@ import Methods.LateFusion
 import Metrics
 # Author-Info
 __author__ 	= "Baptiste Bauvin"
 __status__ 	= "Prototype"                           # Production, Development, Prototype
@@ -20,6 +21,7 @@ def error(testLabels, computedLabels):
    return float(error) * 100 / len(computedLabels)
 def printMetricScore(metricScores, metrics):
    metricScoreString = "\n\n"
    for metric in metrics:

--- a/Code/MonoMutliViewClassifiers/ResultAnalysis.py
+++ b/Code/MonoMutliViewClassifiers/ResultAnalysis.py
@@ -8,9 +8,11 @@ import matplotlib
 matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import numpy as np
+from matplotlib import cm
 #Import own Modules
 import Metrics
+from utils.Transformations import signLabels
 # Author-Info
 __author__ 	= "Baptiste Bauvin"
@@ -26,8 +28,10 @@ def autolabel(rects, ax):
 def resultAnalysis(benchmark, results, name, times, metrics):
-    for metric in metrics:
    mono, multi = results
+    labelsByView = np.array([res[0][3] for res in mono]+[res[3] for res in multi])
+    makeColorMap(labelsByView, name)
+    for metric in metrics:
        names = [res[1][0]+"-"+res[1][1][-1] for res in mono]
        names+=[type_ for type_, a, b in multi if type_ != "Fusion"]
        names+=[ "Late-"+str(a["fusionMethod"]) for type_, a, b in multi if type_ == "Fusion" and a["fusionType"] != "EarlyFusion"]
@@ -63,3 +67,21 @@ def resultAnalysis(benchmark, results, name, times, metrics):
    logging.info("Extraction time : "+str(times[0])+"s, Monoview time : "+str(times[1])+"s, Multiview Time : "+str(times[2])+"s")
+def makeColorMap(labelsByView, name):
+    """Draw the signed labels of each view as vertical stripes and save the figure.
+
+    The image is saved as "Results/<timestamp>-<name>-labels.png".
+
+    labelsByView -- 2D array read as [example, view]: shape[0] is taken as the
+                    number of examples and shape[1] as the number of views
+    name         -- string inserted in the output file name
+    """
+    nb_view = labelsByView.shape[1]
+    nbExamples = labelsByView.shape[0]
+    # Make plot with vertical (default) colorbar
+    fig, ax = plt.subplots()
+    data = np.zeros((nbExamples,nbExamples), dtype=int)
+    datap = np.array([signLabels(labels) for labels in labelsByView])
+    # Number of columns used for each view so the stripes fill a square image.
+    # Floor division keeps it an integer on both Python 2 and Python 3.
+    nbRepet = nbExamples//nb_view
+    for j in range(nb_view):
+        # Use the computed stripe width instead of a hard-coded 50, which was
+        # only correct when nbExamples/nb_view happened to equal 50.
+        data[:, j*nbRepet:(j+1)*nbRepet] = datap[:, j:j+1]
+    cax = ax.imshow(data, interpolation='nearest', cmap=cm.coolwarm)
+    ax.set_title('Labels per view')
+    # NOTE(review): ticks are placed at 0 and 1 but labelled '-1' and ' 1';
+    # presumably signLabels yields -1/1 -- verify the tick positions.
+    cbar = fig.colorbar(cax, ticks=[0, 1])
+    cbar.ax.set_yticklabels(['-1', ' 1'])  # vertically oriented colorbar
+    plt.show()
+    fig.savefig("Results/"+time.strftime("%Y%m%d-%H%M%S")+"-"+name+"-labels.png")
--- a/Code/MonoMutliViewClassifiers/Results/20160906-075939-CMultiV-Benchmark-Methyl_MiRNA__RNASeq_Clinic-Fake-LOG.log
+++ b/Code/MonoMutliViewClassifiers/Results/20160906-075939-CMultiV-Benchmark-Methyl_MiRNA__RNASeq_Clinic-Fake-LOG.log
--- a/Code/MonoMutliViewClassifiers/Results/20160906-075959-CMultiV-Benchmark-Methyl_MiRNA__RNASeq_Clinic-Fake-LOG.log
+++ b/Code/MonoMutliViewClassifiers/Results/20160906-075959-CMultiV-Benchmark-Methyl_MiRNA__RNASeq_Clinic-Fake-LOG.log
--- a/Code/MonoMutliViewClassifiers/Results/20160906-080453-CMultiV-Benchmark-Methyl_MiRNA__RNASeq_Clinic-Fake-LOG.log
+++ b/Code/MonoMutliViewClassifiers/Results/20160906-080453-CMultiV-Benchmark-Methyl_MiRNA__RNASeq_Clinic-Fake-LOG.log
--- a/Code/MonoMutliViewClassifiers/Results/20160906-080459Results-Adaboost-Non-Oui-learnRate0.7-Fake.txt
+++ b/Code/MonoMutliViewClassifiers/Results/20160906-080459Results-Adaboost-Non-Oui-learnRate0.7-Fake.txt
-Classification on Fake database for View0 with Adaboost
-accuracy_score on train : 1.0
-accuracy_score on test : 0.533333333333
-Database configuration : 
-	- Database name : Fake
-	- View name : View0	 View shape : (300, 17)
-	- Learning Rate : 0.7
-	- Labels used : Non, Oui
-	- Number of cross validation folds : 5
-Classifier configuration : 
-	- Adaboost with num_esimators : 9, base_estimators : DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
-            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
-            min_samples_split=2, min_weight_fraction_leaf=0.0,
-            presort=False, random_state=None, splitter='best')
-	- Executed on 1 core(s) 
-	- Got configuration using randomized search with 2 iterations 
-	For Accuracy score using None as sample_weights (higher is better) : 
-		- Score on train : 1.0
-		- Score on test : 0.533333333333
- Classification took 0:00:00
\ No newline at end of file
--- a/Code/MonoMutliViewClassifiers/Results/20160906-080459Results-DecisionTree-Non-Oui-learnRate0.7-Fake.txt
+++ b/Code/MonoMutliViewClassifiers/Results/20160906-080459Results-DecisionTree-Non-Oui-learnRate0.7-Fake.txt
-Classification on Fake database for View0 with DecisionTree
-accuracy_score on train : 1.0
-accuracy_score on test : 0.533333333333
-Database configuration : 
-	- Database name : Fake
-	- View name : View0	 View shape : (300, 17)
-	- Learning Rate : 0.7
-	- Labels used : Non, Oui
-	- Number of cross validation folds : 5
-Classifier configuration : 
-	- Decision Tree with max_depth : 25
-	- Executed on 1 core(s) 
-	- Got configuration using randomized search with 2 iterations 
-	For Accuracy score using None as sample_weights (higher is better) : 
-		- Score on train : 1.0
-		- Score on test : 0.533333333333
- Classification took 0:00:00
\ No newline at end of file
--- a/Code/MonoMutliViewClassifiers/Results/20160906-080459Results-KNN-Non-Oui-learnRate0.7-Fake.txt
+++ b/Code/MonoMutliViewClassifiers/Results/20160906-080459Results-KNN-Non-Oui-learnRate0.7-Fake.txt
-Classification on Fake database for View0 with KNN
-accuracy_score on train : 0.595238095238
-accuracy_score on test : 0.622222222222
-Database configuration : 
-	- Database name : Fake
-	- View name : View0	 View shape : (300, 17)
-	- Learning Rate : 0.7
-	- Labels used : Non, Oui
-	- Number of cross validation folds : 5
-Classifier configuration : 
-	- K nearest Neighbors with  n_neighbors: 31
-	- Executed on 1 core(s) 
-	- Got configuration using randomized search with 2 iterations 
-	For Accuracy score using None as sample_weights (higher is better) : 
-		- Score on train : 0.595238095238
-		- Score on test : 0.622222222222
- Classification took 0:00:00
\ No newline at end of file
--- a/Code/MonoMutliViewClassifiers/Results/20160906-080500Results-RandomForest-Non-Oui-learnRate0.7-Fake.txt
+++ b/Code/MonoMutliViewClassifiers/Results/20160906-080500Results-RandomForest-Non-Oui-learnRate0.7-Fake.txt
-Classification on Fake database for View0 with RandomForest
-accuracy_score on train : 1.0
-accuracy_score on test : 0.5
-Database configuration : 
-	- Database name : Fake
-	- View name : View0	 View shape : (300, 17)
-	- Learning Rate : 0.7
-	- Labels used : Non, Oui
-	- Number of cross validation folds : 5
-Classifier configuration : 
-	- Random Forest with num_esimators : 26, max_depth : 23
-	- Executed on 1 core(s) 
-	- Got configuration using randomized search with 2 iterations 
-	For Accuracy score using None as sample_weights (higher is better) : 
-		- Score on train : 1.0
-		- Score on test : 0.5
- Classification took 0:00:00
\ No newline at end of file
--- a/Code/MonoMutliViewClassifiers/Results/20160906-080500Results-SVMLinear-Non-Oui-learnRate0.7-Fake.txt
+++ b/Code/MonoMutliViewClassifiers/Results/20160906-080500Results-SVMLinear-Non-Oui-learnRate0.7-Fake.txt
-Classification on Fake database for View0 with SVMLinear
-accuracy_score on train : 0.428571428571
-accuracy_score on test : 0.477777777778
-Database configuration : 
-	- Database name : Fake
-	- View name : View0	 View shape : (300, 17)
-	- Learning Rate : 0.7
-	- Labels used : Non, Oui
-	- Number of cross validation folds : 5
-Classifier configuration : 
-	- SVM Linear with C : 6231
-	- Executed on 1 core(s) 
-	- Got configuration using randomized search with 2 iterations 
-	For Accuracy score using None as sample_weights (higher is better) : 
-		- Score on train : 0.428571428571
-		- Score on test : 0.477777777778
- Classification took 0:00:00
\ No newline at end of file
--- a/Code/MonoMutliViewClassifiers/Results/20160906-080501Results-SGD-Non-Oui-learnRate0.7-Fake.txt
+++ b/Code/MonoMutliViewClassifiers/Results/20160906-080501Results-SGD-Non-Oui-learnRate0.7-Fake.txt
-Classification on Fake database for View0 with SGD
-accuracy_score on train : 0.571428571429
-accuracy_score on test : 0.422222222222
-Database configuration : 
-	- Database name : Fake
-	- View name : View0	 View shape : (300, 17)
-	- Learning Rate : 0.7
-	- Labels used : Non, Oui
-	- Number of cross validation folds : 5
-Classifier configuration : 
-	- SGDClassifier with loss : perceptron, penalty : l2
-	- Executed on 1 core(s) 
-	- Got configuration using randomized search with 2 iterations 
-	For Accuracy score using None as sample_weights (higher is better) : 
-		- Score on train : 0.571428571429
-		- Score on test : 0.422222222222
- Classification took 0:00:00
\ No newline at end of file
--- a/Code/MonoMutliViewClassifiers/Results/20160906-080501Results-SVMPoly-Non-Oui-learnRate0.7-Fake.txt
+++ b/Code/MonoMutliViewClassifiers/Results/20160906-080501Results-SVMPoly-Non-Oui-learnRate0.7-Fake.txt
-Classification on Fake database for View0 with SVMPoly
-accuracy_score on train : 1.0
-accuracy_score on test : 0.533333333333
-Database configuration : 
-	- Database name : Fake
-	- View name : View0	 View shape : (300, 17)
-	- Learning Rate : 0.7
-	- Labels used : Non, Oui
-	- Number of cross validation folds : 5
-Classifier configuration : 
-	- SVM Linear with C : 6464
-	- Executed on 1 core(s) 
-	- Got configuration using randomized search with 2 iterations 
-	For Accuracy score using None as sample_weights (higher is better) : 
-		- Score on train : 1.0
-		- Score on test : 0.533333333333
- Classification took 0:00:00
\ No newline at end of file
--- a/Code/MonoMutliViewClassifiers/Results/20160906-080501Results-SVMRBF-Non-Oui-learnRate0.7-Fake.txt
+++ b/Code/MonoMutliViewClassifiers/Results/20160906-080501Results-SVMRBF-Non-Oui-learnRate0.7-Fake.txt
-Classification on Fake database for View0 with SVMRBF
-accuracy_score on train : 1.0
-accuracy_score on test : 0.488888888889
-Database configuration : 
-	- Database name : Fake
-	- View name : View0	 View shape : (300, 17)
-	- Learning Rate : 0.7
-	- Labels used : Non, Oui
-	- Number of cross validation folds : 5
-Classifier configuration : 
-	- SVM Linear with C : 6231
-	- Executed on 1 core(s) 
-	- Got configuration using randomized search with 2 iterations 
-	For Accuracy score using None as sample_weights (higher is better) : 
-		- Score on train : 1.0
-		- Score on test : 0.488888888889
- Classification took 0:00:00
\ No newline at end of file
--- a/Code/MonoMutliViewClassifiers/Results/20160906-080502Results-Adaboost-Non-Oui-learnRate0.7-Fake.txt
+++ b/Code/MonoMutliViewClassifiers/Results/20160906-080502Results-Adaboost-Non-Oui-learnRate0.7-Fake.txt
-Classification on Fake database for View1 with Adaboost
-accuracy_score on train : 1.0
-accuracy_score on test : 0.577777777778
-Database configuration : 
-	- Database name : Fake
-	- View name : View1	 View shape : (300, 14)
-	- Learning Rate : 0.7
-	- Labels used : Non, Oui
-	- Number of cross validation folds : 5
-Classifier configuration : 
-	- Adaboost with num_esimators : 7, base_estimators : DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
-            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
-            min_samples_split=2, min_weight_fraction_leaf=0.0,
-            presort=False, random_state=None, splitter='best')
-	- Executed on 1 core(s) 
-	- Got configuration using randomized search with 2 iterations 
-	For Accuracy score using None as sample_weights (higher is better) : 
-		- Score on train : 1.0
-		- Score on test : 0.577777777778
- Classification took 0:00:00
\ No newline at end of file
--- a/Code/MonoMutliViewClassifiers/Results/20160906-080502Results-DecisionTree-Non-Oui-learnRate0.7-Fake.txt
+++ b/Code/MonoMutliViewClassifiers/Results/20160906-080502Results-DecisionTree-Non-Oui-learnRate0.7-Fake.txt
-Classification on Fake database for View1 with DecisionTree
-accuracy_score on train : 1.0
-accuracy_score on test : 0.533333333333
-Database configuration : 
-	- Database name : Fake
-	- View name : View1	 View shape : (300, 14)
-	- Learning Rate : 0.7
-	- Labels used : Non, Oui
-	- Number of cross validation folds : 5
-Classifier configuration : 
-	- Decision Tree with max_depth : 25
-	- Executed on 1 core(s) 
-	- Got configuration using randomized search with 2 iterations 
-	For Accuracy score using None as sample_weights (higher is better) : 
-		- Score on train : 1.0
-		- Score on test : 0.533333333333
- Classification took 0:00:00
\ No newline at end of file
--- a/Code/MonoMutliViewClassifiers/Results/20160906-080502Results-KNN-Non-Oui-learnRate0.7-Fake.txt
+++ b/Code/MonoMutliViewClassifiers/Results/20160906-080502Results-KNN-Non-Oui-learnRate0.7-Fake.txt
-Classification on Fake database for View1 with KNN
-accuracy_score on train : 0.552380952381
-accuracy_score on test : 0.522222222222
-Database configuration : 
-	- Database name : Fake
-	- View name : View1	 View shape : (300, 14)
-	- Learning Rate : 0.7
-	- Labels used : Non, Oui
-	- Number of cross validation folds : 5
-Classifier configuration : 
-	- K nearest Neighbors with  n_neighbors: 31
-	- Executed on 1 core(s) 
-	- Got configuration using randomized search with 2 iterations 
-	For Accuracy score using None as sample_weights (higher is better) : 
-		- Score on train : 0.552380952381
-		- Score on test : 0.522222222222
- Classification took 0:00:00
\ No newline at end of file
--- a/Code/MonoMutliViewClassifiers/Results/20160906-080503Results-DecisionTree-Non-Oui-learnRate0.7-Fake.txt
+++ b/Code/MonoMutliViewClassifiers/Results/20160906-080503Results-DecisionTree-Non-Oui-learnRate0.7-Fake.txt
-Classification on Fake database for View2 with DecisionTree
-accuracy_score on train : 1.0
-accuracy_score on test : 0.511111111111
-Database configuration : 
-	- Database name : Fake
-	- View name : View2	 View shape : (300, 11)
-	- Learning Rate : 0.7
-	- Labels used : Non, Oui
-	- Number of cross validation folds : 5
-Classifier configuration : 
-	- Decision Tree with max_depth : 25
-	- Executed on 1 core(s) 
-	- Got configuration using randomized search with 2 iterations 
-	For Accuracy score using None as sample_weights (higher is better) : 
-		- Score on train : 1.0
-		- Score on test : 0.511111111111
- Classification took 0:00:00
\ No newline at end of file