Tested mumbo on fake data

73881adf · Baptiste Bauvin · 3f33ad04 · 73881adf · 73881adf · 73881adf
Commit 73881adf authored Apr 5, 2016 by Baptiste Bauvin
--- a/.idea/Multiview-Machine-Learning-OMIS.iml
+++ b/.idea/Multiview-Machine-Learning-OMIS.iml
 <?xml version="1.0" encoding="UTF-8"?>
 <module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
+  <component name="NewModuleRootManager" inherit-compiler-output="false">
    <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
+    <orderEntry type="jdk" jdkName="Python 2.7.11 (~/anaconda2/bin/python)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="TestRunnerService">

--- a/.idea/compiler.xml
+++ b/.idea/compiler.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="CompilerConfiguration">
+    <resourceExtensions />
+    <wildcardResourcePatterns>
+      <entry name="!?*.java" />
+      <entry name="!?*.form" />
+      <entry name="!?*.class" />
+      <entry name="!?*.groovy" />
+      <entry name="!?*.scala" />
+      <entry name="!?*.flex" />
+      <entry name="!?*.kt" />
+      <entry name="!?*.clj" />
+      <entry name="!?*.aj" />
+    </wildcardResourcePatterns>
+    <annotationProcessing>
+      <profile default="true" name="Default" enabled="false">
+        <processorPath useClasspath="true" />
+      </profile>
+    </annotationProcessing>
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/copyright/profiles_settings.xml
+++ b/.idea/copyright/profiles_settings.xml
+<component name="CopyrightManager">
+  <settings default="" />
+</component>
\ No newline at end of file
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredIdentifiers">
+        <list>
+          <option value="Code.MultiView.Mumbo.Mumbo.len" />
+          <option value="Code.MultiView.Mumbo.Mumbo.range" />
+          <option value="Code.MultiView.Mumbo.Mumbo.zip" />
+          <option value="Code.MultiView.Mumbo.Mumbo.int" />
+          <option value="Code.MultiView.Mumbo.Mumbo.attr" />
+          <option value="Code.MultiView.Mumbo.Mumbo.getattr" />
+        </list>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>
\ No newline at end of file
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="PROJECT_PROFILE" value="Project Default" />
+    <option name="USE_PROJECT_PROFILE" value="true" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.4.3 (/usr/bin/python3.4)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" languageLevel="JDK_1_3" assert-keyword="false" jdk-15="false" project-jdk-name="Python 2.7.11 (~/anaconda2/bin/python)" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
--- a/Code/MultiView/ExecMultiview.py
+++ b/Code/MultiView/ExecMultiview.py
+import Mumbo.Mumbo as Mumbo
+import GetMutliviewDb as DB
+
+NB_VIEW = 4
+DATASET_LENGTH = 300
+NB_CLASS = 4
+NB_ITER = 50
+classifierName="DecisionTree"
+NB_CORES = 4
+classifierConfig = 3
+
+DATASET, VIEW_DIMENSIONS, CLASS_LABELS = DB.createFakeData(NB_VIEW, DATASET_LENGTH, NB_CLASS)
+bestClassifiers, generalAlphas, bestViews = Mumbo.trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH, classifierName, NB_CORES, classifierConfig)
+
+predictedLabels = Mumbo.classifyMumbo(DATASET, bestClassifiers, generalAlphas, bestViews, NB_CLASS)
--- a/Code/MultiView/GetMutliviewDb.py
+++ b/Code/MultiView/GetMutliviewDb.py
 import numpy as np

-def getOneViewFromDB(viewName, pathToDB):
-	view = np.genfromtxt(pathToDB + viewName, delimiter=';')
+
+def getOneViewFromDB(viewName, pathToDB, DBName):
+    view = np.genfromtxt(pathToDB + DBName +"-" + viewName, delimiter=';')
    return view

-def getClassLabels(pathToDB):
-	labels = np.genfromtxt(pathToDB + "ClassLabels.csv", delimiter=';')
+
+def getClassLabels(pathToDB, DBName):
+    labels = np.genfromtxt(pathToDB + DBName + "-" + "ClassLabels.csv", delimiter=';')
    return labels

-def getDataset(pathToDB, viewNames):
+
+def getDataset(pathToDB, viewNames, DBName):
    dataset = []
    for viewName in viewNames:
-		dataset.append(getOneViewFromDB(viewName, pathtoDB))
+        dataset.append(getOneViewFromDB(viewName, pathToDB, DBName))
    return np.array(dataset)
+
+
+def createFakeData(NB_VIEW, DATASET_LENGTH, NB_CLASS):
+    VIEW_DIMENSIONS = np.random.random_integers(5, 20, NB_VIEW)
+
+    DATA = [
+                        np.array([
+                                     np.random.normal(0.0, 2, viewDimension)
+                                     for i in np.arange(DATASET_LENGTH)])
+                        for viewDimension in VIEW_DIMENSIONS]
+
+    CLASS_LABELS = np.random.random_integers(0, NB_CLASS-1, DATASET_LENGTH)
+    return DATA, VIEW_DIMENSIONS, CLASS_LABELS
\ No newline at end of file
--- a/Code/MultiView/Mumbo/Classifiers/DecisionTree.py
+++ b/Code/MultiView/Mumbo/Classifiers/DecisionTree.py
 from sklearn import tree
-from sklearn.multiclass import OneVsRestClassifier
-
+# from sklearn.multiclass import OneVsRestClassifier
+from ModifiedMulticlass import OneVsRestClassifier

 # Add weights 

-def decisionTree(data, labels, arg):
+def DecisionTree(data, labels, arg, weights):
    classifier = OneVsRestClassifier(tree.DecisionTreeClassifier(max_depth=arg))
-    classifier.fit(data, labels)
+    classifier.fit(data, labels, sample_weight=weights)
    return classifier, classifier.predict(data)
+
--- a/Code/MultiView/Mumbo/Classifiers/ModifiedMulticlass.py
+++ b/Code/MultiView/Mumbo/Classifiers/ModifiedMulticlass.py
--- a/Code/MultiView/Mumbo/Classifiers/__init__.py
+++ b/Code/MultiView/Mumbo/Classifiers/__init__.py
+# from os import listdir
+# from os.path import isfile, join
+# mypath="."
+# modules = [f[:-3] for f in listdir(mypath) if isfile(join(mypath, f)) and f[-3:] == ".py" and f!="__init__.py" ]
+# __all__ = modules
+
+import os
+for module in os.listdir(os.path.dirname(os.path.realpath(__file__))):
+    if module == '__init__.py' or module[-3:] != '.py':
+        continue
+    __import__(module[:-3], locals(), globals())
+del module
+del os
--- a/Code/MultiView/Mumbo/Mumbo.py
+++ b/Code/MultiView/Mumbo/Mumbo.py
 import numpy as np
 import math
 from joblib import Parallel, delayed
-import Classifers
+from Classifiers import *

 # Data shape : ((Views, Examples, Coordinates))

@@ -29,7 +29,7 @@ def initialize(NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH, CLASS_LABELS):
                        ])
    fs = np.zeros((NB_ITER, NB_VIEW, DATASET_LENGTH, NB_CLASS))
    ds = np.zeros((NB_ITER, NB_VIEW, DATASET_LENGTH))
-    edges = np.zeros((NB_ITER, NB_VIEW, DATASET_LENGTH))
+    edges = np.zeros((NB_ITER, NB_VIEW))
    alphas = np.zeros((NB_ITER, NB_VIEW))
    predictions = np.zeros((NB_ITER, NB_VIEW, DATASET_LENGTH))
    generalAlphas = np.zeros(NB_ITER)
@@ -51,30 +51,38 @@ def trainWeakClassifier(classifierName, monoviewDataset, CLASS_LABELS, costMatri
                        NB_CLASS, DATASET_LENGTH, iterIndice, viewIndice, 
                        classifier_config):
    weights = computeWeights(costMatrices, NB_CLASS, DATASET_LENGTH, 
-                            iterIndice, viewIndice)
-    #Train classifier(classifierName, weights, DATASET, CLASS_LABEL, classifier_config)
+                            iterIndice, viewIndice, CLASS_LABELS)
+    classifierMethod=globals()["DecisionTree"].DecisionTree #Lets a function be looked up from its name given as a string
+    classifier, classes = classifierMethod(monoviewDataset, CLASS_LABELS, classifier_config, weights)
    return classifier, classes



 def trainWeakClassifers(classifierName, DATASET, CLASS_LABELS, costMatrices,
                        NB_CLASS, DATASET_LENGTH, iterIndice, classifier_config,
-                        NB_CORES):
+                        NB_CORES, NB_VIEW):
+    trainedClassifiers = []
+    labelsMatrix = []
    if NB_CORES > NB_VIEW:
        NB_JOBS = NB_VIEW
    else:
        NB_JOBS = NB_CORES

-    trainedClassifiers, classesMatrix = Parallel(n_jobs=NB_JOBS)(
+    trainedClassifiersAndLabels = Parallel(n_jobs=NB_JOBS)(
        delayed(trainWeakClassifier)(classifierName, DATASET[viewIndice], CLASS_LABELS, 
                                    costMatrices, NB_CLASS, DATASET_LENGTH, 
                                    iterIndice, viewIndice, classifier_config) 
        for viewIndice in range(NB_VIEW))
-        return trainedClassifiers, classesMatrix

+    for (classifier, labelsArray) in trainedClassifiersAndLabels:
+        trainedClassifiers.append(classifier)
+        labelsMatrix.append(labelsArray)

-def computeEdge (predictionMatrix, costMatrix):
-    return np.sum(predictionMatrix*costMatrix)
+    return np.array(trainedClassifiers), np.array(labelsMatrix)
+
+
+def computeEdge(predictionMatrix, costMatrix, NB_CLASS):
+    return np.sum(np.array([np.sum(predictionMatrix*costMatrix[:,classIndice]) for classIndice in range(NB_CLASS)]))


 def computeAlpha(edge):
@@ -131,7 +139,7 @@ def updateCostmatrices(costMatrices, fs, iterIndice, NB_VIEW, DATASET_LENGTH,
        for exampleIndice in range(DATASET_LENGTH):
            for classe in range(NB_CLASS):
                if classe != CLASS_LABELS[exampleIndice]:
-                    costMatrices[iterIndice, viewIndice, exampleIndice, classe] \
+                    costMatrices[iterIndice+1, viewIndice, exampleIndice, classe] \
                    = math.exp(fs[iterIndice, viewIndice, exampleIndice, classe] - \
                      fs[iterIndice, viewIndice, 
                        exampleIndice, CLASS_LABELS[exampleIndice]])
@@ -142,9 +150,9 @@ def updateCostmatrices(costMatrices, fs, iterIndice, NB_VIEW, DATASET_LENGTH,
    return costMatrices


-def chooseView(predictions, generalCostMatrix, iterIndice, NB_VIEW):
+def chooseView(predictions, generalCostMatrix, iterIndice, NB_VIEW, NB_CLASS):
    edges = np.array([computeEdge(predictions[iterIndice, viewIndice], 
-                                    generalCostMatrix) \
+                                    generalCostMatrix[iterIndice], NB_CLASS) \
                      for viewIndice in range(NB_VIEW)])
    bestView = np.argmax(edges)
    return bestView, edges[bestView]
@@ -204,7 +212,7 @@ def computeFinalFs(DATASET_LENGTH, NB_CLASS, generalAlphas, predictions,


 def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH,
-                 classifierName, NB_CORES):
+               classifierName, NB_CORES, classifierConfig):
    
    # Initialization
    costMatrices, \
@@ -218,6 +226,7 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH
    # Learning
    for iterIndice in range(NB_ITER):

+
        classifiers, predictedLabels = trainWeakClassifers(classifierName,
                                                           DATASET,
                                                           CLASS_LABELS,
@@ -225,8 +234,9 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH
                                                           NB_CLASS,
                                                           DATASET_LENGTH,
                                                           iterIndice,
-                                                            classifier_config, 
-                                                            NB_CORES)
+                                                           classifierConfig,
+                                                           NB_CORES, NB_VIEW)
+
        predictions[iterIndice] = predictedLabels

        for viewIndice in range(NB_VIEW):
@@ -234,7 +244,8 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH
            edges[iterIndice, viewIndice] = computeEdge(predictions[iterIndice,
                                                                    viewIndice],
                                                        costMatrices[iterIndice+1,
-                                                                    viewIndice])
+                                                                     viewIndice], NB_CLASS)
+
            alphas[iterIndice, viewIndice] = computeAlpha(edges[iterIndice,
                                                                viewIndice])

@@ -247,7 +258,7 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH
                                          NB_CLASS, CLASS_LABELS)

        bestView, edge = chooseView(predictions, generalCostMatrix,
-                                    iterIndice, NB_VIEW)
+                                    iterIndice, NB_VIEW, NB_CLASS)

        bestViews[iterIndice] = bestView
        generalAlphas[iterIndice] = computeAlpha(edge)
@@ -261,14 +272,16 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH
                                                    CLASS_LABELS)

    # finalFs = computeFinalFs(DATASET_LENGTH, NB_CLASS, generalAlphas, predictions, bestViews, CLASS_LABELS, NB_ITER)
-    return bestClassifiers, generalAlphas
+    return bestClassifiers, generalAlphas, bestViews

-def classifyMumbo(DATASET, classifiers, alphas, NB_CLASS):
-    DATASET_LENGTH = len(DATASET)
+def classifyMumbo(DATASET, classifiers, alphas, views, NB_CLASS):
+    DATASET_LENGTH = len(DATASET[0])
    predictedLabels = np.zeros(DATASET_LENGTH)
+
    for exampleIndice in range(DATASET_LENGTH):
        votes = np.zeros(NB_CLASS)
-        for classifier, alpha in zip(classifiers, alphas):
-            votes[int(classifier.predict(DATASET[exampleIndice]))]+=alpha
+        for classifier, alpha, view in zip(classifiers, alphas, views):
+            data = np.array([np.array(DATASET[int(view)][exampleIndice])])
+            votes[int(classifier.predict(data))]+=alpha
        predictedLabels[exampleIndice] = np.argmax(votes)
    return predictedLabels
\ No newline at end of file
--- a/Code/MultiView/Mumbo/__init__.py
+++ b/Code/MultiView/Mumbo/__init__.py
+__all__ = ["Mumbo", "Classifiers"]
\ No newline at end of file
--- a/multiview-machine-learning-omis.iml
+++ b/multiview-machine-learning-omis.iml
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager" inherit-compiler-output="true">
+    <exclude-output />
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file