Skip to content
Snippets Groups Projects
Commit 73881adf authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Tested mumbo on fake data

parent 3f33ad04
No related branches found
No related tags found
No related merge requests found
Showing
with 1971 additions and 113 deletions
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4"> <module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager"> <component name="NewModuleRootManager" inherit-compiler-output="false">
<content url="file://$MODULE_DIR$" /> <content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" /> <orderEntry type="jdk" jdkName="Python 2.7.11 (~/anaconda2/bin/python)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
</component> </component>
<component name="TestRunnerService"> <component name="TestRunnerService">
......
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<resourceExtensions />
<wildcardResourcePatterns>
<entry name="!?*.java" />
<entry name="!?*.form" />
<entry name="!?*.class" />
<entry name="!?*.groovy" />
<entry name="!?*.scala" />
<entry name="!?*.flex" />
<entry name="!?*.kt" />
<entry name="!?*.clj" />
<entry name="!?*.aj" />
</wildcardResourcePatterns>
<annotationProcessing>
<profile default="true" name="Default" enabled="false">
<processorPath useClasspath="true" />
</profile>
</annotationProcessing>
</component>
</project>
\ No newline at end of file
<component name="CopyrightManager">
<settings default="" />
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="Code.MultiView.Mumbo.Mumbo.len" />
<option value="Code.MultiView.Mumbo.Mumbo.range" />
<option value="Code.MultiView.Mumbo.Mumbo.zip" />
<option value="Code.MultiView.Mumbo.Mumbo.int" />
<option value="Code.MultiView.Mumbo.Mumbo.attr" />
<option value="Code.MultiView.Mumbo.Mumbo.getattr" />
</list>
</option>
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="PROJECT_PROFILE" value="Project Default" />
<option name="USE_PROJECT_PROFILE" value="true" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.4.3 (/usr/bin/python3.4)" project-jdk-type="Python SDK" /> <component name="ProjectRootManager" version="2" languageLevel="JDK_1_3" assert-keyword="false" jdk-15="false" project-jdk-name="Python 2.7.11 (~/anaconda2/bin/python)" project-jdk-type="Python SDK" />
</project> </project>
\ No newline at end of file
This diff is collapsed.
import Mumbo.Mumbo as Mumbo
import GetMutliviewDb as DB
# Smoke-test driver: builds a random multiview dataset, trains Mumbo on it,
# then classifies the very same examples with the trained model.

# Settings handed to DB.createFakeData and Mumbo.trainMumbo below.
NB_VIEW = 4
DATASET_LENGTH = 300
NB_CLASS = 4
NB_ITER = 50
# Forwarded to trainMumbo as the weak-classifier name.
classifierName="DecisionTree"
NB_CORES = 4
# Forwarded to trainMumbo as the weak-classifier configuration.
# NOTE(review): presumably ends up as the decision tree max_depth - confirm.
classifierConfig = 3
# Generate the fake views, their dimensions and the class labels.
DATASET, VIEW_DIMENSIONS, CLASS_LABELS = DB.createFakeData(NB_VIEW, DATASET_LENGTH, NB_CLASS)
# Train; returns the per-iteration classifiers, their weights and their views.
bestClassifiers, generalAlphas, bestViews = Mumbo.trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH, classifierName, NB_CORES, classifierConfig)
# Predict on the training data itself; the result is not used further here.
predictedLabels = Mumbo.classifyMumbo(DATASET, bestClassifiers, generalAlphas, bestViews, NB_CLASS)
import numpy as np import numpy as np
def getOneViewFromDB(viewName, pathToDB, DBName):
    """Load one view of a database from a ';'-separated text file.

    The file read is ``pathToDB + DBName + "-" + viewName``. The parts are
    concatenated as-is, so ``pathToDB`` must already end with a path
    separator when it names a directory.

    Returns the array produced by ``np.genfromtxt`` for that file.
    """
    viewFile = pathToDB + DBName + "-" + viewName
    return np.genfromtxt(viewFile, delimiter=';')
def getClassLabels(pathToDB, DBName):
    """Load the class labels of a database from a ';'-separated text file.

    The file read is ``pathToDB + DBName + "-ClassLabels.csv"``. The parts
    are concatenated as-is, so ``pathToDB`` must already end with a path
    separator when it names a directory.

    Returns the array produced by ``np.genfromtxt`` for that file.
    """
    labelsFile = pathToDB + DBName + "-" + "ClassLabels.csv"
    return np.genfromtxt(labelsFile, delimiter=';')
def getDataset(pathToDB, viewNames, DBName):
    """Load every view named in ``viewNames`` and stack them in one array.

    Each view is read with ``getOneViewFromDB(viewName, pathToDB, DBName)``,
    in the order given by ``viewNames``.

    Returns ``np.array`` applied to the list of loaded views.
    """
    # NOTE(review): views with different numbers of columns cannot form a
    # regular array - confirm how callers expect that case to behave.
    dataset = [getOneViewFromDB(viewName, pathToDB, DBName)
               for viewName in viewNames]
    return np.array(dataset)
def createFakeData(NB_VIEW, DATASET_LENGTH, NB_CLASS):
    """Generate a random multiview dataset for testing.

    Parameters:
        NB_VIEW: number of views to generate.
        DATASET_LENGTH: number of examples in every view.
        NB_CLASS: number of classes; labels are drawn from 0..NB_CLASS-1.

    Returns a tuple ``(DATA, VIEW_DIMENSIONS, CLASS_LABELS)``:
        DATA: list of NB_VIEW arrays, view ``i`` having shape
            ``(DATASET_LENGTH, VIEW_DIMENSIONS[i])`` with values drawn from a
            normal distribution (mean 0.0, standard deviation 2).
        VIEW_DIMENSIONS: array of NB_VIEW integers, each between 5 and 20
            inclusive.
        CLASS_LABELS: array of DATASET_LENGTH integer labels.
    """
    # np.random.random_integers is deprecated; randint is its documented
    # replacement. Its upper bound is exclusive, hence 21 and NB_CLASS.
    VIEW_DIMENSIONS = np.random.randint(5, 21, NB_VIEW)
    # One draw per view replaces the per-example Python loop.
    DATA = [np.random.normal(0.0, 2, (DATASET_LENGTH, viewDimension))
            for viewDimension in VIEW_DIMENSIONS]
    CLASS_LABELS = np.random.randint(0, NB_CLASS, DATASET_LENGTH)
    return DATA, VIEW_DIMENSIONS, CLASS_LABELS
\ No newline at end of file
from sklearn import tree from sklearn import tree
from sklearn.multiclass import OneVsRestClassifier # from sklearn.multiclass import OneVsRestClassifier
from ModifiedMulticlass import OneVsRestClassifier
# Add weights # Add weights
def DecisionTree(data, labels, arg, weights):
    """Fit a weighted one-vs-rest decision tree and predict the training data.

    Parameters:
        data: training examples passed to ``fit`` and ``predict``.
        labels: training labels passed to ``fit``.
        arg: value used as ``max_depth`` of the underlying decision tree.
        weights: passed to ``fit`` as ``sample_weight``.

    Returns ``(classifier, predictions)`` where ``predictions`` is the fitted
    classifier's output on ``data`` itself.
    """
    classifier = OneVsRestClassifier(tree.DecisionTreeClassifier(max_depth=arg))
    classifier.fit(data, labels, sample_weight=weights)
    predictions = classifier.predict(data)
    return classifier, predictions
This diff is collapsed.
# from os import listdir
# from os.path import isfile, join
# mypath="."
# modules = [f[:-3] for f in listdir(mypath) if isfile(join(mypath, f)) and f[-3:] == ".py" and f!="__init__.py" ]
# __all__ = modules
import os

# Import every sibling ``.py`` file found next to this ``__init__.py`` so the
# package loads all of its modules without listing them by hand.
for module in os.listdir(os.path.dirname(os.path.realpath(__file__))):
    if module == '__init__.py' or not module.endswith('.py'):
        continue
    # __import__ takes (name, globals, locals); the two dicts are the same
    # object at module level, but they are passed in the documented order.
    __import__(module[:-3], globals(), locals())
# Keep the helper names out of the package namespace.
del module
del os
import numpy as np import numpy as np
import math import math
from joblib import Parallel, delayed from joblib import Parallel, delayed
import Classifers from Classifiers import *
# Data shape : ((Views, Examples, Corrdinates)) # Data shape : ((Views, Examples, Corrdinates))
...@@ -29,7 +29,7 @@ def initialize(NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH, CLASS_LABELS): ...@@ -29,7 +29,7 @@ def initialize(NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH, CLASS_LABELS):
]) ])
fs = np.zeros((NB_ITER, NB_VIEW, DATASET_LENGTH, NB_CLASS)) fs = np.zeros((NB_ITER, NB_VIEW, DATASET_LENGTH, NB_CLASS))
ds = np.zeros((NB_ITER, NB_VIEW, DATASET_LENGTH)) ds = np.zeros((NB_ITER, NB_VIEW, DATASET_LENGTH))
edges = np.zeros((NB_ITER, NB_VIEW, DATASET_LENGTH)) edges = np.zeros((NB_ITER, NB_VIEW))
alphas = np.zeros((NB_ITER, NB_VIEW)) alphas = np.zeros((NB_ITER, NB_VIEW))
predictions = np.zeros((NB_ITER, NB_VIEW, DATASET_LENGTH)) predictions = np.zeros((NB_ITER, NB_VIEW, DATASET_LENGTH))
generalAlphas = np.zeros(NB_ITER) generalAlphas = np.zeros(NB_ITER)
...@@ -51,30 +51,38 @@ def trainWeakClassifier(classifierName, monoviewDataset, CLASS_LABELS, costMatri ...@@ -51,30 +51,38 @@ def trainWeakClassifier(classifierName, monoviewDataset, CLASS_LABELS, costMatri
NB_CLASS, DATASET_LENGTH, iterIndice, viewIndice, NB_CLASS, DATASET_LENGTH, iterIndice, viewIndice,
classifier_config): classifier_config):
weights = computeWeights(costMatrices, NB_CLASS, DATASET_LENGTH, weights = computeWeights(costMatrices, NB_CLASS, DATASET_LENGTH,
iterIndice, viewIndice) iterIndice, viewIndice, CLASS_LABELS)
#Train classifier(classifierName, weights, DATASET, CLASS_LABEL, classifier_config) classifierMethod=globals()["DecisionTree"].DecisionTree #Permet d'appeler une fonction avec une string
classifier, classes = classifierMethod(monoviewDataset, CLASS_LABELS, classifier_config, weights)
return classifier, classes return classifier, classes
def trainWeakClassifers(classifierName, DATASET, CLASS_LABELS, costMatrices,
                        NB_CLASS, DATASET_LENGTH, iterIndice, classifier_config,
                        NB_CORES, NB_VIEW):
    """Train one weak classifier per view, in parallel.

    ``trainWeakClassifier`` is called once for every view index in
    ``range(NB_VIEW)`` with ``DATASET[viewIndice]`` as its data; the other
    arguments are forwarded unchanged.

    Returns ``(classifiers, labels)`` as two arrays built from the per-view
    results, in view order.
    """
    # Never start more workers than there are views to train.
    NB_JOBS = min(NB_CORES, NB_VIEW)
    trainedClassifiersAndLabels = Parallel(n_jobs=NB_JOBS)(
        delayed(trainWeakClassifier)(classifierName, DATASET[viewIndice],
                                     CLASS_LABELS, costMatrices, NB_CLASS,
                                     DATASET_LENGTH, iterIndice, viewIndice,
                                     classifier_config)
        for viewIndice in range(NB_VIEW))
    # Each job returns a (classifier, predicted labels) pair; split them.
    trainedClassifiers = [classifier
                          for classifier, _ in trainedClassifiersAndLabels]
    labelsMatrix = [labelsArray
                    for _, labelsArray in trainedClassifiersAndLabels]
    return np.array(trainedClassifiers), np.array(labelsMatrix)


def computeEdge(predictionMatrix, costMatrix, NB_CLASS):
    """Return the sum over classes of ``sum(predictionMatrix * costMatrix[:, c])``.

    Parameters:
        predictionMatrix: array multiplied element-wise with each cost column.
        costMatrix: two-dimensional array indexed as ``costMatrix[:, c]``.
        NB_CLASS: number of leading columns of ``costMatrix`` to accumulate.
    """
    return np.sum([np.sum(predictionMatrix * costMatrix[:, classIndice])
                   for classIndice in range(NB_CLASS)])
def computeAlpha(edge): def computeAlpha(edge):
...@@ -131,7 +139,7 @@ def updateCostmatrices(costMatrices, fs, iterIndice, NB_VIEW, DATASET_LENGTH, ...@@ -131,7 +139,7 @@ def updateCostmatrices(costMatrices, fs, iterIndice, NB_VIEW, DATASET_LENGTH,
for exampleIndice in range(DATASET_LENGTH): for exampleIndice in range(DATASET_LENGTH):
for classe in range(NB_CLASS): for classe in range(NB_CLASS):
if classe != CLASS_LABELS[exampleIndice]: if classe != CLASS_LABELS[exampleIndice]:
costMatrices[iterIndice, viewIndice, exampleIndice, classe] \ costMatrices[iterIndice+1, viewIndice, exampleIndice, classe] \
= math.exp(fs[iterIndice, viewIndice, exampleIndice, classe] - \ = math.exp(fs[iterIndice, viewIndice, exampleIndice, classe] - \
fs[iterIndice, viewIndice, fs[iterIndice, viewIndice,
exampleIndice, CLASS_LABELS[exampleIndice]]) exampleIndice, CLASS_LABELS[exampleIndice]])
...@@ -142,9 +150,9 @@ def updateCostmatrices(costMatrices, fs, iterIndice, NB_VIEW, DATASET_LENGTH, ...@@ -142,9 +150,9 @@ def updateCostmatrices(costMatrices, fs, iterIndice, NB_VIEW, DATASET_LENGTH,
return costMatrices return costMatrices
def chooseView(predictions, generalCostMatrix, iterIndice, NB_VIEW, NB_CLASS):
    """Pick the view whose predictions give the largest edge.

    For every view index in ``range(NB_VIEW)`` the edge is
    ``computeEdge(predictions[iterIndice, viewIndice],
    generalCostMatrix[iterIndice], NB_CLASS)``.

    Returns ``(bestView, bestEdge)``: the index of the maximal edge (the
    first one on ties, as given by ``np.argmax``) and that edge value.
    """
    # Brackets already allow line continuation, so no backslashes are needed.
    edges = np.array([computeEdge(predictions[iterIndice, viewIndice],
                                  generalCostMatrix[iterIndice], NB_CLASS)
                      for viewIndice in range(NB_VIEW)])
    bestView = np.argmax(edges)
    return bestView, edges[bestView]
...@@ -204,7 +212,7 @@ def computeFinalFs(DATASET_LENGTH, NB_CLASS, generalAlphas, predictions, ...@@ -204,7 +212,7 @@ def computeFinalFs(DATASET_LENGTH, NB_CLASS, generalAlphas, predictions,
def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH, def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH,
classifierName, NB_CORES): classifierName, NB_CORES, classifierConfig):
# Initialization # Initialization
costMatrices, \ costMatrices, \
...@@ -218,6 +226,7 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH ...@@ -218,6 +226,7 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH
# Learning # Learning
for iterIndice in range(NB_ITER): for iterIndice in range(NB_ITER):
classifiers, predictedLabels = trainWeakClassifers(classifierName, classifiers, predictedLabels = trainWeakClassifers(classifierName,
DATASET, DATASET,
CLASS_LABELS, CLASS_LABELS,
...@@ -225,8 +234,9 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH ...@@ -225,8 +234,9 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH
NB_CLASS, NB_CLASS,
DATASET_LENGTH, DATASET_LENGTH,
iterIndice, iterIndice,
classifier_config, classifierConfig,
NB_CORES) NB_CORES, NB_VIEW)
predictions[iterIndice] = predictedLabels predictions[iterIndice] = predictedLabels
for viewIndice in range(NB_VIEW): for viewIndice in range(NB_VIEW):
...@@ -234,7 +244,8 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH ...@@ -234,7 +244,8 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH
edges[iterIndice, viewIndice] = computeEdge(predictions[iterIndice, edges[iterIndice, viewIndice] = computeEdge(predictions[iterIndice,
viewIndice], viewIndice],
costMatrices[iterIndice+1, costMatrices[iterIndice+1,
viewIndice]) viewIndice], NB_CLASS)
alphas[iterIndice, viewIndice] = computeAlpha(edges[iterIndice, alphas[iterIndice, viewIndice] = computeAlpha(edges[iterIndice,
viewIndice]) viewIndice])
...@@ -247,7 +258,7 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH ...@@ -247,7 +258,7 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH
NB_CLASS, CLASS_LABELS) NB_CLASS, CLASS_LABELS)
bestView, edge = chooseView(predictions, generalCostMatrix, bestView, edge = chooseView(predictions, generalCostMatrix,
iterIndice, NB_VIEW) iterIndice, NB_VIEW, NB_CLASS)
bestViews[iterIndice] = bestView bestViews[iterIndice] = bestView
generalAlphas[iterIndice] = computeAlpha(edge) generalAlphas[iterIndice] = computeAlpha(edge)
...@@ -261,14 +272,16 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH ...@@ -261,14 +272,16 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH
CLASS_LABELS) CLASS_LABELS)
# finalFs = computeFinalFs(DATASET_LENGTH, NB_CLASS, generalAlphas, predictions, bestViews, CLASS_LABELS, NB_ITER) # finalFs = computeFinalFs(DATASET_LENGTH, NB_CLASS, generalAlphas, predictions, bestViews, CLASS_LABELS, NB_ITER)
return bestClassifiers, generalAlphas return bestClassifiers, generalAlphas, bestViews
def classifyMumbo(DATASET, classifiers, alphas, views, NB_CLASS):
    """Label every example by an alpha-weighted vote of the classifiers.

    Parameters:
        DATASET: sequence of views; ``DATASET[v]`` holds one row per example
            and ``len(DATASET[0])`` is taken as the number of examples.
        classifiers: objects exposing ``predict(rows)``.
        alphas: vote weight of each classifier.
        views: for each classifier, the index (converted with ``int``) of the
            view it must be applied to.
        NB_CLASS: number of classes, i.e. number of vote slots per example.

    Returns a float array holding, for each example, the class with the
    largest total weight (the lowest class index on ties).
    """
    DATASET_LENGTH = len(DATASET[0])
    if DATASET_LENGTH == 0:
        return np.zeros(0)
    votes = np.zeros((DATASET_LENGTH, NB_CLASS))
    exampleIndices = np.arange(DATASET_LENGTH)
    for classifier, alpha, view in zip(classifiers, alphas, views):
        # One batched predict per classifier instead of one call per example.
        # This also avoids int() on a one-element array, which recent NumPy
        # deprecates. Assumes predict() treats rows independently.
        viewData = np.asarray(DATASET[int(view)])
        labels = np.asarray(classifier.predict(viewData)).ravel().astype(int)
        votes[exampleIndices, labels] += alpha
    return np.argmax(votes, axis=1).astype(float)
\ No newline at end of file
# Submodules pulled in by a star-import of this package.
__all__ = ["Mumbo", "Classifiers"]
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment