Skip to content
Snippets Groups Projects
Commit 73881adf authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Tested mumbo on fake data

parent 3f33ad04
No related branches found
No related tags found
No related merge requests found
Showing
with 1971 additions and 113 deletions
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<component name="NewModuleRootManager" inherit-compiler-output="false">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="jdk" jdkName="Python 2.7.11 (~/anaconda2/bin/python)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
......
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<resourceExtensions />
<wildcardResourcePatterns>
<entry name="!?*.java" />
<entry name="!?*.form" />
<entry name="!?*.class" />
<entry name="!?*.groovy" />
<entry name="!?*.scala" />
<entry name="!?*.flex" />
<entry name="!?*.kt" />
<entry name="!?*.clj" />
<entry name="!?*.aj" />
</wildcardResourcePatterns>
<annotationProcessing>
<profile default="true" name="Default" enabled="false">
<processorPath useClasspath="true" />
</profile>
</annotationProcessing>
</component>
</project>
\ No newline at end of file
<component name="CopyrightManager">
<settings default="" />
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="Code.MultiView.Mumbo.Mumbo.len" />
<option value="Code.MultiView.Mumbo.Mumbo.range" />
<option value="Code.MultiView.Mumbo.Mumbo.zip" />
<option value="Code.MultiView.Mumbo.Mumbo.int" />
<option value="Code.MultiView.Mumbo.Mumbo.attr" />
<option value="Code.MultiView.Mumbo.Mumbo.getattr" />
</list>
</option>
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="PROJECT_PROFILE" value="Project Default" />
<option name="USE_PROJECT_PROFILE" value="true" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.4.3 (/usr/bin/python3.4)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_3" assert-keyword="false" jdk-15="false" project-jdk-name="Python 2.7.11 (~/anaconda2/bin/python)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
This diff is collapsed.
import Mumbo.Mumbo as Mumbo
import GetMutliviewDb as DB
# Smoke test: train Mumbo on randomly generated multiview data, then classify
# that same data with the trained ensemble.
NB_VIEW = 4  # number of views generated by createFakeData
DATASET_LENGTH = 300  # number of examples in every view
NB_CLASS = 4  # labels are drawn from 0 .. NB_CLASS-1
NB_ITER = 50  # number of iterations handed to trainMumbo
# NOTE(review): the weak-classifier lookup visible in trainWeakClassifier uses
# the literal "DecisionTree" rather than this name -- confirm whether this
# setting is honoured anywhere else.
classifierName="DecisionTree"
NB_CORES = 4  # trainWeakClassifers never starts more jobs than there are views
classifierConfig = 3  # forwarded to DecisionTree, where it becomes max_depth
# Fake data: one array per view plus a random label for every example.
DATASET, VIEW_DIMENSIONS, CLASS_LABELS = DB.createFakeData(NB_VIEW, DATASET_LENGTH, NB_CLASS)
# Training returns the classifiers, their weights and the view each one was picked for.
bestClassifiers, generalAlphas, bestViews = Mumbo.trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH, classifierName, NB_CORES, classifierConfig)
# Predict on the training data itself; nothing below compares the result with CLASS_LABELS.
predictedLabels = Mumbo.classifyMumbo(DATASET, bestClassifiers, generalAlphas, bestViews, NB_CLASS)
import numpy as np
def getOneViewFromDB(viewName, pathToDB):
view = np.genfromtxt(pathToDB + viewName, delimiter=';')
def getOneViewFromDB(viewName, pathToDB, DBName):
view = np.genfromtxt(pathToDB + DBName +"-" + viewName, delimiter=';')
return view
def getClassLabels(pathToDB):
labels = np.genfromtxt(pathToDB + "ClassLabels.csv", delimiter=';')
def getClassLabels(pathToDB, DBName):
labels = np.genfromtxt(pathToDB + DBName + "-" + "ClassLabels.csv", delimiter=';')
return labels
def getDataset(pathToDB, viewNames):
def getDataset(pathToDB, viewNames, DBName):
dataset = []
for viewName in viewNames:
dataset.append(getOneViewFromDB(viewName, pathtoDB))
dataset.append(getOneViewFromDB(viewName, pathToDB, DBName))
return np.array(dataset)
def createFakeData(NB_VIEW, DATASET_LENGTH, NB_CLASS):
    """Generate a random multiview dataset for testing.

    Each view gets its own dimensionality, drawn uniformly from 5 to 20
    inclusive, and is filled with samples from a normal distribution with
    mean 0.0 and standard deviation 2. Labels are drawn uniformly from
    0 to NB_CLASS-1 inclusive, independently of the data.

    Parameters:
        NB_VIEW: number of views to generate.
        DATASET_LENGTH: number of examples in every view.
        NB_CLASS: number of distinct class labels.

    Returns:
        DATA: list of NB_VIEW arrays, the i-th one of shape
            (DATASET_LENGTH, VIEW_DIMENSIONS[i]).
        VIEW_DIMENSIONS: integer array of length NB_VIEW.
        CLASS_LABELS: integer array of length DATASET_LENGTH.
    """
    # np.random.random_integers is deprecated; randint is its documented
    # replacement. randint excludes the upper bound, hence 21 and NB_CLASS
    # where random_integers took 20 and NB_CLASS-1.
    VIEW_DIMENSIONS = np.random.randint(5, 21, NB_VIEW)
    # One draw per view with an explicit 2-D shape instead of one draw per
    # example; this also keeps the column count when DATASET_LENGTH is 0.
    DATA = [np.random.normal(0.0, 2, (DATASET_LENGTH, int(viewDimension)))
            for viewDimension in VIEW_DIMENSIONS]
    CLASS_LABELS = np.random.randint(0, NB_CLASS, DATASET_LENGTH)
    return DATA, VIEW_DIMENSIONS, CLASS_LABELS
\ No newline at end of file
from sklearn import tree
from sklearn.multiclass import OneVsRestClassifier
# from sklearn.multiclass import OneVsRestClassifier
from ModifiedMulticlass import OneVsRestClassifier
# Add weights
def decisionTree(data, labels, arg):
def DecisionTree(data, labels, arg, weights):
classifier = OneVsRestClassifier(tree.DecisionTreeClassifier(max_depth=arg))
classifier.fit(data, labels)
classifier.fit(data, labels, sample_weight=weights)
return classifier, classifier.predict(data)
This diff is collapsed.
# from os import listdir
# from os.path import isfile, join
# mypath="."
# modules = [f[:-3] for f in listdir(mypath) if isfile(join(mypath, f)) and f[-3:] == ".py" and f!="__init__.py" ]
# __all__ = modules
# Import every sibling ".py" module found next to this file, so that loading
# the package also loads its modules.
# NOTE(review): presumably this is what lets callers reach the classifier
# modules by name after a star-import of the package -- confirm against the
# package layout.
import os

for module in os.listdir(os.path.dirname(os.path.realpath(__file__))):
    # Skip this file itself and anything that is not a Python source file.
    if module == '__init__.py' or module[-3:] != '.py':
        continue
    # __import__ expects (name, globals, locals); the earlier call passed the
    # two namespaces the other way round.
    __import__(module[:-3], globals(), locals())
# Do not leave the loop variable or the os module in the package namespace.
del module
del os
import numpy as np
import math
from joblib import Parallel, delayed
import Classifers
from Classifiers import *
# Data shape : ((Views, Examples, Coordinates))
......@@ -29,7 +29,7 @@ def initialize(NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH, CLASS_LABELS):
])
fs = np.zeros((NB_ITER, NB_VIEW, DATASET_LENGTH, NB_CLASS))
ds = np.zeros((NB_ITER, NB_VIEW, DATASET_LENGTH))
edges = np.zeros((NB_ITER, NB_VIEW, DATASET_LENGTH))
edges = np.zeros((NB_ITER, NB_VIEW))
alphas = np.zeros((NB_ITER, NB_VIEW))
predictions = np.zeros((NB_ITER, NB_VIEW, DATASET_LENGTH))
generalAlphas = np.zeros(NB_ITER)
......@@ -51,30 +51,38 @@ def trainWeakClassifier(classifierName, monoviewDataset, CLASS_LABELS, costMatri
NB_CLASS, DATASET_LENGTH, iterIndice, viewIndice,
classifier_config):
weights = computeWeights(costMatrices, NB_CLASS, DATASET_LENGTH,
iterIndice, viewIndice)
#Train classifier(classifierName, weights, DATASET, CLASS_LABEL, classifier_config)
iterIndice, viewIndice, CLASS_LABELS)
classifierMethod=globals()["DecisionTree"].DecisionTree #Permet d'appeler une fonction avec une string
classifier, classes = classifierMethod(monoviewDataset, CLASS_LABELS, classifier_config, weights)
return classifier, classes
def trainWeakClassifers(classifierName, DATASET, CLASS_LABELS, costMatrices,
NB_CLASS, DATASET_LENGTH, iterIndice, classifier_config,
NB_CORES):
NB_CORES, NB_VIEW):
trainedClassifiers = []
labelsMatrix = []
if NB_CORES > NB_VIEW:
NB_JOBS = NB_VIEW
else:
NB_JOBS = NB_CORES
trainedClassifiers, classesMatrix = Parallel(n_jobs=NB_JOBS)(
trainedClassifiersAndLabels = Parallel(n_jobs=NB_JOBS)(
delayed(trainWeakClassifier)(classifierName, DATASET[viewIndice], CLASS_LABELS,
costMatrices, NB_CLASS, DATASET_LENGTH,
iterIndice, viewIndice, classifier_config)
for viewIndice in range(NB_VIEW))
return trainedClassifiers, classesMatrix
for (classifier, labelsArray) in trainedClassifiersAndLabels:
trainedClassifiers.append(classifier)
labelsMatrix.append(labelsArray)
def computeEdge (predictionMatrix, costMatrix):
return np.sum(predictionMatrix*costMatrix)
return np.array(trainedClassifiers), np.array(labelsMatrix)
def computeEdge(predictionMatrix, costMatrix, NB_CLASS):
    """Return the edge of a set of predictions against a cost matrix.

    For each of the first NB_CLASS columns of costMatrix, the column is
    multiplied element-wise with predictionMatrix and summed; the edge is
    the sum of those per-class totals.

    Parameters:
        predictionMatrix: array of predictions, multiplied against each
            cost column.
        costMatrix: array indexed as costMatrix[:, classIndice].
        NB_CLASS: number of columns of costMatrix to take into account.

    Returns:
        The summed value as a NumPy scalar.
    """
    perClassTotals = []
    for classIndice in range(NB_CLASS):
        classCosts = costMatrix[:, classIndice]
        perClassTotals.append(np.sum(predictionMatrix * classCosts))
    return np.sum(np.array(perClassTotals))
def computeAlpha(edge):
......@@ -131,7 +139,7 @@ def updateCostmatrices(costMatrices, fs, iterIndice, NB_VIEW, DATASET_LENGTH,
for exampleIndice in range(DATASET_LENGTH):
for classe in range(NB_CLASS):
if classe != CLASS_LABELS[exampleIndice]:
costMatrices[iterIndice, viewIndice, exampleIndice, classe] \
costMatrices[iterIndice+1, viewIndice, exampleIndice, classe] \
= math.exp(fs[iterIndice, viewIndice, exampleIndice, classe] - \
fs[iterIndice, viewIndice,
exampleIndice, CLASS_LABELS[exampleIndice]])
......@@ -142,9 +150,9 @@ def updateCostmatrices(costMatrices, fs, iterIndice, NB_VIEW, DATASET_LENGTH,
return costMatrices
def chooseView(predictions, generalCostMatrix, iterIndice, NB_VIEW):
def chooseView(predictions, generalCostMatrix, iterIndice, NB_VIEW, NB_CLASS):
edges = np.array([computeEdge(predictions[iterIndice, viewIndice],
generalCostMatrix) \
generalCostMatrix[iterIndice], NB_CLASS) \
for viewIndice in range(NB_VIEW)])
bestView = np.argmax(edges)
return bestView, edges[bestView]
......@@ -204,7 +212,7 @@ def computeFinalFs(DATASET_LENGTH, NB_CLASS, generalAlphas, predictions,
def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH,
classifierName, NB_CORES):
classifierName, NB_CORES, classifierConfig):
# Initialization
costMatrices, \
......@@ -218,6 +226,7 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH
# Learning
for iterIndice in range(NB_ITER):
classifiers, predictedLabels = trainWeakClassifers(classifierName,
DATASET,
CLASS_LABELS,
......@@ -225,8 +234,9 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH
NB_CLASS,
DATASET_LENGTH,
iterIndice,
classifier_config,
NB_CORES)
classifierConfig,
NB_CORES, NB_VIEW)
predictions[iterIndice] = predictedLabels
for viewIndice in range(NB_VIEW):
......@@ -234,7 +244,8 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH
edges[iterIndice, viewIndice] = computeEdge(predictions[iterIndice,
viewIndice],
costMatrices[iterIndice+1,
viewIndice])
viewIndice], NB_CLASS)
alphas[iterIndice, viewIndice] = computeAlpha(edges[iterIndice,
viewIndice])
......@@ -247,7 +258,7 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH
NB_CLASS, CLASS_LABELS)
bestView, edge = chooseView(predictions, generalCostMatrix,
iterIndice, NB_VIEW)
iterIndice, NB_VIEW, NB_CLASS)
bestViews[iterIndice] = bestView
generalAlphas[iterIndice] = computeAlpha(edge)
......@@ -261,14 +272,16 @@ def trainMumbo(DATASET, CLASS_LABELS, NB_CLASS, NB_VIEW, NB_ITER, DATASET_LENGTH
CLASS_LABELS)
# finalFs = computeFinalFs(DATASET_LENGTH, NB_CLASS, generalAlphas, predictions, bestViews, CLASS_LABELS, NB_ITER)
return bestClassifiers, generalAlphas
return bestClassifiers, generalAlphas, bestViews
def classifyMumbo(DATASET, classifiers, alphas, NB_CLASS):
DATASET_LENGTH = len(DATASET)
def classifyMumbo(DATASET, classifiers, alphas, views, NB_CLASS):
DATASET_LENGTH = len(DATASET[0])
predictedLabels = np.zeros(DATASET_LENGTH)
for exampleIndice in range(DATASET_LENGTH):
votes = np.zeros(NB_CLASS)
for classifier, alpha in zip(classifiers, alphas):
votes[int(classifier.predict(DATASET[exampleIndice]))]+=alpha
for classifier, alpha, view in zip(classifiers, alphas, views):
data = np.array([np.array(DATASET[int(view)][exampleIndice])])
votes[int(classifier.predict(data))]+=alpha
predictedLabels[exampleIndice] = np.argmax(votes)
return predictedLabels
\ No newline at end of file
__all__ = ["Mumbo", "Classifiers"]
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment