Select Git revision
MacaonDecode.cpp
-
Franck Dary authored
The mcd file is no longer needed: the mcd can be given through a program argument, the default one can be used, or it can be read from the conllu file metadata
Franck Dary authored: the mcd file is no longer needed: the mcd can be given through a program argument, the default one can be used, or it can be read from the conllu file metadata
LateFusion.py 6.67 KiB
#!/usr/bin/env python
# -*- encoding: utf-8
import numpy as np
import itertools
from joblib import Parallel, delayed
# from sklearn.multiclass import OneVsOneClassifier
# from sklearn.svm import SVC
import os
import sys
import MonoviewClassifiers
import Metrics
from utils.Dataset import getV
def canProbasClassifier(classifierConfig):
    """Tell whether the given object exposes a ``predict_proba`` attribute.

    The check is a plain attribute lookup, so any object may be passed.

    :param classifierConfig: object to inspect.
    :return: True when ``predict_proba`` can be looked up on the object,
        False when that lookup raises AttributeError.
    """
    notFound = object()
    # getattr with a default only absorbs AttributeError; any other error
    # raised by the lookup still propagates to the caller.
    return getattr(classifierConfig, "predict_proba", notFound) is not notFound
def fitMonoviewClassifier(classifierName, data, labels, classifierConfig, needProbas, randomState):
    """Fit the monoview classifier module named ``classifierName`` on one view.

    :param classifierName: attribute name looked up on ``MonoviewClassifiers``.
    :param data: training data forwarded untouched to the module's ``fit``.
    :param labels: training labels forwarded untouched to the module's ``fit``.
    :param classifierConfig: classifier configuration; nothing is fitted
        unless its type is exactly ``dict``.
    :param needProbas: when true and the module's ``canProbas()`` is false,
        the ``DecisionTree`` module is fitted instead.
    :param randomState: forwarded untouched to the module's ``fit``.
    :return: whatever the module's ``fit`` returns, or None when
        ``classifierConfig`` is not a ``dict``.
    """
    if type(classifierConfig) != dict:
        # Non-dict configurations are not handled here: nothing is fitted.
        return None

    classifierModule = getattr(MonoviewClassifiers, classifierName)
    if needProbas and not classifierModule.canProbas():
        # Fall back to a decision tree with a hard-coded configuration.
        # NOTE(review): this config is passed as one positional dict while the
        # regular path below expands keyword arguments - confirm the asymmetry.
        fallbackModule = getattr(MonoviewClassifiers, "DecisionTree")
        fallbackConfig = {"0": 300, "1": "entropy", "2": "random"}
        return fallbackModule.fit(data, labels, randomState, fallbackConfig)

    # Keyword names are the positions "0", "1", ... of the enumerated config.
    # NOTE(review): enumerating a dict yields its keys, so the keyword values
    # are the config's keys - confirm against the callers.
    return classifierModule.fit(
        data, labels, randomState,
        **{str(position): entry for position, entry in enumerate(classifierConfig)})
def getScores(LateFusionClassifiers):
    """Placeholder: ignore the argument and return an empty string.

    :param LateFusionClassifiers: unused.
    :return: the empty string.
    """
    noScores = ""
    return noScores
def intersect(allClassifersNames, directory, viewsIndices):
    """Choose one classifier per view so that they share the fewest training errors.

    Layout read from disk: ``directory/<classifierName>/<viewDirectory>/`` must
    hold one file ending in ``train_labels.csv`` and one ending in
    ``train_pred.csv``. A training example counts as misclassified when
    ``label + prediction == 1``, which is only meaningful for labels encoded
    as 0/1.

    Every assignment of a classifier to each view is evaluated, and the first
    assignment whose misclassified sets have the smallest common intersection
    is kept.

    :param allClassifersNames: names of the candidate classifiers (also the
        names of their sub-directories in ``directory``).
    :param directory: root directory of the monoview results.
    :param viewsIndices: indices of the views; only its length is used.
    :return: list with one classifier name per view.
    :raises ValueError: when no classifier name or no view directory is given.
    :raises IOError: when a view directory lacks its labels or predictions file.
    """
    if not allClassifersNames:
        raise ValueError("intersect needs at least one classifier name")
    nbViews = len(viewsIndices)

    # wrongSets[classifierIndex][viewIndex] -> indices of misclassified examples.
    wrongSets = []
    length = None
    for classifierName in allClassifersNames:
        classifierDirectory = os.path.join(directory, classifierName)
        viewWrongSets = []
        # os.listdir order is arbitrary: sort so that a given view index means
        # the same view directory name for every classifier.
        for viewDirectoryName in sorted(os.listdir(classifierDirectory)):
            viewDirectory = os.path.join(classifierDirectory, viewDirectoryName)
            # Reset per view so a missing file is detected instead of silently
            # reusing the file found for the previous view.
            yTrainFileName = None
            yTrainPredFileName = None
            for resultFileName in os.listdir(viewDirectory):
                if resultFileName.endswith("train_labels.csv"):
                    yTrainFileName = os.path.join(viewDirectory, resultFileName)
                elif resultFileName.endswith("train_pred.csv"):
                    yTrainPredFileName = os.path.join(viewDirectory, resultFileName)
            if yTrainFileName is None or yTrainPredFileName is None:
                raise IOError("Missing train labels or train predictions file in " + viewDirectory)
            # atleast_1d keeps single-example files usable with len().
            train = np.atleast_1d(np.genfromtxt(yTrainFileName, delimiter=",")).astype(np.int16)
            pred = np.atleast_1d(np.genfromtxt(yTrainPredFileName, delimiter=",")).astype(np.int16)
            length = len(train)
            viewWrongSets.append(np.where(train + pred == 1)[0])
        wrongSets.append(viewWrongSets)
    if length is None:
        raise ValueError("No view directory found under " + str(directory))

    bestLen = None
    bestCombination = None
    # product (not combinations_with_replacement): the assignment is ordered,
    # so (B, A) is a different candidate from (A, B) and must be tried too.
    for combination in itertools.product(range(len(allClassifersNames)), repeat=nbViews):
        # Default integer dtype: int16 would overflow past 32767 examples.
        commonErrors = np.arange(length)
        for viewIndex, classifierIndex in enumerate(combination):
            commonErrors = np.intersect1d(commonErrors, wrongSets[classifierIndex][viewIndex])
        if bestLen is None or len(commonErrors) < bestLen:
            bestLen = len(commonErrors)
            bestCombination = combination
            if bestLen == 0:
                # Cannot be improved any further.
                break
    return [allClassifersNames[index] for index in bestCombination]
def getFormFile(directory, viewDirectory, resultFileName):
    """Read the train score and the metric direction from a result text file.

    The file ``directory/viewDirectory/resultFileName`` is scanned line by
    line. The first word of the last line containing ``"train"`` seen before
    the ``"Score on train"`` line is taken as the metric name, and the number
    after the ``":"`` of the ``"Score on train"`` line as the score.

    :param directory: directory containing the view directory.
    :param viewDirectory: name of the view sub-directory.
    :param resultFileName: name of the result file inside the view directory.
    :return: tuple ``(score, betterHigh)`` where ``betterHigh`` is True when
        the 14th character from the end of the metric module's
        ``getConfig()`` string is ``"h"``.
    :raises ValueError: when the file holds no score or no metric name.
    """
    resultPath = directory+"/"+viewDirectory+"/"+resultFileName
    score = None
    metricName = None
    # The context manager closes the handle even when parsing raises.
    with open(resultPath) as resultFile:
        for line in resultFile:
            if "Score on train" in line:
                score = float(line.strip().split(":")[1])
                break
            elif "train" in line:
                metricName = line.strip().split(" ")[0]
    if score is None or metricName is None:
        raise ValueError("No train score or metric name found in " + resultPath)
    metricModule = getattr(Metrics, metricName)
    # NOTE(review): presumably the config string ends with a fixed-width
    # "higher/lower is better" suffix - confirm against the Metrics modules.
    betterHigh = metricModule.getConfig()[-14] == "h"
    return score, betterHigh
def bestScore(allClassifersNames, directory, viewsIndices):
    """Choose, for each view, the classifier with the best train score.

    Scores are read with ``getFormFile`` from every ``.txt`` file found in
    ``directory/<classifierName>/<viewDirectory>/``. The metric direction
    reported for the last file read decides whether the highest or the lowest
    score wins.

    :param allClassifersNames: names of the candidate classifiers (also the
        names of their sub-directories in ``directory``).
    :param directory: root directory of the monoview results.
    :param viewsIndices: indices of the views; only its length is used.
    :return: list with one classifier name per view.
    :raises ValueError: when no ``.txt`` result file is found at all.
    """
    nbViews = len(viewsIndices)
    nbClassifiers = len(allClassifersNames)
    scores = np.zeros((nbViews, nbClassifiers))
    betterHigh = None
    for classifierIndex, classifierName in enumerate(allClassifersNames):
        classifierDirectory = os.path.join(directory, classifierName)
        # os.listdir order is arbitrary: sort so that a given row of ``scores``
        # means the same view directory name for every classifier.
        for viewIndex, viewDirectory in enumerate(sorted(os.listdir(classifierDirectory))):
            for resultFileName in os.listdir(os.path.join(classifierDirectory, viewDirectory)):
                if resultFileName.endswith(".txt"):
                    # The result file lives under the classifier directory, so
                    # that directory (not the root) must be handed over.
                    scores[viewIndex, classifierIndex], betterHigh = getFormFile(
                        classifierDirectory, viewDirectory, resultFileName)
    if betterHigh is None:
        raise ValueError("No .txt result file found under " + str(directory))
    if betterHigh:
        classifierIndices = np.argmax(scores, axis=1)
    else:
        classifierIndices = np.argmin(scores, axis=1)
    return [allClassifersNames[index] for index in classifierIndices]
def getClassifiers(selectionMethodName, allClassifiersNames, directory, viewsIndices):
    """Run the selection function of this module named ``selectionMethodName``.

    :param selectionMethodName: name of a callable attribute of this module.
    :param allClassifiersNames: forwarded as first argument of that callable.
    :param directory: forwarded as second argument of that callable.
    :param viewsIndices: forwarded as third argument of that callable.
    :return: whatever the selected callable returns.
    """
    # Resolve the strategy by name among the attributes of the current module.
    chooseClassifiers = getattr(sys.modules[__name__], selectionMethodName)
    return chooseClassifiers(allClassifiersNames, directory, viewsIndices)
def getConfig(classifiersNames, resultsMonoview):
    """Collect, for each view position, the stored config of its classifier.

    A result matches position ``i`` when ``result[0] == i`` and
    ``result[1][0]`` equals the classifier name at that position; the config
    is then ``result[1][4]``. When several results match, the last one wins.

    :param classifiersNames: classifier name per view position.
    :param resultsMonoview: indexable monoview results as described above.
    :return: list of configs, with 0 left at positions without a match.
    """
    configs = [0] * len(classifiersNames)
    for position, wantedName in enumerate(classifiersNames):
        matching = [result[1][4]
                    for result in resultsMonoview
                    if result[0] == position and result[1][0] == wantedName]
        if matching:
            configs[position] = matching[-1]
    return configs
class LateFusionClassifier(object):
    """Late-fusion classifier holding one monoview classifier per view."""

    def __init__(self, randomState, monoviewClassifiersNames, monoviewClassifiersConfigs, monoviewSelection, NB_CORES=1):
        """Store the per-view classifier names and configurations.

        :param randomState: stored and later handed to every monoview fit.
        :param monoviewClassifiersNames: one classifier name per view.
        :param monoviewClassifiersConfigs: one configuration per view; must
            not be empty because its first element is inspected.
        :param monoviewSelection: stored as is.
        :param NB_CORES: number of jobs used for the parallel fit.
        """
        firstConfigIsDict = type(monoviewClassifiersConfigs[0]) == dict
        self.monoviewClassifiersNames = monoviewClassifiersNames
        self.monoviewClassifiersConfigs = monoviewClassifiersConfigs
        if firstConfigIsDict:
            # Only dict configurations get an empty classifier list up front;
            # otherwise the attribute first appears in fit_hdf5.
            self.monoviewClassifiers = []
        self.nbCores = NB_CORES
        # One zero-initialised slot per classifier name.
        self.accuracies = np.zeros(len(monoviewClassifiersNames))
        self.needProbas = False
        self.monoviewSelection = monoviewSelection
        self.randomState = randomState

    def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None):
        """Fit one monoview classifier per view through joblib.

        The fitted classifiers are stored, in view order, in
        ``self.monoviewClassifiers``.

        :param DATASET: dataset exposing ``get("Metadata").attrs`` and
            ``get("Labels").value`` (presumably an HDF5 file - TODO confirm).
        :param trainIndices: indices of the training examples; defaults to
            every index below the ``datasetLength`` metadata attribute.
        :param viewsIndices: indices of the views to fit; defaults to every
            index below the ``nbView`` metadata attribute.
        """
        if viewsIndices is None:
            viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
        if trainIndices is None:
            trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"])

        runner = Parallel(n_jobs=self.nbCores)
        # Lazy job stream: the position selects the classifier name and
        # config, the view index selects the data.
        fitJobs = (
            delayed(fitMonoviewClassifier)(
                self.monoviewClassifiersNames[position],
                getV(DATASET, view, trainIndices),
                DATASET.get("Labels").value[trainIndices],
                self.monoviewClassifiersConfigs[position],
                self.needProbas,
                self.randomState)
            for position, view in enumerate(viewsIndices))
        self.monoviewClassifiers = runner(fitJobs)