Skip to content
Snippets Groups Projects
Commit f7f725c8 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Did Mumbo successfully but there is still a lot of work on fusion

parent cd8e9326
Branches
Tags
No related merge requests found
Showing
with 512 additions and 228 deletions
...@@ -47,13 +47,6 @@ groupStandard.add_argument('--views', metavar='STRING', action='store',help='Nam ...@@ -47,13 +47,6 @@ groupStandard.add_argument('--views', metavar='STRING', action='store',help='Nam
default='') default='')
groupStandard.add_argument('--pathF', metavar='STRING', action='store',help='Path to the views (default: %(default)s)', groupStandard.add_argument('--pathF', metavar='STRING', action='store',help='Path to the views (default: %(default)s)',
default='/home/bbauvin/Documents/Data/Data_multi_omics/') default='/home/bbauvin/Documents/Data/Data_multi_omics/')
groupStandard.add_argument('--fileCL', metavar='STRING', action='store',
help='Name of classLabels CSV-file (default: %(default)s)', default='classLabels.csv')
groupStandard.add_argument('--fileCLD', metavar='STRING', action='store',
help='Name of classLabels-Description CSV-file (default: %(default)s)',
default='classLabels-Description.csv')
groupStandard.add_argument('--fileFeat', metavar='STRING', action='store',
help='Name of feature CSV-file (default: %(default)s)', default='feature.csv')
groupStandard.add_argument('--nice', metavar='INT', action='store', type=int, groupStandard.add_argument('--nice', metavar='INT', action='store', type=int,
help='Niceness for the process', default=0) help='Niceness for the process', default=0)
...@@ -86,8 +79,8 @@ groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', nargs= ...@@ -86,8 +79,8 @@ groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', nargs=
'first one will be used for classification', default=['']) 'first one will be used for classification', default=[''])
groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store', groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store',
help='Determine how many Randomized grid search tests to do', type=int, default=30) help='Determine how many Randomized grid search tests to do', type=int, default=30)
groupClass.add_argument('--CL_NoGS', action='store_false', groupClass.add_argument('--CL_GS_type', metavar='STRING', action='store',
help='Determine how many Randomized grid search tests to do') help='Determine which hyperparamter search function use', default="randomizedSearch")
groupRF = parser.add_argument_group('Random Forest arguments') groupRF = parser.add_argument_group('Random Forest arguments')
groupRF.add_argument('--CL_RF_trees', metavar='STRING', action='store', help='GridSearch: Determine the trees', groupRF.add_argument('--CL_RF_trees', metavar='STRING', action='store', help='GridSearch: Determine the trees',
...@@ -176,9 +169,9 @@ else: ...@@ -176,9 +169,9 @@ else:
getDatabase = getattr(DB, "get" + args.name + "DB" + args.type[1:]) getDatabase = getattr(DB, "get" + args.name + "DB" + args.type[1:])
try: try:
gridSearch = args.CL_NoGS gridSearch = args.CL_GS_type
except: except:
gridSearch = True gridSearch = "None"
directory = os.path.dirname(os.path.abspath(__file__)) + "/Results/" directory = os.path.dirname(os.path.abspath(__file__)) + "/Results/"
logFileName = time.strftime("%Y%m%d-%H%M%S") + "-CMultiV-" + args.CL_type + "-" + "_".join(args.views.split(":")) + "-" + args.name + \ logFileName = time.strftime("%Y%m%d-%H%M%S") + "-CMultiV-" + args.CL_type + "-" + "_".join(args.views.split(":")) + "-" + args.name + \
...@@ -318,14 +311,14 @@ try: ...@@ -318,14 +311,14 @@ try:
for classifier in benchmark["Monoview"]: for classifier in benchmark["Monoview"]:
if classifier=="SCM": if classifier=="SCM":
if DATASET.get("View"+str(allViews.index(view))).attrs["binary"]: if DATASET.get("View"+str(allViews.index(view))).attrs["binary"]:
arguments = {"args":{classifier+"KWARGS": globals()[classifier+"KWARGSInit"], "feat":view, "fileFeat": args.fileFeat, arguments = {"args":{classifier+"KWARGS": globals()[classifier+"KWARGSInit"], "feat":view,
"fileCL": args.fileCL, "fileCLD": args.fileCLD, "CL_type": classifier, "nbClass":NB_CLASS}, "viewIndex":allViews.index(view)} "CL_type": classifier, "nbClass":NB_CLASS}, "viewIndex":allViews.index(view)}
argumentDictionaries["Monoview"].append(arguments) argumentDictionaries["Monoview"].append(arguments)
else: else:
pass pass
else: else:
arguments = {"args":{classifier+"KWARGS": globals()[classifier+"KWARGSInit"], "feat":view, "fileFeat": args.fileFeat, arguments = {"args":{classifier+"KWARGS": globals()[classifier+"KWARGSInit"], "feat":view,
"fileCL": args.fileCL, "fileCLD": args.fileCLD, "CL_type": classifier, "nbClass":NB_CLASS}, "viewIndex":allViews.index(view)} "CL_type": classifier, "nbClass":NB_CLASS}, "viewIndex":allViews.index(view)}
argumentDictionaries["Monoview"].append(arguments) argumentDictionaries["Monoview"].append(arguments)
except: except:
pass pass
...@@ -343,10 +336,6 @@ if nbCores>1: ...@@ -343,10 +336,6 @@ if nbCores>1:
accuracies = [[result[1][1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)] accuracies = [[result[1][1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
classifiersNames = [[result[1][0] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)] classifiersNames = [[result[1][0] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
classifiersConfigs = [[result[1][2] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)] classifiersConfigs = [[result[1][2] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)]
# for viewIndex, view in enumerate(views):
# bestClassifiers.append(classifiersNames[viewIndex][np.argmax(np.array(accuracies[viewIndex]))])
# bestClassifiersConfigs.append(classifiersConfigs[viewIndex][np.argmax(np.array(accuracies[viewIndex]))])
else: else:
resultsMonoview+=([ExecMonoview(DATASET.get("View"+str(arguments["viewIndex"])), resultsMonoview+=([ExecMonoview(DATASET.get("View"+str(arguments["viewIndex"])),
DATASET.get("Labels").value, args.name, labelsNames, DATASET.get("Labels").value, args.name, labelsNames,
...@@ -359,6 +348,7 @@ else: ...@@ -359,6 +348,7 @@ else:
classifiersNames = [[result[1][0] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in viewsIndices] classifiersNames = [[result[1][0] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in viewsIndices]
classifiersConfigs = [[result[1][1][:-1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in viewsIndices] classifiersConfigs = [[result[1][1][:-1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in viewsIndices]
monoviewTime = time.time()-dataBaseTime-start monoviewTime = time.time()-dataBaseTime-start
print classifiersConfigs
if True: if True:
if benchmark["Multiview"]: if benchmark["Multiview"]:
try: try:
...@@ -374,16 +364,16 @@ if True: ...@@ -374,16 +364,16 @@ if True:
"MumboKWARGS": {"classifiersNames": mumboClassifiersNames, "MumboKWARGS": {"classifiersNames": mumboClassifiersNames,
"maxIter":int(args.MU_iter[0]), "minIter":int(args.MU_iter[1]), "maxIter":int(args.MU_iter[0]), "minIter":int(args.MU_iter[1]),
"threshold":args.MU_iter[2], "threshold":args.MU_iter[2],
"classifiersConfigs": [argument.split(":") for argument in args.MU_config]}} "classifiersConfigs": [argument.split(":") for argument in args.MU_config], "nbView":(len(viewsIndices))}}
argumentDictionaries["Multiview"].append(arguments) argumentDictionaries["Multiview"].append(arguments)
except: except:
pass pass
try: try:
if benchmark["Multiview"]["Fusion"]: if benchmark["Multiview"]["Fusion"]:
if args.CL_algos_monoview !=['']: if args.FU_cl_names.split(':') !=['']:
monoClassifiers = args.CL_algos_monoview.split(":") monoClassifiers = args.FU_cl_names.split(":")
monoClassifiersConfigs = [classifier+"KWARGS" for classifier in monoClassifiers] monoClassifiersConfigs = [globals()[classifier+"KWARGS"] for classifier in monoClassifiers]
if args.FU_method_config != [""]: if args.FU_method_config != [""]:
fusionMethodConfigs = [map(float,config.split(":")) for config in args.FU_method_config] fusionMethodConfigs = [map(float,config.split(":")) for config in args.FU_method_config]
elif not gridSearch: elif not gridSearch:
...@@ -405,7 +395,7 @@ if True: ...@@ -405,7 +395,7 @@ if True:
"FusionKWARGS": {"fusionType":"LateFusion", "fusionMethod":method, "FusionKWARGS": {"fusionType":"LateFusion", "fusionMethod":method,
"classifiersNames": args.FU_cl_names.split(":"), "classifiersNames": args.FU_cl_names.split(":"),
"classifiersConfigs": monoClassifiersConfigs, "classifiersConfigs": monoClassifiersConfigs,
'fusionMethodConfig': fusionMethodConfigs[methodIndex]}} 'fusionMethodConfig': fusionMethodConfigs[methodIndex], "nbView":(len(viewsIndices))}}
argumentDictionaries["Multiview"].append(arguments) argumentDictionaries["Multiview"].append(arguments)
else: else:
for combination in itertools.combinations_with_replacement(range(len(monoClassifiers)), NB_VIEW): for combination in itertools.combinations_with_replacement(range(len(monoClassifiers)), NB_VIEW):
...@@ -420,7 +410,7 @@ if True: ...@@ -420,7 +410,7 @@ if True:
"FusionKWARGS": {"fusionType":"LateFusion", "fusionMethod":method, "FusionKWARGS": {"fusionType":"LateFusion", "fusionMethod":method,
"classifiersNames": monoClassifiersNamesComb, "classifiersNames": monoClassifiersNamesComb,
"classifiersConfigs": monoClassifiersConfigsComb, "classifiersConfigs": monoClassifiersConfigsComb,
'fusionMethodConfig': fusionMethodConfigs[methodIndex]}} 'fusionMethodConfig': fusionMethodConfigs[methodIndex], "nbView":(len(viewsIndices))}}
argumentDictionaries["Multiview"].append(arguments) argumentDictionaries["Multiview"].append(arguments)
except: except:
pass pass
...@@ -469,6 +459,7 @@ if True: ...@@ -469,6 +459,7 @@ if True:
else: else:
pass pass
# resultsMultiview = [] # resultsMultiview = []
print argumentDictionaries["Multiview"]
if nbCores>1: if nbCores>1:
resultsMultiview = [] resultsMultiview = []
nbExperiments = len(argumentDictionaries["Multiview"]) nbExperiments = len(argumentDictionaries["Multiview"])
......
...@@ -50,11 +50,7 @@ def ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databa ...@@ -50,11 +50,7 @@ def ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databa
except: except:
kwargs = args kwargs = args
t_start = time.time() t_start = time.time()
directory = os.path.dirname(os.path.abspath(__file__)) + "/Results-ClassMonoView/"
feat = X.attrs["name"] feat = X.attrs["name"]
fileFeat = kwargs["fileFeat"]
fileCL = kwargs["fileCL"]
fileCLD = kwargs["fileCLD"]
CL_type = kwargs["CL_type"] CL_type = kwargs["CL_type"]
nbClass = kwargs["nbClass"] nbClass = kwargs["nbClass"]
X = getValue(X) X = getValue(X)
...@@ -96,7 +92,6 @@ def ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databa ...@@ -96,7 +92,6 @@ def ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databa
logging.debug("Done:\t RandomSearch best settings") logging.debug("Done:\t RandomSearch best settings")
logging.debug("Start:\t Training") logging.debug("Start:\t Training")
cl_res = classifierModule.fit(X_train, y_train, NB_CORES=nbCores, **clKWARGS) cl_res = classifierModule.fit(X_train, y_train, NB_CORES=nbCores, **clKWARGS)
logging.debug("Done:\t Training") logging.debug("Done:\t Training")
logging.debug("Start:\t Predicting") logging.debug("Start:\t Predicting")
......
...@@ -33,6 +33,7 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): ...@@ -33,6 +33,7 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs):
binaryAttributes = kwargs["binaryAttributes"] binaryAttributes = kwargs["binaryAttributes"]
except: except:
attributeClassification, binaryAttributes, dsetFile, name = transformData(DATASET) attributeClassification, binaryAttributes, dsetFile, name = transformData(DATASET)
print kwargs
classifier = pyscm.scm.SetCoveringMachine(p=p, max_attributes=max_attrtibutes, model_type=model_type, verbose=False) classifier = pyscm.scm.SetCoveringMachine(p=p, max_attributes=max_attrtibutes, model_type=model_type, verbose=False)
classifier.fit(binaryAttributes, CLASS_LABELS, X=None, attribute_classifications=attributeClassification, iteration_callback=None) classifier.fit(binaryAttributes, CLASS_LABELS, X=None, attribute_classifications=attributeClassification, iteration_callback=None)
try: try:
......
...@@ -15,6 +15,7 @@ import logging ...@@ -15,6 +15,7 @@ import logging
import time import time
import h5py import h5py
from utils.Dataset import getShape from utils.Dataset import getShape
from utils.HyperParameterSearch import searchBestSettings
# Author-Info # Author-Info
__author__ = "Baptiste Bauvin" __author__ = "Baptiste Bauvin"
...@@ -39,7 +40,7 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p ...@@ -39,7 +40,7 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p
NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] NB_CLASS = DATASET.get("Metadata").attrs["nbClass"]
if not metrics: if not metrics:
metrics = [["accuracy_score", None]] metrics = [["accuracy_score", None]]
metric = metrics[0]
CL_type = kwargs["CL_type"] CL_type = kwargs["CL_type"]
LABELS_NAMES = kwargs["LABELS_NAMES"] LABELS_NAMES = kwargs["LABELS_NAMES"]
classificationKWARGS = kwargs[CL_type+"KWARGS"] classificationKWARGS = kwargs[CL_type+"KWARGS"]
...@@ -55,14 +56,10 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p ...@@ -55,14 +56,10 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p
logging.info("Done:\t Read Database Files") logging.info("Done:\t Read Database Files")
extractionTime = time.time() - t_start extractionTime = time.time() - t_start
kFoldPredictedTrainLabels = []
kFoldPredictedTestLabels = []
kFoldPredictedValidationLabels = []
kFoldLearningTime = []
kFoldPredictionTime = []
kFoldClassifier = []
ivalidationIndices = [] ivalidationIndices = []
ikFolds = [] trainLabelsIterations = []
testLabelsIterations = []
classifiersIterations = []
classifierPackage = globals()[CL_type] # Permet d'appeler un module avec une string classifierPackage = globals()[CL_type] # Permet d'appeler un module avec une string
classifierModule = getattr(classifierPackage, CL_type) classifierModule = getattr(classifierPackage, CL_type)
classifierClass = getattr(classifierModule, CL_type) classifierClass = getattr(classifierModule, CL_type)
...@@ -91,63 +88,30 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p ...@@ -91,63 +88,30 @@ def ExecMultiview(DATASET, name, learningRate, nbFolds, nbCores, databaseType, p
logging.info("Start:\t Learning with " + CL_type + " and " + str(len(kFolds)) + " folds") logging.info("Start:\t Learning with " + CL_type + " and " + str(len(kFolds)) + " folds")
logging.info("Start:\t Classification") logging.info("Start:\t Classification")
# Begin Classification # Begin Classification
classifier = searchBestSettings(DATASET, CL_type, metrics, viewsIndices=viewsIndices, usedIndices=learningIndices, kFolds=kFolds, searchingTool=gridSearch, nIter=1, **classificationKWARGS)
kFoldPredictedTrainLabelsIter = [] classifier.fit_hdf5(DATASET, trainIndices=learningIndices, viewsIndices=viewsIndices)
kFoldPredictedTestLabelsIter = [] trainLabels = classifier.predict_hdf5(DATASET, usedIndices=learningIndices, viewsIndices=viewsIndices)
kFoldPredictedValidationLabelsIter = [] testLabels = classifier.predict_hdf5(DATASET, usedIndices=validationIndices, viewsIndices=viewsIndices)
kFoldLearningTimeIter = [] trainLabelsIterations.append(trainLabels)
kFoldPredictionTimeIter = [] testLabelsIterations.append(testLabels)
kFoldClassifierIter = []
for foldIdx, fold in enumerate(kFolds):
if fold != range(classificationSetLength):
fold.sort()
logging.info("\tStart:\t Fold number " + str(foldIdx + 1))
trainIndices = [index for index in range(datasetLength) if (index not in fold) and (index not in validationIndices)]
if gridSearch:
logging.info("Start:\t Randomsearching best settings for monoview classifiers")
bestSettings, fusionConfig = classifierGridSearch(DATASET, viewsIndices, classificationKWARGS, trainIndices
, metric=metrics[0], nIter=nIter)
classificationKWARGS["classifiersConfigs"] = bestSettings
try:
classificationKWARGS["fusionMethodConfig"] = fusionConfig
except:
pass
logging.info("Done:\t Randomsearching best settings for monoview classifiers")
DATASET_LENGTH = len(trainIndices)
classifier = classifierClass(NB_VIEW, DATASET_LENGTH, DATASET.get("Labels").value[trainIndices], NB_CORES=nbCores, **classificationKWARGS)
classifier.fit_hdf5(DATASET, trainIndices=trainIndices, viewsIndices=viewsIndices)
kFoldClassifierIter.append(classifier)
learningTime = time.time() - extractionTime - t_start
kFoldLearningTimeIter.append(learningTime)
kFoldPredictedTrainLabelsIter.append(classifier.predict_hdf5(DATASET, usedIndices=trainIndices, viewsIndices=viewsIndices))
kFoldPredictedTestLabelsIter.append(classifier.predict_hdf5(DATASET, usedIndices=fold, viewsIndices=viewsIndices))
kFoldPredictedValidationLabelsIter.append(classifier.predict_hdf5(DATASET, usedIndices=validationIndices, viewsIndices=viewsIndices))
kFoldPredictionTimeIter.append(time.time() - extractionTime - t_start - learningTime)
logging.info("\tDone: \t Fold number " + str(foldIdx + 1))
kFoldPredictedTrainLabels.append(kFoldPredictedTrainLabelsIter)
kFoldPredictedTestLabels.append(kFoldPredictedTestLabelsIter)
kFoldPredictedValidationLabels.append(kFoldPredictedValidationLabelsIter)
kFoldLearningTime.append(kFoldLearningTimeIter)
kFoldPredictionTime.append(kFoldPredictionTimeIter)
kFoldClassifier.append(kFoldClassifierIter)
ikFolds.append(kFolds)
ivalidationIndices.append(validationIndices) ivalidationIndices.append(validationIndices)
classifiersIterations.append(classifier)
logging.info("Done:\t Classification")
classificationTime = time.time() - t_start classificationTime = time.time() - t_start
logging.info("Done:\t Classification")
logging.info("Info:\t Time for Classification: " + str(int(classificationTime)) + "[s]") logging.info("Info:\t Time for Classification: " + str(int(classificationTime)) + "[s]")
logging.info("Start:\t Result Analysis for " + CL_type) logging.info("Start:\t Result Analysis for " + CL_type)
times = (extractionTime, kFoldLearningTime, kFoldPredictionTime, classificationTime) times = (extractionTime, classificationTime)
stringAnalysis, imagesAnalysis, metricsScores = analysisModule.execute(kFoldClassifier, kFoldPredictedTrainLabels, stringAnalysis, imagesAnalysis, metricsScores = analysisModule.execute(classifiersIterations, trainLabelsIterations,
kFoldPredictedTestLabels, kFoldPredictedValidationLabels, testLabelsIterations, DATASET,
DATASET, classificationKWARGS, learningRate, LABELS_DICTIONARY, classificationKWARGS, learningRate,
views, nbCores, times, ikFolds, name, nbFolds, LABELS_DICTIONARY,views, nbCores, times,
ivalidationIndices, gridSearch, nIter, metrics, statsIter, viewsIndices) name, nbFolds, ivalidationIndices,
gridSearch, nIter, metrics, statsIter,
viewsIndices)
labelsSet = set(LABELS_DICTIONARY.values()) labelsSet = set(LABELS_DICTIONARY.values())
logging.info(stringAnalysis) logging.info(stringAnalysis)
featureString = "-".join(views) featureString = "-".join(views)
......
...@@ -26,6 +26,15 @@ def makeMonoviewData_hdf5(DATASET, weights=None, usedIndices=None, viewsIndices= ...@@ -26,6 +26,15 @@ def makeMonoviewData_hdf5(DATASET, weights=None, usedIndices=None, viewsIndices=
return monoviewData return monoviewData
def genParamsSets(classificationKWARGS, nIter=1):
    """Delegate parameter-set generation to the configured fusion method module.

    The fusion type package is looked up in this module's globals under the
    name ``<fusionType>Package``; the fusion method module is then fetched
    from it by attribute name and its own ``genParamsSets`` is called.

    :param classificationKWARGS: dict holding at least "fusionType" and
        "fusionMethod"; passed through unchanged to the method module.
    :param nIter: number of parameter sets requested from the method module.
    :return: whatever the method module's ``genParamsSets`` returns.
    :raises KeyError: if a key or the ``<fusionType>Package`` global is missing.
    :raises AttributeError: if the package has no such fusion method.
    """
    typePackage = globals()[classificationKWARGS["fusionType"] + "Package"]
    methodModule = getattr(typePackage, classificationKWARGS["fusionMethod"])
    return methodModule.genParamsSets(classificationKWARGS, nIter=nIter)
def gridSearch_hdf5(DATASET, viewsIndices, classificationKWARGS, learningIndices, metric=None, nIter=30): def gridSearch_hdf5(DATASET, viewsIndices, classificationKWARGS, learningIndices, metric=None, nIter=30):
if type(viewsIndices)==type(None): if type(viewsIndices)==type(None):
viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
...@@ -56,7 +65,7 @@ def gridSearch_hdf5(DATASET, viewsIndices, classificationKWARGS, learningIndices ...@@ -56,7 +65,7 @@ def gridSearch_hdf5(DATASET, viewsIndices, classificationKWARGS, learningIndices
class Fusion: class Fusion:
def __init__(self, NB_VIEW, DATASET_LENGTH, CLASS_LABELS, NB_CORES=1,**kwargs): def __init__(self, NB_CORES=1,**kwargs):
fusionType = kwargs['fusionType'] fusionType = kwargs['fusionType']
fusionMethod = kwargs['fusionMethod'] fusionMethod = kwargs['fusionMethod']
fusionTypePackage = globals()[fusionType+"Package"] fusionTypePackage = globals()[fusionType+"Package"]
...@@ -66,16 +75,20 @@ class Fusion: ...@@ -66,16 +75,20 @@ class Fusion:
classifierKWARGS = dict((key, value) for key, value in kwargs.iteritems() if key not in ['fusionType', 'fusionMethod']) classifierKWARGS = dict((key, value) for key, value in kwargs.iteritems() if key not in ['fusionType', 'fusionMethod'])
self.classifier = fusionMethodClass(NB_CORES=nbCores, **classifierKWARGS) self.classifier = fusionMethodClass(NB_CORES=nbCores, **classifierKWARGS)
def setParams(self, paramsSet):
    """Forward a parameter set to the wrapped fusion method classifier.

    :param paramsSet: parameter set, handed unchanged to
        ``self.classifier.setParams``.
    :return: None (the delegate's return value is discarded).
    """
    wrappedClassifier = self.classifier
    wrappedClassifier.setParams(paramsSet)
def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None):
self.classifier.fit_hdf5(DATASET, trainIndices=trainIndices, viewsIndices=viewsIndices) self.classifier.fit_hdf5(DATASET, trainIndices=trainIndices, viewsIndices=viewsIndices)
def fit(self, DATASET, CLASS_LABELS, DATASET_LENGTH, NB_VIEW, NB_CLASS, NB_CORES, trainArguments): # def fit(self, DATASET, CLASS_LABELS, DATASET_LENGTH, NB_VIEW, NB_CLASS, NB_CORES, trainArguments):
fusionType, fusionMethod, fusionConfig, monoviewClassifier, monoviewClassifierConfig = trainArguments # fusionType, fusionMethod, fusionConfig, monoviewClassifier, monoviewClassifierConfig = trainArguments
fusionTypeModule = globals()[fusionType] # Early/late fusion # fusionTypeModule = globals()[fusionType] # Early/late fusion
trainFusion = getattr(fusionTypeModule, fusionMethod+"Train") # linearWeighted for example # trainFusion = getattr(fusionTypeModule, fusionMethod+"Train") # linearWeighted for example
classifier = trainFusion(DATASET, CLASS_LABELS, DATASET_LENGTH, NB_VIEW, monoviewClassifier, # classifier = trainFusion(DATASET, CLASS_LABELS, DATASET_LENGTH, NB_VIEW, monoviewClassifier,
monoviewClassifierConfig, fusionConfig) # monoviewClassifierConfig, fusionConfig)
return fusionType, fusionMethod, classifier # return fusionType, fusionMethod, classifier
def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):
if usedIndices == None: if usedIndices == None:
...@@ -97,12 +110,12 @@ class Fusion: ...@@ -97,12 +110,12 @@ class Fusion:
predictedLabels = [] predictedLabels = []
return predictedLabels return predictedLabels
def predict(self, DATASET, classifier, NB_CLASS): # def predict(self, DATASET, classifier, NB_CLASS):
fusionType, fusionMethod, fusionClassifier = classifier # fusionType, fusionMethod, fusionClassifier = classifier
fusionType = globals()[fusionType] # Early/late fusion # fusionType = globals()[fusionType] # Early/late fusion
predictFusion = getattr(fusionType, fusionMethod+"Predict") # linearWeighted for example # predictFusion = getattr(fusionType, fusionMethod+"Predict") # linearWeighted for example
predictedLabels = predictFusion(DATASET, fusionClassifier) # predictedLabels = predictFusion(DATASET, fusionClassifier)
return predictedLabels # return predictedLabels
...@@ -4,6 +4,16 @@ import numpy as np ...@@ -4,6 +4,16 @@ import numpy as np
from sklearn.metrics import accuracy_score from sklearn.metrics import accuracy_score
def genParamsSets(classificationKWARGS, nIter=1):
    """Generate random, normalized per-view weight vectors.

    :param classificationKWARGS: dict providing "nbView", the number of
        weights to draw per set.
    :param nIter: number of parameter sets to generate.
    :return: list of ``nIter`` parameter sets; each set is a one-element list
        holding a numpy array of ``nbView`` weights divided by their sum.
    :raises KeyError: if "nbView" is missing.
    """
    viewCount = classificationKWARGS["nbView"]

    def drawWeights():
        # One random_sample call per set, so seeded runs stay reproducible.
        rawWeights = np.random.random_sample(viewCount)
        return rawWeights / np.sum(rawWeights)

    return [[drawWeights()] for _ in range(nIter)]
def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None): def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None):
if type(viewsIndices)==type(None): if type(viewsIndices)==type(None):
viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
...@@ -43,6 +53,9 @@ class WeightedLinear(EarlyFusionClassifier): ...@@ -43,6 +53,9 @@ class WeightedLinear(EarlyFusionClassifier):
NB_CORES=self.nbCores, #**self.monoviewClassifiersConfig) NB_CORES=self.nbCores, #**self.monoviewClassifiersConfig)
**self.monoviewClassifiersConfig) **self.monoviewClassifiersConfig)
def setParams(self, paramsSet):
    """Install the weights carried by a parameter set.

    :param paramsSet: indexable parameter set whose first element is stored
        as ``self.weights``.
    :raises IndexError: if ``paramsSet`` is empty.
    """
    newWeights = paramsSet[0]
    self.weights = newWeights
def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):
if type(viewsIndices)==type(None): if type(viewsIndices)==type(None):
viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
......
...@@ -4,6 +4,16 @@ import numpy as np ...@@ -4,6 +4,16 @@ import numpy as np
from sklearn.metrics import accuracy_score from sklearn.metrics import accuracy_score
from utils.Dataset import getV from utils.Dataset import getV
def genParamsSets(classificationKWARGS, nIter=1):
    """Generate random, normalized per-view weight vectors.

    :param classificationKWARGS: dict providing "nbView", the number of
        weights to draw per set.
    :param nIter: number of parameter sets to generate.
    :return: list of ``nIter`` parameter sets; each set is a one-element list
        holding a numpy array of ``nbView`` weights divided by their sum.
    :raises KeyError: if "nbView" is missing.
    """
    viewCount = classificationKWARGS["nbView"]
    weightSets = []
    remaining = nIter
    while remaining > 0:
        # One random_sample call per set, so seeded runs stay reproducible.
        sample = np.random.random_sample(viewCount)
        weightSets.append([sample / np.sum(sample)])
        remaining -= 1
    return weightSets
def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None): def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None):
if type(viewsIndices)==type(None): if type(viewsIndices)==type(None):
viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
...@@ -29,7 +39,11 @@ class BayesianInference(LateFusionClassifier): ...@@ -29,7 +39,11 @@ class BayesianInference(LateFusionClassifier):
def __init__(self, NB_CORES=1, **kwargs): def __init__(self, NB_CORES=1, **kwargs):
LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'], LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'],
NB_CORES=NB_CORES) NB_CORES=NB_CORES)
self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0]))
# self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0]))
self.weights = None #A modifier !!
def setParams(self, paramsSet):
    """Install the weights carried by a parameter set.

    :param paramsSet: indexable parameter set whose first element is stored
        as ``self.weights``.
    :raises IndexError: if ``paramsSet`` is empty.
    """
    chosenWeights = paramsSet[0]
    self.weights = chosenWeights
def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):
if type(viewsIndices)==type(None): if type(viewsIndices)==type(None):
......
...@@ -5,6 +5,15 @@ from sklearn.metrics import accuracy_score ...@@ -5,6 +5,15 @@ from sklearn.metrics import accuracy_score
from utils.Dataset import getV from utils.Dataset import getV
def genParamsSets(classificationKWARGS, nIter=1):
    """Generate random, normalized per-view weight vectors.

    :param classificationKWARGS: dict providing "nbView", the number of
        weights to draw per set.
    :param nIter: number of parameter sets to generate.
    :return: list of ``nIter`` parameter sets; each set is a one-element list
        holding a numpy array of ``nbView`` weights divided by their sum.
    :raises KeyError: if "nbView" is missing.
    """
    viewCount = classificationKWARGS["nbView"]
    generated = []
    for _ in range(nIter):
        # One random_sample call per set, so seeded runs stay reproducible.
        draw = np.random.random_sample(viewCount)
        total = np.sum(draw)
        generated.append([draw / total])
    return generated
def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None): def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None):
if type(viewsIndices)==type(None): if type(viewsIndices)==type(None):
viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
...@@ -32,6 +41,9 @@ class MajorityVoting(LateFusionClassifier): ...@@ -32,6 +41,9 @@ class MajorityVoting(LateFusionClassifier):
NB_CORES=NB_CORES) NB_CORES=NB_CORES)
self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0]))
def setParams(self, paramsSet):
    """Install the weights carried by a parameter set.

    :param paramsSet: indexable parameter set whose first element is stored
        as ``self.weights``.
    :raises IndexError: if ``paramsSet`` is empty.
    """
    votingWeights = paramsSet[0]
    self.weights = votingWeights
def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):
if type(viewsIndices)==type(None): if type(viewsIndices)==type(None):
viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
......
...@@ -14,6 +14,18 @@ from sklearn.metrics import accuracy_score ...@@ -14,6 +14,18 @@ from sklearn.metrics import accuracy_score
import itertools import itertools
def genParamsSets(classificationKWARGS, nIter=1):
    """Generate random SCM fusion parameter sets.

    Each set is ``[p, max_attributes, model, order]`` where ``p`` comes from
    ``random.random()``, ``max_attributes`` from ``random.randint(1, 20)``,
    ``model`` is "conjunction" or "disjunction", and ``order`` comes from
    ``random.randint(1, nbView)``.

    :param classificationKWARGS: dict providing "nbView".
    :param nIter: number of parameter sets to generate.
    :return: list of ``nIter`` four-element lists.
    :raises KeyError: if "nbView" is missing.
    """
    viewCount = classificationKWARGS["nbView"]
    modelTypes = ["conjunction", "disjunction"]
    candidates = []
    for _ in range(nIter):
        # Draw order (max_attributes, p, model, order) is kept as-is so that
        # a seeded generator yields the same sets as before.
        attributeCap = random.randint(1, 20)
        candidate = [random.random(), attributeCap, random.choice(modelTypes)]
        candidate.append(random.randint(1, viewCount))
        candidates.append(candidate)
    return candidates
def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None): def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None):
if type(viewsIndices)==type(None): if type(viewsIndices)==type(None):
viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
...@@ -44,6 +56,16 @@ class SCMForLinear(LateFusionClassifier): ...@@ -44,6 +56,16 @@ class SCMForLinear(LateFusionClassifier):
NB_CORES=NB_CORES) NB_CORES=NB_CORES)
self.SCMClassifier = None self.SCMClassifier = None
self.config = kwargs['fusionMethodConfig'][0] self.config = kwargs['fusionMethodConfig'][0]
self.p = None
self.maxAttributes = None
self.order = None
self.modelType = None
def setParams(self, paramsSet):
    """Install SCM fusion hyper-parameters from a parameter set.

    The expected layout is ``[p, maxAttributes, modelType, order]``: the
    order in which this module's ``genParamsSets`` builds its sets and in
    which ``self.config`` is read by ``SCMForLinearFusionFit``.

    :param paramsSet: indexable with at least four elements.
    :raises IndexError: if ``paramsSet`` has fewer than four elements.
    """
    self.p = paramsSet[0]
    self.maxAttributes = paramsSet[1]
    # Index 2 is the model type and index 3 the order; they were previously
    # swapped, which put the model string in self.order and passed an int
    # as model_type to the SetCoveringMachine.
    self.modelType = paramsSet[2]
    self.order = paramsSet[3]
def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None):
if type(viewsIndices)==type(None): if type(viewsIndices)==type(None):
...@@ -84,13 +106,17 @@ class SCMForLinear(LateFusionClassifier): ...@@ -84,13 +106,17 @@ class SCMForLinear(LateFusionClassifier):
def SCMForLinearFusionFit(self, DATASET, usedIndices=None, viewsIndices=None): def SCMForLinearFusionFit(self, DATASET, usedIndices=None, viewsIndices=None):
if type(viewsIndices)==type(None): if type(viewsIndices)==type(None):
viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
p = float(self.config[0]) if self.p is None:
maxAttributes = int(self.config[1]) self.p = float(self.config[0])
modelType = self.config[2] if self.maxAttributes is None:
self.maxAttributes = int(self.config[1])
if self.modelType is None:
self.modelType = self.config[2]
if self.order is None:
self.order = self.config[3] self.order = self.config[3]
nbView = len(viewsIndices) nbView = len(viewsIndices)
self.SCMClassifier = pyscm.scm.SetCoveringMachine(p=p, max_attributes=maxAttributes, model_type=modelType, verbose=False) self.SCMClassifier = pyscm.scm.SetCoveringMachine(p=self.p, max_attributes=self.maxAttributes, model_type=self.modelType, verbose=False)
monoViewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) monoViewDecisions = np.zeros((len(usedIndices), nbView), dtype=int)
for index, viewIndex in enumerate(viewsIndices): for index, viewIndex in enumerate(viewsIndices):
monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict(
......
...@@ -6,6 +6,13 @@ from sklearn.svm import SVC ...@@ -6,6 +6,13 @@ from sklearn.svm import SVC
from utils.Dataset import getV from utils.Dataset import getV
def genParamsSets(classificationKWARGS, nIter=1):
    """Return ``nIter`` empty parameter sets.

    This fusion method has nothing to tune, so every generated set is an
    empty list. ``classificationKWARGS`` must still contain ``"nbView"``
    (the key is looked up, although its value is not used).
    """
    classificationKWARGS["nbView"]  # lookup kept: a missing key still raises KeyError
    return [[] for _ in range(nIter)]
def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None):
    """Placeholder grid search: performs no search and always returns None."""
    bestSettings = None
    return bestSettings
...@@ -31,6 +38,9 @@ class SVMForLinear(LateFusionClassifier): ...@@ -31,6 +38,9 @@ class SVMForLinear(LateFusionClassifier):
enumerate(self.monoviewClassifiersConfigs[index])))) enumerate(self.monoviewClassifiersConfigs[index]))))
self.SVMForLinearFusionFit(DATASET, usedIndices=trainIndices, viewsIndices=viewsIndices) self.SVMForLinearFusionFit(DATASET, usedIndices=trainIndices, viewsIndices=viewsIndices)
def setParams(self, paramsSet):
    """Accept a parameter set and ignore it (nothing is stored)."""
    return None
def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):
if type(viewsIndices)==type(None): if type(viewsIndices)==type(None):
viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
......
...@@ -5,6 +5,16 @@ from sklearn.metrics import accuracy_score ...@@ -5,6 +5,16 @@ from sklearn.metrics import accuracy_score
from utils.Dataset import getV from utils.Dataset import getV
def genParamsSets(classificationKWARGS, nIter=1):
    """Draw ``nIter`` random weight vectors, one weight per view.

    Each parameter set is a one-element list holding an array of
    ``classificationKWARGS["nbView"]`` values sampled with
    ``np.random.random_sample`` and rescaled so that they sum to 1.
    """
    viewCount = classificationKWARGS["nbView"]

    def drawNormalizedWeights():
        # One draw from the global NumPy random state per parameter set.
        rawWeights = np.random.random_sample(viewCount)
        return rawWeights / np.sum(rawWeights)

    return [[drawNormalizedWeights()] for _ in range(nIter)]
def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None): def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None):
if type(viewsIndices)==type(None): if type(viewsIndices)==type(None):
viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
...@@ -35,6 +45,9 @@ class WeightedLinear(LateFusionClassifier): ...@@ -35,6 +45,9 @@ class WeightedLinear(LateFusionClassifier):
else: else:
self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0]))
def setParams(self, paramsSet):
    """Replace the fusion weights with the first element of ``paramsSet``."""
    viewWeights = paramsSet[0]
    self.weights = viewWeights
def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):
if type(viewsIndices)==type(None): if type(viewsIndices)==type(None):
viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
......
...@@ -13,7 +13,7 @@ __author__ = "Baptiste Bauvin" ...@@ -13,7 +13,7 @@ __author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype __status__ = "Prototype" # Production, Development, Prototype
def makeMeNoisy(viewData, percentage=5): def makeMeNoisy(viewData, percentage=25):
viewData = viewData.astype(bool) viewData = viewData.astype(bool)
nbNoisyCoord = int(percentage/100.0*viewData.shape[0]*viewData.shape[1]) nbNoisyCoord = int(percentage/100.0*viewData.shape[0]*viewData.shape[1])
rows = range(viewData.shape[0]) rows = range(viewData.shape[0])
...@@ -26,7 +26,7 @@ def makeMeNoisy(viewData, percentage=5): ...@@ -26,7 +26,7 @@ def makeMeNoisy(viewData, percentage=5):
return noisyViewData return noisyViewData
def getPlausibleDBhdf5(features, pathF, name , NB_CLASS, LABELS_NAME, nbView=10, nbClass=2, datasetLength=500): def getPlausibleDBhdf5(features, pathF, name , NB_CLASS, LABELS_NAME, nbView=3, nbClass=2, datasetLength=500):
nbFeatures = 150 nbFeatures = 150
datasetFile = h5py.File(pathF+"Plausible.hdf5", "w") datasetFile = h5py.File(pathF+"Plausible.hdf5", "w")
CLASS_LABELS = np.array([0 for i in range(datasetLength/2)]+[1 for i in range(datasetLength/2)]) CLASS_LABELS = np.array([0 for i in range(datasetLength/2)]+[1 for i in range(datasetLength/2)])
......
...@@ -16,7 +16,7 @@ def isUseful(nbTrainingExamples, index, CLASS_LABELS, labelDict): ...@@ -16,7 +16,7 @@ def isUseful(nbTrainingExamples, index, CLASS_LABELS, labelDict):
def subSample(data, labels, subSampling, weights=None): def subSample(data, labels, subSampling, weights=None):
if weights == None: if weights is None:
weights = np.ones(len(labels))/len(labels) weights = np.ones(len(labels))/len(labels)
nbExamples = len(labels) nbExamples = len(labels)
labelSupports, labelDict = getLabelSupports(labels) labelSupports, labelDict = getLabelSupports(labels)
......
...@@ -61,49 +61,51 @@ def gridSearch_hdf5(DATASET, viewIndices, classificationKWARGS, learningIndices, ...@@ -61,49 +61,51 @@ def gridSearch_hdf5(DATASET, viewIndices, classificationKWARGS, learningIndices,
class Mumbo: class Mumbo:
def __init__(self, NB_VIEW, DATASET_LENGTH, CLASS_LABELS, NB_CORES=1,**kwargs): def __init__(self, NB_CORES=1, **kwargs):
self.maxIter = kwargs["maxIter"] self.maxIter = kwargs["maxIter"]
self.minIter = kwargs["minIter"] self.minIter = kwargs["minIter"]
self.threshold = kwargs["threshold"] self.threshold = kwargs["threshold"]
self.classifiersNames = kwargs["classifiersNames"] self.classifiersNames = kwargs["classifiersNames"]
self.classifiersConfigs = kwargs["classifiersConfigs"] self.classifiersConfigs = kwargs["classifiersConfigs"]
nbClass = len(set(CLASS_LABELS)) nbView = kwargs["nbView"]
self.edges = np.zeros((self.maxIter, nbView))
self.alphas = np.zeros((self.maxIter, nbView))
self.generalAlphas = np.zeros(self.maxIter)
self.nbCores = NB_CORES
self.iterIndex = 0
self.bestClassifiers = []
self.bestViews = np.zeros(self.maxIter, dtype=int)
self.averageAccuracies = np.zeros((self.maxIter, nbView))
self.iterAccuracies = np.zeros(self.maxIter)
def initDataDependant(self, datasetLength, nbView, nbClass, labels):
    """Allocate the buffers whose shape depends on the training data.

    The per-example cost rows hold ``1`` for every class different from
    the example's label and ``-(nbClass - 1)`` for the labelled class.

    Sets on ``self``:
      * ``costMatrices``: ``maxIter + 1`` blocks of shape
        ``(nbView, datasetLength, nbClass)``; block 0 holds the cost rows
        repeated for every view, the others are zeros.
      * ``generalCostMatrix``: the cost rows repeated ``maxIter`` times.
      * ``fs``, ``ds``, ``predictions``, ``generalFs``: zero arrays.
    """
    classes = range(nbClass)
    exampleCosts = np.array([
        np.array([1 if labels[exampleIndice] != classe else -(nbClass - 1)
                  for classe in classes])
        for exampleIndice in range(datasetLength)
    ])

    # Only iteration 0 starts from the label-derived costs.
    firstIterationCosts = np.array([exampleCosts for _ in range(nbView)])
    perIterationCosts = [firstIterationCosts]
    for _ in range(self.maxIter):
        perIterationCosts.append(np.zeros((nbView, datasetLength, nbClass)))
    self.costMatrices = np.array(perIterationCosts)

    self.generalCostMatrix = np.array(
        [exampleCosts for _ in range(self.maxIter)])

    iterationCount = self.maxIter
    self.fs = np.zeros((iterationCount, nbView, datasetLength, nbClass))
    self.ds = np.zeros((iterationCount, nbView, datasetLength))
    self.predictions = np.zeros((iterationCount, nbView, datasetLength))
    self.generalFs = np.zeros((iterationCount, datasetLength, nbClass))
def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None):
# Initialization # Initialization
if not trainIndices: if not trainIndices:
trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
...@@ -113,7 +115,7 @@ class Mumbo: ...@@ -113,7 +115,7 @@ class Mumbo:
NB_VIEW = len(viewsIndices) NB_VIEW = len(viewsIndices)
DATASET_LENGTH = len(trainIndices) DATASET_LENGTH = len(trainIndices)
LABELS = DATASET["Labels"][trainIndices] LABELS = DATASET["Labels"][trainIndices]
self.initDataDependant(DATASET_LENGTH, NB_VIEW, NB_CLASS, LABELS)
# Learning # Learning
isStabilized=False isStabilized=False
self.iterIndex = 0 self.iterIndex = 0
......
import h5py
import numpy as np
import sys
import Multiview
import Metrics
def searchBestSettings(dataset, classifierName, metrics, viewsIndices=None, usedIndices=None, kFolds=None, searchingTool="gridSearch", nIter=1, **kwargs):
    """Run the hyper-parameter search strategy named by ``searchingTool``.

    ``searchingTool`` must be the name of a function defined at the top
    level of this module; an unknown name raises ``AttributeError``.
    When ``viewsIndices`` is None, all views declared in the dataset's
    ``Metadata`` attributes (``nbView``) are used.

    Returns whatever the selected search function returns.
    """
    if viewsIndices is None:
        nbView = dataset.get("Metadata").attrs["nbView"]
        viewsIndices = range(nbView)
    # Resolve the strategy by name among this module's own attributes.
    searchFunction = getattr(sys.modules[__name__], searchingTool)
    return searchFunction(dataset, classifierName, metrics,
                          viewsIndices=viewsIndices, usedIndices=usedIndices,
                          kFolds=kFolds, nIter=nIter, **kwargs)
def gridSearch(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, **kwargs):
    """Placeholder for an exhaustive grid search; currently does nothing.

    Intended behaviour (from the original note): when grid search is
    selected, a given set of candidate values should be tested.
    """
    # NOTE(review): searchBestSettings passes ``metrics`` as the third
    # positional argument together with ``viewsIndices=...``; with this
    # signature that call raises TypeError. Confirm the intended signature.
    return None
def randomizedSearch(dataset, classifierName, metrics, viewsIndices=None, usedIndices=None, kFolds=None, nIter=1, nbCores=1, **classificationKWARGS):
    """Pick the best of ``nIter`` random parameter sets by cross-validation.

    For every classifier except ``"Mumbo"``, parameter sets come from the
    classifier module's ``genParamsSets``. Each set is scored on every fold
    of ``kFolds`` with the first metric of ``metrics``, and the set with the
    best mean test score is loaded into a fresh classifier. For ``"Mumbo"``
    the search is delegated to the module's ``gridSearch_hdf5``.

    Parameters:
        dataset: HDF5-like object exposing ``get("Metadata").attrs`` and
            ``get("Labels").value``.
        classifierName: name used to resolve package, module and class
            under ``Multiview``.
        metrics: sequence of ``(metricName, metricConfig)``; only the first
            entry is used.
        viewsIndices: views to use; defaults to all ``nbView`` views.
        usedIndices: examples allowed in training; defaults to the whole
            dataset (previously ``None`` raised ``TypeError``).
        kFolds: iterable of test-index lists, one per fold. Each fold is
            sorted in place.
        nIter: number of parameter sets to draw.
        nbCores: forwarded to the classifier as ``NB_CORES``.
        **classificationKWARGS: forwarded to the classifier constructor.

    Returns:
        An unfitted classifier instance configured with the best settings.
    """
    if viewsIndices is None:
        viewsIndices = range(dataset.get("Metadata").attrs["nbView"])
    metric = metrics[0]
    metricModule = getattr(Metrics, metric[0])
    if metric[1] is not None:
        metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
    else:
        metricKWARGS = {}
    # NOTE(review): metricKWARGS is built but never forwarded to
    # metricModule.score, as in the original code. Confirm whether it should be.

    # The metric's config string is probed at a fixed offset to decide
    # whether higher or lower scores are better.
    if metricModule.getConfig()[-14] == "h":
        bestScore = -1000.0
        isBetter = "higher"
    else:
        bestScore = 1000.0
        isBetter = "lower"

    # Resolve package, module and class from the same string.
    classifierPackage = getattr(Multiview, classifierName)
    classifierModule = getattr(classifierPackage, classifierName)
    classifierClass = getattr(classifierModule, classifierName)

    if classifierName != "Mumbo":
        datasetLength = dataset.get("Metadata").attrs["datasetLength"]
        if usedIndices is None:
            usedIndices = range(datasetLength)
        allowedIndices = set(usedIndices)
        labels = dataset.get("Labels").value
        paramsSets = classifierModule.genParamsSets(classificationKWARGS, nIter=nIter)
        bestSettings = None
        for paramsSet in paramsSets:
            scores = []
            for fold in kFolds:
                # Presumably sorted because the classifiers index HDF5
                # datasets, which need increasing indices - TODO confirm.
                fold.sort()
                testIndices = set(fold)
                trainIndices = [index for index in range(datasetLength)
                                if index not in testIndices and index in allowedIndices]
                classifier = classifierClass(NB_CORES=nbCores, **classificationKWARGS)
                classifier.setParams(paramsSet)
                classifier.fit_hdf5(dataset, trainIndices=trainIndices, viewsIndices=viewsIndices)
                testLabels = classifier.predict_hdf5(dataset, usedIndices=fold, viewsIndices=viewsIndices)
                scores.append(metricModule.score(labels[fold], testLabels))
            crossValScore = np.mean(np.array(scores))
            if isBetter == "higher":
                improved = crossValScore > bestScore
            else:
                improved = crossValScore < bestScore
            if improved:
                # Track the running best so later sets are compared against it.
                bestScore = crossValScore
                bestSettings = paramsSet
        classifier = classifierClass(NB_CORES=nbCores, **classificationKWARGS)
        if bestSettings is not None:
            # Load the winning set, not merely the last one tried.
            classifier.setParams(bestSettings)
    else:
        bestConfigs, _ = classifierModule.gridSearch_hdf5(dataset, viewsIndices, classificationKWARGS, usedIndices, metric=metric, nIter=nIter)
        classificationKWARGS["classifiersConfigs"] = bestConfigs
        classifier = classifierClass(NB_CORES=nbCores, **classificationKWARGS)
    return classifier
def spearMint(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, **kwargs):
    """Placeholder for a Spearmint-based search; currently does nothing."""
    return None
# nohup python ~/dev/git/spearmint/spearmint/main.py . &
# import json
# import numpy as np
# import math
#
# from os import system
# from os.path import join
#
#
# def run_kover(dataset, split, model_type, p, max_rules, output_dir):
# outdir = join(output_dir, "%s_%f" % (model_type, p))
# kover_command = "kover learn " \
# "--dataset '%s' " \
# "--split %s " \
# "--model-type %s " \
# "--p %f " \
# "--max-rules %d " \
# "--max-equiv-rules 10000 " \
# "--hp-choice cv " \
# "--random-seed 0 " \
# "--output-dir '%s' " \
# "--n-cpu 1 " \
# "-v" % (dataset,
# split,
# model_type,
# p,
# max_rules,
# outdir)
#
# system(kover_command)
#
# return json.load(open(join(outdir, "results.json")))["cv"]["best_hp"]["score"]
#
#
# def main(job_id, params):
# print params
#
# max_rules = params["MAX_RULES"][0]
#
# species = params["SPECIES"][0]
# antibiotic = params["ANTIBIOTIC"][0]
# split = params["SPLIT"][0]
#
# model_type = params["model_type"][0]
#
# # LS31
# if species == "saureus":
# dataset_path = "/home/droale01/droale01-ls31/projects/genome_scm/data/earle_2016/saureus/kover_datasets/%s.kover" % antibiotic
# else:
# dataset_path = "/home/droale01/droale01-ls31/projects/genome_scm/genome_scm_paper/data/%s/%s.kover" % (species, antibiotic)
#
# output_path = "/home/droale01/droale01-ls31/projects/genome_scm/manifold_scm/spearmint/vanilla_scm/%s/%s" % (species, antibiotic)
#
# # MacBook
# #dataset_path = "/Volumes/Einstein 1/kover_phylo/datasets/%s/%s.kover" % (species, antibiotic)
# #output_path = "/Volumes/Einstein 1/manifold_scm/version2/%s_spearmint" % antibiotic
#
# return run_kover(dataset=dataset_path,
# split=split,
# model_type=model_type,
# p=params["p"][0],
# max_rules=max_rules,
# output_dir=output_path)
# killall mongod && sleep 1 && rm -r database/* && rm mongo.log*
# mongod --fork --logpath mongo.log --dbpath database
#
# {
# "language" : "PYTHON",
# "experiment-name" : "vanilla_scm_cdiff_azithromycin",
# "polling-time" : 1,
# "resources" : {
# "my-machine" : {
# "scheduler" : "local",
# "max-concurrent" : 5,
# "max-finished-jobs" : 100
# }
# },
# "tasks": {
# "resistance" : {
# "type" : "OBJECTIVE",
# "likelihood" : "NOISELESS",
# "main-file" : "spearmint_wrapper",
# "resources" : ["my-machine"]
# }
# },
# "variables": {
#
# "MAX_RULES" : {
# "type" : "ENUM",
# "size" : 1,
# "options": [10]
# },
#
#
# "SPECIES" : {
# "type" : "ENUM",
# "size" : 1,
# "options": ["cdiff"]
# },
# "ANTIBIOTIC" : {
# "type" : "ENUM",
# "size" : 1,
# "options": ["azithromycin"]
# },
# "SPLIT" : {
# "type" : "ENUM",
# "size" : 1,
# "options": ["split_seed_2"]
# },
#
#
# "model_type" : {
# "type" : "ENUM",
# "size" : 1,
# "options": ["conjunction", "disjunction"]
# },
# "p" : {
# "type" : "FLOAT",
# "size" : 1,
# "min" : 0.01,
# "max" : 100
# }
# }
# }
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment