import numpy as np
import sys
import Multiview
import Metrics
import matplotlib.pyplot as plt
import itertools


def searchBestSettings(dataset, classifierName, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=None,
                       searchingTool="randomizedSearch", nIter=1, **kwargs):
    """Dispatch to the hyper-parameter search method named by searchingTool
    (gridSearch, randomizedSearch or spearMint) and return what it found."""
    if viewsIndices is None:
        viewsIndices = range(dataset.get("Metadata").attrs["nbView"])
    thismodule = sys.modules[__name__]
    searchingToolMethod = getattr(thismodule, searchingTool)
    bestSettings = searchingToolMethod(dataset, classifierName, metrics, iLearningIndices, iKFolds, randomState,
                                       viewsIndices=viewsIndices, nIter=nIter, **kwargs)
    return bestSettings  # or a fully configured classifier?
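

# Hedged usage sketch (hypothetical names and values; assumes an h5py dataset
# whose "Metadata" group carries an "nbView" attribute, as the code here expects,
# and a KFolds object with a sklearn-style split method):
#
#   bestClassifier = searchBestSettings(datasetFile, "Fusion", [["accuracy_score", None]],
#                                       learningIndices, kFoldsObject, randomState,
#                                       searchingTool="randomizedSearch", nIter=30,
#                                       **classificationKWARGS)
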
def gridSearch(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, **kwargs):
    # If grid search is selected, we want to test a predefined set of values.
    pass
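

# A minimal sketch of what gridSearch could become, assuming each classifier
# module exposed a genGridParamsSets(classificationKWARGS) helper (hypothetical
# name) enumerating every parameter combination; the cross-validation scoring
# would then mirror the loop in randomizedSearch below:
#
#   def gridSearch(dataset, classifierName, metrics, learningIndices, KFolds, randomState,
#                  viewsIndices=None, **classificationKWARGS):
#       classifierPackage = getattr(Multiview, classifierName)
#       classifierModule = getattr(classifierPackage, classifierName)
#       paramsSets = classifierModule.genGridParamsSets(classificationKWARGS)  # hypothetical
#       # ... score each paramsSet by cross-validation exactly as in randomizedSearch ...
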
def randomizedSearch(dataset, classifierName, metrics, learningIndices, KFolds, randomState, viewsIndices=None, nIter=1,
                     nbCores=1, **classificationKWARGS):
    if viewsIndices is None:
        viewsIndices = range(dataset.get("Metadata").attrs["nbView"])
    metric = metrics[0]
    metricModule = getattr(Metrics, metric[0])
    if metric[1] is not None:
        # Note: metricKWARGS is built here but never passed to metricModule.score below.
        metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
    else:
        metricKWARGS = {}
    classifierPackage = getattr(Multiview, classifierName)  # getattr lets us fetch a module from its name given as a string
classifierModule = getattr(classifierPackage, classifierName)
classifierClass = getattr(classifierModule, classifierName)
if classifierName != "Mumbo":
paramsSets = classifierModule.genParamsSets(classificationKWARGS, randomState, nIter=nIter)
        # getConfig() is assumed to end with "(higher is better)" or
        # "(lower is better)"; the character 14 positions from the end tells them apart.
        if metricModule.getConfig()[-14] == "h":
            baseScore = -1000.0
            isBetter = "higher"
        else:
            baseScore = 1000.0
            isBetter = "lower"
bestSettings = None
        kFolds = list(KFolds.split(learningIndices, dataset.get("Labels").value[learningIndices]))  # materialize so the folds can be reused for every paramsSet
for paramsSet in paramsSets:
scores = []
for trainIndices, testIndices in kFolds:
classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS)
classifier.setParams(paramsSet)
classifier.fit_hdf5(dataset, trainIndices=learningIndices[trainIndices], viewsIndices=viewsIndices)
testLabels = classifier.predict_hdf5(dataset, usedIndices=learningIndices[testIndices],
viewsIndices=viewsIndices)
testScore = metricModule.score(dataset.get("Labels").value[learningIndices[testIndices]], testLabels)
scores.append(testScore)
crossValScore = np.mean(np.array(scores))
if isBetter == "higher" and crossValScore > baseScore:
baseScore = crossValScore
bestSettings = paramsSet
elif isBetter == "lower" and crossValScore < baseScore:
baseScore = crossValScore
bestSettings = paramsSet
classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS)
classifier.setParams(bestSettings)
else:
bestConfigs, _ = classifierModule.gridSearch_hdf5(dataset, viewsIndices, classificationKWARGS, learningIndices,
randomState, metric=metric, nIter=nIter)
classificationKWARGS["classifiersConfigs"] = bestConfigs
classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS)
return classifier
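

# randomizedSearch relies on a duck-typed contract for every non-Mumbo
# classifier: its module must expose genParamsSets(classificationKWARGS,
# randomState, nIter) returning a list of parameter sets, and its classifier
# class must implement setParams, fit_hdf5 and predict_hdf5. A hedged sketch of
# a conforming module (DummyClassifier and its internals are illustrative, not
# part of the platform):
#
#   def genParamsSets(classificationKWARGS, randomState, nIter=1):
#       return [[randomState.uniform(0, 1)] for _ in range(nIter)]
#
#   class DummyClassifier(object):
#       def __init__(self, randomState, NB_CORES=1, **kwargs):
#           self.param = None
#
#       def setParams(self, paramsSet):
#           self.param = paramsSet[0]
#
#       def fit_hdf5(self, dataset, trainIndices=None, viewsIndices=None):
#           pass
#
#       def predict_hdf5(self, dataset, usedIndices=None, viewsIndices=None):
#           return np.zeros(len(usedIndices), dtype=int)
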
def spearMint(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, **kwargs):
    # Not implemented yet; see the commented-out Spearmint wrapper and config at the bottom of this file.
    pass


def genHeatMaps(params, scoresArray, outputFileName):
nbParams = len(params)
if nbParams > 2:
combinations = itertools.combinations(range(nbParams), 2)
else:
combinations = [(0, 1)]
for combination in combinations:
paramName1, paramArray1 = params[combination[0]]
paramName2, paramArray2 = params[combination[1]]
        paramArray1Set = np.sort(np.array(list(set(paramArray1))))
        paramArray2Set = np.sort(np.array(list(set(paramArray2))))
        # Initialize below zero so parameter pairs that were never sampled stay visible.
        scoresMatrix = np.zeros((len(paramArray2Set), len(paramArray1Set))) - 0.1
        for param1, param2, score in zip(paramArray1, paramArray2, scoresArray):
            param1Index, = np.where(paramArray1Set == param1)
            param2Index, = np.where(paramArray2Set == param2)
            scoresMatrix[int(param2Index), int(param1Index)] = score
        plt.figure(figsize=(8, 6))
        plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95)
        plt.imshow(scoresMatrix, interpolation='nearest', cmap=plt.cm.hot)
plt.xlabel(paramName1)
plt.ylabel(paramName2)
plt.colorbar()
plt.xticks(np.arange(len(paramArray1Set)), paramArray1Set)
plt.yticks(np.arange(len(paramArray2Set)), paramArray2Set, rotation=45)
plt.title('Validation metric')
plt.savefig(outputFileName + "heat_map-" + paramName1 + "-" + paramName2 + ".png")
plt.close()
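

# A minimal, self-contained demo of genHeatMaps on synthetic data; the
# parameter names, values and output prefix below are made up for illustration.
if __name__ == "__main__":
    demoRandomState = np.random.RandomState(42)
    nSamples = 50
    demoParams = [("C", demoRandomState.choice([0.1, 1.0, 10.0], nSamples)),
                  ("gamma", demoRandomState.choice([0.01, 0.1, 1.0], nSamples))]
    demoScores = demoRandomState.uniform(0.5, 1.0, nSamples)
    genHeatMaps(demoParams, demoScores, "demo-")  # writes demo-heat_map-C-gamma.png
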
# --- Reference notes for the spearMint stub above: launching Spearmint, a
# wrapper script for kover, and the matching Spearmint experiment config. ---
# nohup python ~/dev/git/spearmint/spearmint/main.py . &
# import json
# import numpy as np
# import math
#
# from os import system
# from os.path import join
#
#
# def run_kover(dataset, split, model_type, p, max_rules, output_dir):
# outdir = join(output_dir, "%s_%f" % (model_type, p))
# kover_command = "kover learn " \
# "--dataset '%s' " \
# "--split %s " \
# "--model-type %s " \
# "--p %f " \
# "--max-rules %d " \
# "--max-equiv-rules 10000 " \
# "--hp-choice cv " \
# "--random-seed 0 " \
# "--output-dir '%s' " \
# "--n-cpu 1 " \
# "-v" % (dataset,
# split,
# model_type,
# p,
# max_rules,
# outdir)
#
# system(kover_command)
#
# return json.load(open(join(outdir, "results.json")))["cv"]["best_hp"]["score"]
#
#
# def main(job_id, params):
#     print(params)
#
# max_rules = params["MAX_RULES"][0]
#
# species = params["SPECIES"][0]
# antibiotic = params["ANTIBIOTIC"][0]
# split = params["SPLIT"][0]
#
# model_type = params["model_type"][0]
#
# # LS31
# if species == "saureus":
# dataset_path = "/home/droale01/droale01-ls31/projects/genome_scm/data/earle_2016/saureus/kover_datasets/%s.kover" % antibiotic
# else:
# dataset_path = "/home/droale01/droale01-ls31/projects/genome_scm/genome_scm_paper/data/%s/%s.kover" % (species, antibiotic)
#
# output_path = "/home/droale01/droale01-ls31/projects/genome_scm/manifold_scm/spearmint/vanilla_scm/%s/%s" % (species, antibiotic)
#
# # MacBook
# #dataset_path = "/Volumes/Einstein 1/kover_phylo/datasets/%s/%s.kover" % (species, antibiotic)
# #output_path = "/Volumes/Einstein 1/manifold_scm/version2/%s_spearmint" % antibiotic
#
# return run_kover(dataset=dataset_path,
# split=split,
# model_type=model_type,
# p=params["p"][0],
# max_rules=max_rules,
# output_dir=output_path)
# killall mongod && sleep 1 && rm -r database/* && rm mongo.log*
# mongod --fork --logpath mongo.log --dbpath database
#
# {
# "language" : "PYTHON",
# "experiment-name" : "vanilla_scm_cdiff_azithromycin",
# "polling-time" : 1,
# "resources" : {
# "my-machine" : {
# "scheduler" : "local",
# "max-concurrent" : 5,
# "max-finished-jobs" : 100
# }
# },
# "tasks": {
# "resistance" : {
# "type" : "OBJECTIVE",
# "likelihood" : "NOISELESS",
# "main-file" : "spearmint_wrapper",
# "resources" : ["my-machine"]
# }
# },
# "variables": {
#
# "MAX_RULES" : {
# "type" : "ENUM",
# "size" : 1,
# "options": [10]
# },
#
#
# "SPECIES" : {
# "type" : "ENUM",
# "size" : 1,
# "options": ["cdiff"]
# },
# "ANTIBIOTIC" : {
# "type" : "ENUM",
# "size" : 1,
# "options": ["azithromycin"]
# },
# "SPLIT" : {
# "type" : "ENUM",
# "size" : 1,
# "options": ["split_seed_2"]
# },
#
#
# "model_type" : {
# "type" : "ENUM",
# "size" : 1,
# "options": ["conjunction", "disjunction"]
# },
# "p" : {
# "type" : "FLOAT",
# "size" : 1,
# "min" : 0.01,
# "max" : 100
# }
# }
# }