HyperParameterSearch.py
    import numpy as np
    import sys
    import Multiview
    import Metrics
    import matplotlib.pyplot as plt
    import itertools
    
    
    def searchBestSettings(dataset, classifierName, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=None,
                           searchingTool="hyperParamSearch", nIter=1, **kwargs):
        if viewsIndices is None:
            viewsIndices = range(dataset.get("Metadata").attrs["nbView"])
        thismodule = sys.modules[__name__]
        searchingToolMethod = getattr(thismodule, searchingTool)
        bestSettings = searchingToolMethod(dataset, classifierName, metrics, iLearningIndices, iKFolds, randomState,
                                           viewsIndices=viewsIndices, nIter=nIter, **kwargs)
        return bestSettings  # or should this return a fully configured classifier?
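
    # Hypothetical usage (argument values are illustrative; searchingTool must name a
    # search function defined in this module, e.g. "randomizedSearch"):
    #
    #     bestSettings = searchBestSettings(dataset, "Fusion", [["accuracy_score", None]],
    #                                       learningIndices, kFolds, randomState,
    #                                       viewsIndices=[0, 1, 2],
    #                                       searchingTool="randomizedSearch", nIter=30,
    #                                       **classificationKWARGS)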
    
    
    def gridSearch(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, **kwargs):
        # If grid search is selected, we want to test a specific set of parameter values
        pass
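

    # A minimal sketch (an assumption, not the project's implementation) of how a grid
    # search could enumerate parameter combinations; the helper name and the paramGrid
    # format are illustrative only.
    def _gridSearchParamsSetsSketch(paramGrid):
        """Yield one settings dict per combination of the candidate values.

        paramGrid is assumed to map parameter names to lists of candidate values,
        e.g. {"nbTrees": [10, 50], "maxDepth": [3, 5]}.
        """
        names = sorted(paramGrid)
        for values in itertools.product(*(paramGrid[name] for name in names)):
            yield dict(zip(names, values))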
    
    
    def randomizedSearch(dataset, classifierName, metrics, learningIndices, KFolds, randomState, viewsIndices=None, nIter=1,
                         nbCores=1, **classificationKWARGS):
        if viewsIndices is None:
            viewsIndices = range(dataset.get("Metadata").attrs["nbView"])
        metric = metrics[0]
        metricModule = getattr(Metrics, metric[0])
        if metric[1] is not None:
            metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1]))
        else:
            metricKWARGS = {}
        classifierPackage = getattr(Multiview, classifierName)  # allows calling a module from its name as a string
        classifierModule = getattr(classifierPackage, classifierName)
        classifierClass = getattr(classifierModule, classifierName)
        if classifierName != "Mumbo":
            paramsSets = classifierModule.genParamsSets(classificationKWARGS, randomState, nIter=nIter)
            # getConfig() ends with "(higher is better)" or "(lower is better)";
            # the character at index -14 is "h" only in the "higher" case.
            if metricModule.getConfig()[-14] == "h":
                baseScore = -1000.0
                isBetter = "higher"
            else:
                baseScore = 1000.0
                isBetter = "lower"
            bestSettings = None
            kFolds = KFolds.split(learningIndices, dataset.get("Labels").value[learningIndices])
            for paramsSet in paramsSets:
                scores = []
                for trainIndices, testIndices in kFolds:
                    classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS)
                    classifier.setParams(paramsSet)
                    classifier.fit_hdf5(dataset, trainIndices=learningIndices[trainIndices], viewsIndices=viewsIndices)
                    testLabels = classifier.predict_hdf5(dataset, usedIndices=learningIndices[testIndices],
                                                         viewsIndices=viewsIndices)
                    testScore = metricModule.score(dataset.get("Labels").value[learningIndices[testIndices]], testLabels)
                    scores.append(testScore)
                crossValScore = np.mean(np.array(scores))
    
                if isBetter == "higher" and crossValScore > baseScore:
                    baseScore = crossValScore
                    bestSettings = paramsSet
                elif isBetter == "lower" and crossValScore < baseScore:
                    baseScore = crossValScore
                    bestSettings = paramsSet
            classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS)
            classifier.setParams(bestSettings)
    
        else:
            bestConfigs, _ = classifierModule.gridSearch_hdf5(dataset, viewsIndices, classificationKWARGS, learningIndices,
                                                              randomState, metric=metric, nIter=nIter)
            classificationKWARGS["classifiersConfigs"] = bestConfigs
            classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS)
    
        return classifier
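
    # Hypothetical call (values are illustrative): randomizedSearch returns a classifier
    # instance already configured with the best parameter set found over nIter draws.
    #
    #     classifier = randomizedSearch(dataset, "Fusion", [["f1_score", None]],
    #                                   learningIndices, KFolds, randomState,
    #                                   viewsIndices=[0, 1], nIter=30, nbCores=4,
    #                                   **classificationKWARGS)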
    
    
    def spearMint(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, **kwargs):
        pass
    
    
    def genHeatMaps(params, scoresArray, outputFileName):
        nbParams = len(params)
        if nbParams > 2:
            combinations = itertools.combinations(range(nbParams), 2)
        else:
            combinations = [(0, 1)]
        for combination in combinations:
            paramName1, paramArray1 = params[combination[0]]
            paramName2, paramArray2 = params[combination[1]]
    
            paramArray1Set = np.sort(np.array(list(set(paramArray1))))
            paramArray2Set = np.sort(np.array(list(set(paramArray2))))
    
            scoresMatrix = np.zeros((len(paramArray2Set), len(paramArray1Set))) - 0.1
            for param1, param2, score in zip(paramArray1, paramArray2, scoresArray):
                param1Index, = np.where(paramArray1Set == param1)
                param2Index, = np.where(paramArray2Set == param2)
                scoresMatrix[int(param2Index), int(param1Index)] = score
    
            plt.figure(figsize=(8, 6))
            plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95)
            plt.imshow(scoresMatrix, interpolation='nearest', cmap=plt.cm.hot)
            plt.xlabel(paramName1)
            plt.ylabel(paramName2)
            plt.colorbar()
            plt.xticks(np.arange(len(paramArray1Set)), paramArray1Set)
            plt.yticks(np.arange(len(paramArray2Set)), paramArray2Set, rotation=45)
            plt.title('Validation metric')
            plt.savefig(outputFileName + "heat_map-" + paramName1 + "-" + paramName2 + ".png")
            plt.close()
    
            # nohup python ~/dev/git/spearmint/spearmint/main.py . &
    
            # import json
            # import numpy as np
            # import math
            #
            # from os import system
            # from os.path import join
            #
            #
            # def run_kover(dataset, split, model_type, p, max_rules, output_dir):
            #     outdir = join(output_dir, "%s_%f" % (model_type, p))
            #     kover_command = "kover learn " \
            #                     "--dataset '%s' " \
            #                     "--split %s " \
            #                     "--model-type %s " \
            #                     "--p %f " \
            #                     "--max-rules %d " \
            #                     "--max-equiv-rules 10000 " \
            #                     "--hp-choice cv " \
            #                     "--random-seed 0 " \
            #                     "--output-dir '%s' " \
            #                     "--n-cpu 1 " \
            #                     "-v" % (dataset,
            #                             split,
            #                             model_type,
            #                             p,
            #                             max_rules,
            #                             outdir)
            #
            #     system(kover_command)
            #
            #     return json.load(open(join(outdir, "results.json")))["cv"]["best_hp"]["score"]
            #
            #
            # def main(job_id, params):
            #     print params
            #
            #     max_rules = params["MAX_RULES"][0]
            #
            #     species = params["SPECIES"][0]
            #     antibiotic = params["ANTIBIOTIC"][0]
            #     split = params["SPLIT"][0]
            #
            #     model_type = params["model_type"][0]
            #
            #     # LS31
            #     if species == "saureus":
            #         dataset_path = "/home/droale01/droale01-ls31/projects/genome_scm/data/earle_2016/saureus/kover_datasets/%s.kover" % antibiotic
            #     else:
            #         dataset_path = "/home/droale01/droale01-ls31/projects/genome_scm/genome_scm_paper/data/%s/%s.kover" % (species, antibiotic)
            #
            #     output_path = "/home/droale01/droale01-ls31/projects/genome_scm/manifold_scm/spearmint/vanilla_scm/%s/%s" % (species, antibiotic)
            #
            #     # MacBook
            #     #dataset_path = "/Volumes/Einstein 1/kover_phylo/datasets/%s/%s.kover" % (species, antibiotic)
            #     #output_path = "/Volumes/Einstein 1/manifold_scm/version2/%s_spearmint" % antibiotic
            #
            #     return run_kover(dataset=dataset_path,
            #                      split=split,
            #                      model_type=model_type,
            #                      p=params["p"][0],
            #                      max_rules=max_rules,
            #                      output_dir=output_path)
            # killall mongod && sleep 1 && rm -r database/* && rm mongo.log*
            # mongod --fork --logpath mongo.log --dbpath database
            #
            # {
            #     "language"        : "PYTHON",
            #     "experiment-name" : "vanilla_scm_cdiff_azithromycin",
            #     "polling-time"    : 1,
            #     "resources" : {
            #         "my-machine" : {
            #             "scheduler"         : "local",
            #             "max-concurrent"    : 5,
            #             "max-finished-jobs" : 100
            #         }
            #     },
            #     "tasks": {
            #         "resistance" : {
            #             "type"       : "OBJECTIVE",
            #             "likelihood" : "NOISELESS",
            #             "main-file"  : "spearmint_wrapper",
            #             "resources"  : ["my-machine"]
            #         }
            #     },
            #     "variables": {
            #
            #         "MAX_RULES" : {
            #             "type" : "ENUM",
            #             "size" : 1,
            #             "options": [10]
            #         },
            #
            #
            #         "SPECIES" : {
            #             "type" : "ENUM",
            #             "size" : 1,
            #             "options": ["cdiff"]
            #         },
            #         "ANTIBIOTIC" : {
            #             "type" : "ENUM",
            #             "size" : 1,
            #             "options": ["azithromycin"]
            #         },
            #         "SPLIT" : {
            #             "type" : "ENUM",
            #             "size" : 1,
            #             "options": ["split_seed_2"]
            #         },
            #
            #
            #         "model_type" : {
            #             "type" : "ENUM",
            #             "size" : 1,
            #             "options": ["conjunction", "disjunction"]
            #         },
            #         "p" : {
            #             "type" : "FLOAT",
            #             "size" : 1,
            #             "min"  : 0.01,
            #             "max"  : 100
            #         }
            #     }
            # }
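
    # Hypothetical example of genHeatMaps inputs (values are illustrative): params is a
    # list of (name, sampled values) pairs and scoresArray holds one validation score
    # per sampled configuration.
    #
    #     params = [("nbTrees", np.array([10, 10, 50, 50])),
    #               ("maxDepth", np.array([3, 5, 3, 5]))]
    #     scoresArray = np.array([0.71, 0.74, 0.78, 0.80])
    #     genHeatMaps(params, scoresArray, "Results/example-")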