diff --git a/README.md b/README.md index 7782eb97a1026686cc3c92e3f14fcf402bfecac7..faed06e0249715aa82f8c2e7024d2e5e885c01e8 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ And the following python modules : * [matplotlib](http://matplotlib.org/) - Used to plot results * [sklearn](http://scikit-learn.org/stable/) - Used for the monoview classifiers * [joblib](https://pypi.python.org/pypi/joblib) - Used to compute on multiple threads -* [h5py](www.h5py.org) - Used to generate HDF5 datasets on hard drive and use them to spare RAM +* [h5py](https://www.h5py.org) - Used to generate HDF5 datasets on hard drive and use them to spare RAM * [pickle](https://docs.python.org/3/library/pickle.html) - Used to store some results * ([graphviz](https://pypi.python.org/pypi/graphviz) - Used for decision tree interpretation) diff --git a/docs/source/analyzeresult.rst b/docs/source/analyzeresult.rst index 2367d0d6d17114b02e7ae8770033eb9810088785..f2ca409bdb82368a860b824e1c9734b45992d0c7 100644 --- a/docs/source/analyzeresult.rst +++ b/docs/source/analyzeresult.rst @@ -2,4 +2,4 @@ Result analysis module ====================== .. automodule:: multiview_platform.MonoMultiViewClassifiers.ResultAnalysis - :members: \ No newline at end of file +:members: \ No newline at end of file diff --git a/docs/source/api.rst b/docs/source/api.rst index d5bc51ec2f59e5cf9a482a0c29bfa8197f2b7703..6eec11bf9ce1a8700bdb0288296b12e4073c8231 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -2,9 +2,9 @@ Multiview Platform ================== .. toctree:: - :maxdepth: 1 - :caption: Contents: +:maxdepth: 1 + :caption: Contents: - execution - monomultidoc - analyzeresult \ No newline at end of file + execution + monomultidoc + analyzeresult \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index a62e70c96876ab175addf6f34665fcb6fb6017d3..dd872ce146c9059cb09d42b5bf215037f3e87dac 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -from recommonmark.parser import CommonMarkParser -from recommonmark.transform import AutoStructify + # import os, sys # # MultiviewPlatform documentation build configuration file, created by @@ -38,18 +37,17 @@ add_module_names = False # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = ['sphinx.ext.autodoc', - 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', - 'sphinx.ext.todo', - 'nbsphinx', - 'sphinx.ext.coverage', - 'sphinx.ext.mathjax', - 'sphinx.ext.ifconfig', - 'sphinx.ext.viewcode', - 'sphinx.ext.githubpages', - 'sphinx.ext.napoleon', - 'recommonmark'] - + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + 'nbsphinx', + 'sphinx.ext.coverage', + 'sphinx.ext.mathjax', + 'sphinx.ext.ifconfig', + 'sphinx.ext.viewcode', + 'sphinx.ext.githubpages', + 'sphinx.ext.napoleon', + 'recommonmark'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -57,7 +55,7 @@ templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # -source_suffix = {'.rst': 'restructuredtext', '.md':'markdown'} +source_suffix = {'.rst': 'restructuredtext', '.md': 'markdown'} # source_suffix = '.rst' # source_suffix = ['.rst', '.md'] @@ -100,7 +98,6 @@ pygments_style = 'sphinx' # If true, `todo` and `todoList` produce output, else they produce nothing. 
todo_include_todos = True - # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for @@ -119,13 +116,11 @@ html_theme = 'sphinx_rtd_theme' # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = [] - # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. htmlhelp_basename = 'MultiviewPlatformdoc' - # -- Options for LaTeX output --------------------------------------------- latex_elements = { @@ -154,7 +149,6 @@ latex_documents = [ u'Baptiste BAUVIN', 'manual'), ] - # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples @@ -164,7 +158,6 @@ man_pages = [ [author], 1) ] - # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples @@ -176,9 +169,6 @@ texinfo_documents = [ 'Miscellaneous'), ] - - - # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = {'https://docs.python.org/': None} @@ -186,4 +176,4 @@ intersphinx_mapping = {'https://docs.python.org/': None} # app.add_config_value('recommonmark_config', { # 'auto_toc_tree_section': 'Contents', # }, True) -# app.add_transform(AutoStructify) \ No newline at end of file +# app.add_transform(AutoStructify) diff --git a/docs/source/execution.rst b/docs/source/execution.rst index 3d26fece2aa89ea3212a2051624d9068f8e8b8fb..a1affdb1af4cabf34b685bff05a1392802b3de72 100644 --- a/docs/source/execution.rst +++ b/docs/source/execution.rst @@ -2,5 +2,5 @@ Welcome to the exection documentation ===================================== .. automodule:: multiview_platform.Exec - :members: +:members: diff --git a/docs/source/index.rst b/docs/source/index.rst index aad5d65fc35aac3a8ca3d0cdda2495b5a51d9c8f..c1f920fca59f85d69efe6472e73fb2aa2a29cc1a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,7 +1,7 @@ .. MultiviewPlatform documentation master file, created by - sphinx-quickstart on Mon Jan 29 17:13:09 2018. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. +sphinx-quickstart on Mon Jan 29 17:13:09 2018. +You can adapt this file completely to your liking, but it should at least +contain the root `toctree` directive. Welcome to MultiviewPlatform's documentation! ============================================= @@ -11,11 +11,11 @@ This package is used as an easy-to-use platform to estimate different mono- and The main advantage of the platform is that it allows to add and remove a classifier without modifying its core code (the procedure is described thoroughly in this documentation). .. toctree:: - :maxdepth: 3 - :caption: Contents: +:maxdepth: 3 + :caption: Contents: - readme - api + readme + api .. examples diff --git a/docs/source/monomulti/exec_classif.rst b/docs/source/monomulti/exec_classif.rst index fb379570eb8367796a8ecc95cd12877dcfb03d0b..35a77fc48659a7ae4abfaea0155ff2d99bcd7b57 100644 --- a/docs/source/monomulti/exec_classif.rst +++ b/docs/source/monomulti/exec_classif.rst @@ -2,5 +2,5 @@ Classification execution module =============================== .. 
automodule:: multiview_platform.MonoMultiViewClassifiers.ExecClassif - :members: - :inherited-members: \ No newline at end of file +:members: + :inherited-members: \ No newline at end of file diff --git a/docs/source/monomulti/metrics.rst b/docs/source/monomulti/metrics.rst index c42b38c49b6529c78865f2ceacf212ae5b55f112..46e970552d76dccbbd41ee80718817fe0c5578ca 100644 --- a/docs/source/monomulti/metrics.rst +++ b/docs/source/monomulti/metrics.rst @@ -2,5 +2,5 @@ Metrics framework ================= .. automodule:: multiview_platform.MonoMultiViewClassifiers.Metrics.framework - :members: - :inherited-members: \ No newline at end of file +:members: + :inherited-members: \ No newline at end of file diff --git a/docs/source/monomulti/multiview_classifiers/diversity_fusion.rst b/docs/source/monomulti/multiview_classifiers/diversity_fusion.rst index 507f7d5917907d61a79b647ccefb6cd088b47f00..6d8e675c2c9d085564f4f796cc7e079b9629de73 100644 --- a/docs/source/monomulti/multiview_classifiers/diversity_fusion.rst +++ b/docs/source/monomulti/multiview_classifiers/diversity_fusion.rst @@ -2,4 +2,4 @@ Diversity Fusion Classifiers ============================ .. automodule:: multiview_platform.MonoMultiViewClassifiers.Multiview.Additions.diversity_utils - :members: +:members: diff --git a/docs/source/monomulti/utils/execution.rst b/docs/source/monomulti/utils/execution.rst index 4956fcbe067c9cc4139c206dc72cd7c475b51d0b..f761534a1f26aa8bfdfbe30ad482759bfa9a8ecb 100644 --- a/docs/source/monomulti/utils/execution.rst +++ b/docs/source/monomulti/utils/execution.rst @@ -2,5 +2,5 @@ Utils execution module ====================== .. automodule:: multiview_platform.MonoMultiViewClassifiers.utils.execution - :members: - :inherited-members: \ No newline at end of file +:members: + :inherited-members: \ No newline at end of file diff --git a/docs/source/monomulti/utils/multiclass.rst b/docs/source/monomulti/utils/multiclass.rst index f7e11aee6c4d459f8c504247bda6ce15432bb53e..cd86315269fe085ddea94183b21d62e2155e3083 100644 --- a/docs/source/monomulti/utils/multiclass.rst +++ b/docs/source/monomulti/utils/multiclass.rst @@ -2,5 +2,5 @@ Utils Multiclass module ======================= .. automodule:: multiview_platform.MonoMultiViewClassifiers.utils.Multiclass - :members: - :inherited-members: \ No newline at end of file +:members: + :inherited-members: \ No newline at end of file diff --git a/docs/source/monomultidoc.rst b/docs/source/monomultidoc.rst index b25fd849aaefb289724abedd80a1a95ee03d3938..157b5f7e3fe1ae599514af2a428de4db8543bc13 100644 --- a/docs/source/monomultidoc.rst +++ b/docs/source/monomultidoc.rst @@ -2,13 +2,13 @@ Mono and mutliview classification ================================= .. toctree:: - :maxdepth: 1 - :caption: Contents: +:maxdepth: 1 + :caption: Contents: - monomulti/metrics - monomulti/monoview_classifier - monomulti/multiview_classifier - monomulti/exec_classif - monomulti/multiview_classifiers/diversity_fusion - monomulti/utils/execution - monomulti/utils/multiclass + monomulti/metrics + monomulti/monoview_classifier + monomulti/multiview_classifier + monomulti/exec_classif + monomulti/multiview_classifiers/diversity_fusion + monomulti/utils/execution + monomulti/utils/multiclass diff --git a/docs/source/readme.rst b/docs/source/readme.rst index 33481978594be226a3dbb05193c7c4bbe54c8d75..80d3e0a6622a17419f888d8d85059751a6ab984d 100644 --- a/docs/source/readme.rst +++ b/docs/source/readme.rst @@ -2,6 +2,6 @@ Read me ======= .. 
toctree:: - :maxdepth: 1 +:maxdepth: 1 - ../../README.md \ No newline at end of file + ../../README.md \ No newline at end of file diff --git a/docs/source/sphinxext/recommon.py b/docs/source/sphinxext/recommon.py index 6b1cb8c84239dd57a9e36625b57f110de44ae37f..4137b7c4b942ff8192915ed49691172bf71e0965 100644 --- a/docs/source/sphinxext/recommon.py +++ b/docs/source/sphinxext/recommon.py @@ -1,4 +1,5 @@ from recommonmark.transform import AutoStructify + def setup(app): - app.add_transform(AutoStructify) \ No newline at end of file + app.add_transform(AutoStructify) diff --git a/multiview_platform/Exec.py b/multiview_platform/Exec.py index ce02d712de8212e99aeb421f12762b92889a53d9..56a58c1defafb0637ad15bb85b315bc710da3106 100644 --- a/multiview_platform/Exec.py +++ b/multiview_platform/Exec.py @@ -10,5 +10,5 @@ def Exec(): ExecClassif.execClassif(sys.argv[1:]) -if __name__=="__main__": - Exec() \ No newline at end of file +if __name__ == "__main__": + Exec() diff --git a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py index 2e50963c731869f60423ae5806fb223a4eb8650c..31b56ffd453b1cb21472684c77a0294ddc9bd9c8 100644 --- a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py +++ b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py @@ -8,9 +8,9 @@ import time import matplotlib import numpy as np from joblib import Parallel, delayed -import h5py -matplotlib.use('Agg') # Anti-Grain Geometry C++ library to make a raster (pixel) image of the figure +matplotlib.use( + 'Agg') # Anti-Grain Geometry C++ library to make a raster (pixel) image of the figure # Import own modules from . import MonoviewClassifiers @@ -18,16 +18,16 @@ from . import MultiviewClassifiers from .Multiview.ExecMultiview import ExecMultiview, ExecMultiview_multicore from .Monoview.ExecClassifMonoView import ExecMonoview, ExecMonoview_multicore from .utils import GetMultiviewDb as DB -from .ResultAnalysis import getResults #resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes, +from .ResultAnalysis import \ + getResults # resultAnalysis, analyzeLabels, analyzeIterResults, analyzeIterLabels, genNamesFromRes, from .utils import execution, Dataset, Multiclass -from . import Metrics # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args): +def initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args): r"""Used to create a list of all the algorithm packages names used for the benchmark. 
First this function will check if the benchmark need mono- or/and multiview algorithms and adds to the right @@ -53,11 +53,14 @@ def initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args): """ benchmark = {"Monoview": {}, "Multiview": {}} allMultiviewPackages = [name for _, name, isPackage - in pkgutil.iter_modules(['./MonoMultiViewClassifiers/MultiviewClassifiers/']) if isPackage] + in pkgutil.iter_modules( + ['./MonoMultiViewClassifiers/MultiviewClassifiers/']) if isPackage] if "Monoview" in CL_type: if monoviewAlgos == ['']: - benchmark["Monoview"] = [name for _, name, isPackage in pkgutil.iter_modules(["./MonoMultiViewClassifiers/MonoviewClassifiers"]) + benchmark["Monoview"] = [name for _, name, isPackage in + pkgutil.iter_modules([ + "./MonoMultiViewClassifiers/MonoviewClassifiers"]) if not isPackage] else: @@ -71,19 +74,26 @@ def initBenchmark(CL_type, monoviewAlgos, multiviewAlgos, args): algosMutliview = multiviewAlgos for multiviewPackageName in allMultiviewPackages: if multiviewPackageName in algosMutliview: - multiviewPackage = getattr(MultiviewClassifiers, multiviewPackageName) - multiviewModule = getattr(multiviewPackage, multiviewPackageName+"Module") + multiviewPackage = getattr(MultiviewClassifiers, + multiviewPackageName) + multiviewModule = getattr(multiviewPackage, + multiviewPackageName + "Module") benchmark = multiviewModule.getBenchmark(benchmark, args=args) if CL_type == ["Benchmark"]: allMonoviewAlgos = [name for _, name, isPackage in - pkgutil.iter_modules(['./MonoMultiViewClassifiers/MonoviewClassifiers']) + pkgutil.iter_modules([ + './MonoMultiViewClassifiers/MonoviewClassifiers']) if (not isPackage) and name not in ["framework"]] benchmark["Monoview"] = allMonoviewAlgos - benchmark["Multiview"] = dict((multiviewPackageName, "_") for multiviewPackageName in allMultiviewPackages) + benchmark["Multiview"] = dict( + (multiviewPackageName, "_") for multiviewPackageName in + allMultiviewPackages) for multiviewPackageName in allMultiviewPackages: - multiviewPackage = getattr(MultiviewClassifiers, multiviewPackageName) - multiviewModule = getattr(multiviewPackage, multiviewPackageName+"Module") + multiviewPackage = getattr(MultiviewClassifiers, + multiviewPackageName) + multiviewModule = getattr(multiviewPackage, + multiviewPackageName + "Module") benchmark = multiviewModule.getBenchmark(benchmark, args=args) return benchmark @@ -107,9 +117,9 @@ def genViewsDictionnary(DATASET, views): datasetsNames = DATASET.keys() viewsDictionary = {} for datasetName in datasetsNames: - if datasetName[:4]=="View": + if datasetName[:4] == "View": viewName = DATASET.get(datasetName).attrs["name"] - if type(viewName)==bytes: + if type(viewName) == bytes: viewName = viewName.decode("utf-8") if viewName in views: viewsDictionary[viewName] = int(datasetName[4:]) @@ -141,14 +151,16 @@ def initMonoviewExps(benchmark, viewsDictionary, nbClass, kwargsInit): benchmark : Dictionary of dictionaries Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark. 
""" - argumentDictionaries = {"Monoview":[], "Multiview":[]} + argumentDictionaries = {"Monoview": [], "Multiview": []} if benchmark["Monoview"]: argumentDictionaries["Monoview"] = [] for viewName, viewIndex in viewsDictionary.items(): for classifier in benchmark["Monoview"]: arguments = { - "args": {classifier + "KWARGS": kwargsInit[classifier + "KWARGSInit"], "feat": viewName, - "CL_type": classifier, "nbClass": nbClass}, "viewIndex": viewIndex} + "args": {classifier + "KWARGS": kwargsInit[ + classifier + "KWARGSInit"], "feat": viewName, + "CL_type": classifier, "nbClass": nbClass}, + "viewIndex": viewIndex} argumentDictionaries["Monoview"].append(arguments) return argumentDictionaries @@ -176,9 +188,12 @@ def initMonoviewKWARGS(args, classifiersNames): try: classifierModule = getattr(MonoviewClassifiers, classifiersName) except AttributeError: - raise AttributeError(classifiersName+" is not implemented in MonoviewClassifiers, " - "please specify the name of the file in MonoviewClassifiers") - monoviewKWARGS[classifiersName + "KWARGSInit"] = classifierModule.formatCmdArgs(args) + raise AttributeError( + classifiersName + " is not implemented in MonoviewClassifiers, " + "please specify the name of the file in MonoviewClassifiers") + monoviewKWARGS[ + classifiersName + "KWARGSInit"] = classifierModule.formatCmdArgs( + args) logging.debug("Done:\t Initializing Monoview classifiers arguments") return monoviewKWARGS @@ -188,7 +203,8 @@ def initKWARGSFunc(args, benchmark): return monoviewKWARGS -def initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, +def initMultiviewArguments(args, benchmark, views, viewsIndices, + argumentDictionaries, randomState, directory, resultsMonoview, classificationIndices): """Used to add each monoview exeperience args to the list of monoview experiences args""" logging.debug("Start:\t Initializing Multiview classifiers arguments") @@ -196,10 +212,15 @@ def initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDiction if "Multiview" in benchmark: for multiviewAlgoName in benchmark["Multiview"]: multiviewPackage = getattr(MultiviewClassifiers, multiviewAlgoName) - mutliviewModule = getattr(multiviewPackage, multiviewAlgoName+"Module") - - multiviewArguments += mutliviewModule.getArgs(args, benchmark, views, viewsIndices, randomState, directory, - resultsMonoview, classificationIndices) + mutliviewModule = getattr(multiviewPackage, + multiviewAlgoName + "Module") + + multiviewArguments += mutliviewModule.getArgs(args, benchmark, + views, viewsIndices, + randomState, + directory, + resultsMonoview, + classificationIndices) argumentDictionaries["Multiview"] = multiviewArguments logging.debug("Start:\t Initializing Multiview classifiers arguments") return argumentDictionaries @@ -218,7 +239,8 @@ def arangeMetrics(metrics, metricPrinc): return metrics -def benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY, kFolds): +def benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY, + kFolds): logging.debug("Start:\t Benchmark initialization") if not os.path.exists(os.path.dirname(directory + "train_labels.csv")): try: @@ -229,85 +251,121 @@ def benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY, k trainIndices = classificationIndices[0] trainLabels = labels[trainIndices] np.savetxt(directory + "train_labels.csv", trainLabels, delimiter=",") - np.savetxt(directory + "train_indices.csv", classificationIndices[0], delimiter=",") + 
np.savetxt(directory + "train_indices.csv", classificationIndices[0], + delimiter=",") resultsMonoview = [] folds = kFolds.split(np.arange(len(trainLabels)), trainLabels) - minFoldLen = int(len(trainLabels)/kFolds.n_splits) + minFoldLen = int(len(trainLabels) / kFolds.n_splits) for foldIndex, (trainCVIndices, testCVIndices) in enumerate(folds): - fileName = directory+"/folds/test_labels_fold_"+str(foldIndex)+".csv" + fileName = directory + "/folds/test_labels_fold_" + str( + foldIndex) + ".csv" if not os.path.exists(os.path.dirname(fileName)): try: os.makedirs(os.path.dirname(fileName)) except OSError as exc: if exc.errno != errno.EEXIST: raise - np.savetxt(fileName, trainLabels[testCVIndices[:minFoldLen]], delimiter=",") + np.savetxt(fileName, trainLabels[testCVIndices[:minFoldLen]], + delimiter=",") labelsNames = list(LABELS_DICTIONARY.values()) logging.debug("Done:\t Benchmark initialization") return resultsMonoview, labelsNames -def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None, - kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None, - benchmark=None, views=None, viewsIndices=None, flag=None, labels=None, - ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore, +def execOneBenchmark(coreIndex=-1, LABELS_DICTIONARY=None, directory=None, + classificationIndices=None, args=None, + kFolds=None, randomState=None, hyperParamSearch=None, + metrics=None, argumentDictionaries=None, + benchmark=None, views=None, viewsIndices=None, flag=None, + labels=None, + ExecMonoview_multicore=ExecMonoview_multicore, + ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments): """Used to run a benchmark using one core. 
ExecMonoview_multicore, initMultiviewArguments and ExecMultiview_multicore args are only used for tests""" - resultsMonoview, labelsNames = benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY, kFolds) + resultsMonoview, labelsNames = benchmarkInit(directory, + classificationIndices, labels, + LABELS_DICTIONARY, kFolds) logging.debug("Start:\t Monoview benchmark") - resultsMonoview += [ExecMonoview_multicore(directory, args.name, labelsNames, classificationIndices, kFolds, - coreIndex, args.type, args.pathF, randomState, labels, - hyperParamSearch=hyperParamSearch, metrics=metrics, - nIter=args.CL_HPS_iter, **argument) - for argument in argumentDictionaries["Monoview"]] + resultsMonoview += [ + ExecMonoview_multicore(directory, args.name, labelsNames, + classificationIndices, kFolds, + coreIndex, args.type, args.pathF, randomState, + labels, + hyperParamSearch=hyperParamSearch, + metrics=metrics, + nIter=args.CL_HPS_iter, **argument) + for argument in argumentDictionaries["Monoview"]] logging.debug("Done:\t Monoview benchmark") logging.debug("Start:\t Multiview arguments initialization") - argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, - randomState, directory, resultsMonoview, classificationIndices) + argumentDictionaries = initMultiviewArguments(args, benchmark, views, + viewsIndices, + argumentDictionaries, + randomState, directory, + resultsMonoview, + classificationIndices) logging.debug("Done:\t Multiview arguments initialization") logging.debug("Start:\t Multiview benchmark") resultsMultiview = [ - ExecMultiview_multicore(directory, coreIndex, args.name, classificationIndices, kFolds, args.type, - args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=args.CL_HPS_iter, **arguments) + ExecMultiview_multicore(directory, coreIndex, args.name, + classificationIndices, kFolds, args.type, + args.pathF, LABELS_DICTIONARY, randomState, + labels, hyperParamSearch=hyperParamSearch, + metrics=metrics, nIter=args.CL_HPS_iter, + **arguments) for arguments in argumentDictionaries["Multiview"]] logging.debug("Done:\t Multiview benchmark") return [flag, resultsMonoview + resultsMultiview] -def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None, - kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None, - benchmark=None, views=None, viewsIndices=None, flag=None, labels=None, +def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, + directory=None, classificationIndices=None, + args=None, + kFolds=None, randomState=None, + hyperParamSearch=None, metrics=None, + argumentDictionaries=None, + benchmark=None, views=None, viewsIndices=None, + flag=None, labels=None, ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments): """Used to run a benchmark using multiple cores. 
ExecMonoview_multicore, initMultiviewArguments and ExecMultiview_multicore args are only used for tests""" - resultsMonoview, labelsNames = benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY, kFolds) + resultsMonoview, labelsNames = benchmarkInit(directory, + classificationIndices, labels, + LABELS_DICTIONARY, kFolds) logging.debug("Start:\t Monoview benchmark") nbExperiments = len(argumentDictionaries["Monoview"]) nbMulticoreToDo = int(math.ceil(float(nbExperiments) / nbCores)) for stepIndex in range(nbMulticoreToDo): resultsMonoview += (Parallel(n_jobs=nbCores)( - delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, classificationIndices, kFolds, - coreIndex, args.type, args.pathF, randomState, labels, + delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, + classificationIndices, kFolds, + coreIndex, args.type, args.pathF, + randomState, labels, hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=args.CL_HPS_iter, - **argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores]) - for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))) + metrics=metrics, + nIter=args.CL_HPS_iter, + **argumentDictionaries["Monoview"][ + coreIndex + stepIndex * nbCores]) + for coreIndex in + range(min(nbCores, nbExperiments - stepIndex * nbCores)))) logging.debug("Done:\t Monoview benchmark") logging.debug("Start:\t Multiview arguments initialization") - argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, - randomState, directory, resultsMonoview, classificationIndices) + argumentDictionaries = initMultiviewArguments(args, benchmark, views, + viewsIndices, + argumentDictionaries, + randomState, directory, + resultsMonoview, + classificationIndices) logging.debug("Done:\t Multiview arguments initialization") logging.debug("Start:\t Multiview benchmark") @@ -316,55 +374,83 @@ def execOneBenchmark_multicore(nbCores=-1, LABELS_DICTIONARY=None, directory=Non nbMulticoreToDo = int(math.ceil(float(nbExperiments) / nbCores)) for stepIndex in range(nbMulticoreToDo): resultsMultiview += Parallel(n_jobs=nbCores)( - delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, classificationIndices, kFolds, - args.type, args.pathF, LABELS_DICTIONARY, randomState, labels, - hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_HPS_iter, - **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex]) - for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))) + delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, + classificationIndices, kFolds, + args.type, args.pathF, + LABELS_DICTIONARY, randomState, + labels, + hyperParamSearch=hyperParamSearch, + metrics=metrics, + nIter=args.CL_HPS_iter, + ** + argumentDictionaries["Multiview"][ + stepIndex * nbCores + coreIndex]) + for coreIndex in + range(min(nbCores, nbExperiments - stepIndex * nbCores))) logging.debug("Done:\t Multiview benchmark") return [flag, resultsMonoview + resultsMultiview] -def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, directory=None, classificationIndices=None, args=None, - kFolds=None, randomState=None, hyperParamSearch=None, metrics=None, argumentDictionaries=None, - benchmark=None, views=None, viewsIndices=None, flag=None, labels=None, - ExecMonoview_multicore=ExecMonoview_multicore, ExecMultiview_multicore=ExecMultiview_multicore, +def execOneBenchmarkMonoCore(DATASET=None, LABELS_DICTIONARY=None, + directory=None, 
classificationIndices=None, + args=None, + kFolds=None, randomState=None, + hyperParamSearch=None, metrics=None, + argumentDictionaries=None, + benchmark=None, views=None, viewsIndices=None, + flag=None, labels=None, + ExecMonoview_multicore=ExecMonoview_multicore, + ExecMultiview_multicore=ExecMultiview_multicore, initMultiviewArguments=initMultiviewArguments): - - resultsMonoview, labelsNames = benchmarkInit(directory, classificationIndices, labels, LABELS_DICTIONARY, kFolds) + resultsMonoview, labelsNames = benchmarkInit(directory, + classificationIndices, labels, + LABELS_DICTIONARY, kFolds) logging.debug("Start:\t Monoview benchmark") for arguments in argumentDictionaries["Monoview"]: - X = DATASET.get("View"+str(arguments["viewIndex"])) + X = DATASET.get("View" + str(arguments["viewIndex"])) Y = labels - resultsMonoview += [ExecMonoview(directory, X, Y, args.name, labelsNames, classificationIndices, kFolds, - 1, args.type, args.pathF, randomState, - hyperParamSearch=hyperParamSearch, metrics=metrics, - nIter=args.CL_HPS_iter, **arguments)] + resultsMonoview += [ + ExecMonoview(directory, X, Y, args.name, labelsNames, + classificationIndices, kFolds, + 1, args.type, args.pathF, randomState, + hyperParamSearch=hyperParamSearch, metrics=metrics, + nIter=args.CL_HPS_iter, **arguments)] logging.debug("Done:\t Monoview benchmark") logging.debug("Start:\t Multiview arguments initialization") - argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, - randomState, directory, resultsMonoview, classificationIndices) + argumentDictionaries = initMultiviewArguments(args, benchmark, views, + viewsIndices, + argumentDictionaries, + randomState, directory, + resultsMonoview, + classificationIndices) logging.debug("Done:\t Multiview arguments initialization") logging.debug("Start:\t Multiview benchmark") resultsMultiview = [] for arguments in argumentDictionaries["Multiview"]: resultsMultiview += [ - ExecMultiview(directory, DATASET, args.name, classificationIndices, kFolds, 1, args.type, - args.pathF, LABELS_DICTIONARY, randomState, labels, hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=args.CL_HPS_iter, **arguments)] + ExecMultiview(directory, DATASET, args.name, classificationIndices, + kFolds, 1, args.type, + args.pathF, LABELS_DICTIONARY, randomState, labels, + hyperParamSearch=hyperParamSearch, + metrics=metrics, nIter=args.CL_HPS_iter, **arguments)] logging.debug("Done:\t Multiview benchmark") return [flag, resultsMonoview + resultsMultiview] -def execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentsDictionaries, classificationIndices, directories, - directory, multiClassLabels, metrics, labelsDictionary, nbLabels, DATASET, - execOneBenchmark=execOneBenchmark, execOneBenchmark_multicore=execOneBenchmark_multicore, - execOneBenchmarkMonoCore=execOneBenchmarkMonoCore, getResults=getResults, delete=DB.deleteHDF5): +def execBenchmark(nbCores, statsIter, nbMulticlass, + benchmarkArgumentsDictionaries, classificationIndices, + directories, + directory, multiClassLabels, metrics, labelsDictionary, + nbLabels, DATASET, + execOneBenchmark=execOneBenchmark, + execOneBenchmark_multicore=execOneBenchmark_multicore, + execOneBenchmarkMonoCore=execOneBenchmarkMonoCore, + getResults=getResults, delete=DB.deleteHDF5): r"""Used to execute the needed benchmark(s) on multicore or mono-core functions. 
Parameters @@ -414,24 +500,32 @@ def execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentsDictionari for stepIndex in nbMulticoreToDo: results += (Parallel(n_jobs=nbCores)(delayed(execOneBenchmark) (coreIndex=coreIndex, - **benchmarkArgumentsDictionaries[coreIndex + stepIndex * nbCores]) - for coreIndex in range(min(nbCores, nbExpsToDo - stepIndex * nbCores)))) + ** + benchmarkArgumentsDictionaries[ + coreIndex + stepIndex * nbCores]) + for coreIndex in range( + min(nbCores, nbExpsToDo - stepIndex * nbCores)))) else: - results += [execOneBenchmark_multicore(nbCores=nbCores, **benchmarkArgumentsDictionaries[0])] + results += [execOneBenchmark_multicore(nbCores=nbCores, ** + benchmarkArgumentsDictionaries[0])] else: for arguments in benchmarkArgumentsDictionaries: results += [execOneBenchmarkMonoCore(DATASET=DATASET, **arguments)] logging.debug("Done:\t Executing all the needed biclass benchmarks") if nbCores > 1: - logging.debug("Start:\t Deleting " + str(nbCores) + " temporary datasets for multiprocessing") + logging.debug("Start:\t Deleting " + str( + nbCores) + " temporary datasets for multiprocessing") args = benchmarkArgumentsDictionaries[0]["args"] datasetFiles = delete(args.pathF, args.name, nbCores) logging.debug("Start:\t Deleting datasets for multiprocessing") # Do everything with flagging - nbExamples = len(classificationIndices[0][0])+len(classificationIndices[0][1]) + nbExamples = len(classificationIndices[0][0]) + len( + classificationIndices[0][1]) multiclassGroundTruth = DATASET.get("Labels").value logging.debug("Start:\t Analyzing predictions") - getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries, multiclassGroundTruth, metrics, classificationIndices, directories, directory, labelsDictionary, nbExamples, nbLabels) + getResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries, + multiclassGroundTruth, metrics, classificationIndices, + directories, directory, labelsDictionary, nbExamples, nbLabels) logging.debug("Done:\t Analyzing predictions") return results @@ -453,22 +547,33 @@ def execClassif(arguments): monoviewAlgos = args.CL_algos_monoview multiviewAlgos = args.CL_algos_multiview - directory = execution.initLogFile(args.name, args.views, args.CL_type, args.log, args.debug, args.label, args.res_dir) + directory = execution.initLogFile(args.name, args.views, args.CL_type, + args.log, args.debug, args.label, + args.res_dir) randomState = execution.initRandomState(args.randomState, directory) - statsIterRandomStates = execution.initStatsIterRandomStates(statsIter,randomState) - - getDatabase = execution.getDatabaseFunction(args.name,args.type) - - - DATASET, LABELS_DICTIONARY, datasetname = getDatabase(args.views, args.pathF, args.name, args.CL_nbClass, - args.CL_classes, randomState, args.full, args.add_noise, args.noise_std) + statsIterRandomStates = execution.initStatsIterRandomStates(statsIter, + randomState) + + getDatabase = execution.getDatabaseFunction(args.name, args.type) + + DATASET, LABELS_DICTIONARY, datasetname = getDatabase(args.views, + args.pathF, args.name, + args.CL_nbClass, + args.CL_classes, + randomState, + args.full, + args.add_noise, + args.noise_std) args.name = datasetname - splits = execution.genSplits(DATASET.get("Labels").value, args.CL_split, statsIterRandomStates) + splits = execution.genSplits(DATASET.get("Labels").value, args.CL_split, + statsIterRandomStates) - multiclassLabels, labelsCombinations, indicesMulticlass = Multiclass.genMulticlassLabels(DATASET.get("Labels").value, 
multiclassMethod, splits) + multiclassLabels, labelsCombinations, indicesMulticlass = Multiclass.genMulticlassLabels( + DATASET.get("Labels").value, multiclassMethod, splits) - kFolds = execution.genKFolds(statsIter, args.CL_nbFolds, statsIterRandomStates) + kFolds = execution.genKFolds(statsIter, args.CL_nbFolds, + statsIterRandomStates) datasetFiles = Dataset.initMultipleDatasets(args.pathF, args.name, nbCores) @@ -483,7 +588,11 @@ def execClassif(arguments): metrics = [metric.split(":") for metric in args.CL_metrics] if metrics == [[""]]: metricsNames = [name for _, name, isPackage - in pkgutil.iter_modules(['./MonoMultiViewClassifiers/Metrics']) if not isPackage and name not in ["framework", "log_loss", "matthews_corrcoef", "roc_auc_score"]] + in pkgutil.iter_modules( + ['./MonoMultiViewClassifiers/Metrics']) if + not isPackage and name not in ["framework", "log_loss", + "matthews_corrcoef", + "roc_auc_score"]] metrics = [[metricName] for metricName in metricsNames] metrics = arangeMetrics(metrics, args.CL_metric_princ) for metricIndex, metric in enumerate(metrics): @@ -496,28 +605,22 @@ def execClassif(arguments): dataBaseTime = time.time() - start - argumentDictionaries = initMonoviewExps(benchmark, viewsDictionary, NB_CLASS, initKWARGS) + argumentDictionaries = initMonoviewExps(benchmark, viewsDictionary, + NB_CLASS, initKWARGS) directories = execution.genDirecortiesNames(directory, statsIter) - benchmarkArgumentDictionaries = execution.genArgumentDictionaries(LABELS_DICTIONARY, directories, multiclassLabels, - labelsCombinations, indicesMulticlass, - hyperParamSearch, args, kFolds, - statsIterRandomStates, metrics, - argumentDictionaries, benchmark, nbViews, - views, viewsIndices) + benchmarkArgumentDictionaries = execution.genArgumentDictionaries( + LABELS_DICTIONARY, directories, multiclassLabels, + labelsCombinations, indicesMulticlass, + hyperParamSearch, args, kFolds, + statsIterRandomStates, metrics, + argumentDictionaries, benchmark, nbViews, + views, viewsIndices) nbMulticlass = len(labelsCombinations) - execBenchmark(nbCores, statsIter, nbMulticlass, benchmarkArgumentDictionaries, splits, directories, - directory, multiclassLabels, metrics, LABELS_DICTIONARY, NB_CLASS, DATASET) - - - - - - - - - - + execBenchmark(nbCores, statsIter, nbMulticlass, + benchmarkArgumentDictionaries, splits, directories, + directory, multiclassLabels, metrics, LABELS_DICTIONARY, + NB_CLASS, DATASET) # # def classifyOneIter_multicore(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, args, classificationIndices, @@ -623,102 +726,102 @@ def execClassif(arguments): # "s, Iteration Analysis Time : " + str(int(globalAnalysisTime)) + # "s, Iteration Duration : " + str(int(totalTime)) + "s") # return results, labelAnalysis - # - # - # - # - # - # - # - # if statsIter > 1: - # logging.debug("Start:\t Benchmark classification") - # for statIterIndex in range(statsIter): - # if not os.path.exists(os.path.dirname(directories[statIterIndex] + "train_labels.csv")): - # try: - # os.makedirs(os.path.dirname(directories[statIterIndex] + "train_labels.csv")) - # except OSError as exc: - # if exc.errno != errno.EEXIST: - # raise - # trainIndices, testIndices = classificationIndices[statIterIndex] - # trainLabels = DATASET.get("Labels").value[trainIndices] - # np.savetxt(directories[statIterIndex] + "train_labels.csv", trainLabels, delimiter=",") - # if nbCores > 1: - # iterResults = [] - # nbExperiments = statsIter*len(multiclassLabels) - # for stepIndex in range(int(math.ceil(float(nbExperiments) / 
nbCores))): - # iterResults += (Parallel(n_jobs=nbCores)( - # delayed(classifyOneIter_multicore)(LABELS_DICTIONARY, argumentDictionaries, 1, - # directories[coreIndex + stepIndex * nbCores], args, - # classificationIndices[coreIndex + stepIndex * nbCores], - # kFolds[coreIndex + stepIndex * nbCores], - # statsIterRandomStates[coreIndex + stepIndex * nbCores], - # hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, - # start, benchmark, - # views) - # for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))) - # logging.debug("Start:\t Deleting " + str(nbCores) + " temporary datasets for multiprocessing") - # datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores) - # logging.debug("Start:\t Deleting datasets for multiprocessing") - # else: - # iterResults = [] - # for iterIndex in range(statsIter): - # if not os.path.exists(os.path.dirname(directories[iterIndex] + "train_labels.csv")): - # try: - # os.makedirs(os.path.dirname(directories[iterIndex] + "train_labels.csv")) - # except OSError as exc: - # if exc.errno != errno.EEXIST: - # raise - # trainIndices, testIndices = classificationIndices[iterIndex] - # trainLabels = DATASET.get("Labels").value[trainIndices] - # np.savetxt(directories[iterIndex] + "train_labels.csv", trainLabels, delimiter=",") - # iterResults.append( - # classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories[iterIndex], args, - # classificationIndices[iterIndex], kFolds[iterIndex], statsIterRandomStates[iterIndex], - # hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, benchmark, - # views)) - # logging.debug("Done:\t Benchmark classification") - # logging.debug("Start:\t Global Results Analysis") - # classifiersIterResults = [] - # iterLabelAnalysis = [] - # for result in iterResults: - # classifiersIterResults.append(result[0]) - # iterLabelAnalysis.append(result[1]) - # - # mono,multi = classifiersIterResults[0] - # classifiersNames = genNamesFromRes(mono, multi) - # analyzeIterLabels(iterLabelAnalysis, directory, classifiersNames) - # analyzeIterResults(classifiersIterResults, args.name, metrics, directory) - # logging.debug("Done:\t Global Results Analysis") - # totalDur = time.time() - start - # m, s = divmod(totalDur, 60) - # h, m = divmod(m, 60) - # d, h = divmod(h, 24) - # # print "%d_%02d_%02d" % (h, m, s) - # logging.info("Info:\t Total duration : " + str(d) + " days, " + str(h) + " hours, " + str(m) + " mins, " + str( - # int(s)) + "secs.") - # - # else: - # logging.debug("Start:\t Benchmark classification") - # if not os.path.exists(os.path.dirname(directories + "train_labels.csv")): - # try: - # os.makedirs(os.path.dirname(directories + "train_labels.csv")) - # except OSError as exc: - # if exc.errno != errno.EEXIST: - # raise - # trainIndices, testIndices = classificationIndices - # trainLabels = DATASET.get("Labels").value[trainIndices] - # np.savetxt(directories + "train_labels.csv", trainLabels, delimiter=",") - # res, labelAnalysis = classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories, args, classificationIndices, - # kFolds, - # statsIterRandomStates, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, - # benchmark, views) - # logging.debug("Done:\t Benchmark classification") - # totalDur = time.time()-start - # m, s = divmod(totalDur, 60) - # h, m = divmod(m, 60) - # d, h = divmod(h, 24) - # # print "%d_%02d_%02d" % (h, m, s) - # logging.info("Info:\t Total duration : "+str(d)+ " days, "+str(h)+" hours, "+str(m)+" mins, 
"+str(int(s))+"secs.") - # - # if statsIter > 1: - # pass +# +# +# +# +# +# +# +# if statsIter > 1: +# logging.debug("Start:\t Benchmark classification") +# for statIterIndex in range(statsIter): +# if not os.path.exists(os.path.dirname(directories[statIterIndex] + "train_labels.csv")): +# try: +# os.makedirs(os.path.dirname(directories[statIterIndex] + "train_labels.csv")) +# except OSError as exc: +# if exc.errno != errno.EEXIST: +# raise +# trainIndices, testIndices = classificationIndices[statIterIndex] +# trainLabels = DATASET.get("Labels").value[trainIndices] +# np.savetxt(directories[statIterIndex] + "train_labels.csv", trainLabels, delimiter=",") +# if nbCores > 1: +# iterResults = [] +# nbExperiments = statsIter*len(multiclassLabels) +# for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))): +# iterResults += (Parallel(n_jobs=nbCores)( +# delayed(classifyOneIter_multicore)(LABELS_DICTIONARY, argumentDictionaries, 1, +# directories[coreIndex + stepIndex * nbCores], args, +# classificationIndices[coreIndex + stepIndex * nbCores], +# kFolds[coreIndex + stepIndex * nbCores], +# statsIterRandomStates[coreIndex + stepIndex * nbCores], +# hyperParamSearch, metrics, coreIndex, viewsIndices, dataBaseTime, +# start, benchmark, +# views) +# for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))) +# logging.debug("Start:\t Deleting " + str(nbCores) + " temporary datasets for multiprocessing") +# datasetFiles = DB.deleteHDF5(args.pathF, args.name, nbCores) +# logging.debug("Start:\t Deleting datasets for multiprocessing") +# else: +# iterResults = [] +# for iterIndex in range(statsIter): +# if not os.path.exists(os.path.dirname(directories[iterIndex] + "train_labels.csv")): +# try: +# os.makedirs(os.path.dirname(directories[iterIndex] + "train_labels.csv")) +# except OSError as exc: +# if exc.errno != errno.EEXIST: +# raise +# trainIndices, testIndices = classificationIndices[iterIndex] +# trainLabels = DATASET.get("Labels").value[trainIndices] +# np.savetxt(directories[iterIndex] + "train_labels.csv", trainLabels, delimiter=",") +# iterResults.append( +# classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories[iterIndex], args, +# classificationIndices[iterIndex], kFolds[iterIndex], statsIterRandomStates[iterIndex], +# hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, benchmark, +# views)) +# logging.debug("Done:\t Benchmark classification") +# logging.debug("Start:\t Global Results Analysis") +# classifiersIterResults = [] +# iterLabelAnalysis = [] +# for result in iterResults: +# classifiersIterResults.append(result[0]) +# iterLabelAnalysis.append(result[1]) +# +# mono,multi = classifiersIterResults[0] +# classifiersNames = genNamesFromRes(mono, multi) +# analyzeIterLabels(iterLabelAnalysis, directory, classifiersNames) +# analyzeIterResults(classifiersIterResults, args.name, metrics, directory) +# logging.debug("Done:\t Global Results Analysis") +# totalDur = time.time() - start +# m, s = divmod(totalDur, 60) +# h, m = divmod(m, 60) +# d, h = divmod(h, 24) +# # print "%d_%02d_%02d" % (h, m, s) +# logging.info("Info:\t Total duration : " + str(d) + " days, " + str(h) + " hours, " + str(m) + " mins, " + str( +# int(s)) + "secs.") +# +# else: +# logging.debug("Start:\t Benchmark classification") +# if not os.path.exists(os.path.dirname(directories + "train_labels.csv")): +# try: +# os.makedirs(os.path.dirname(directories + "train_labels.csv")) +# except OSError as exc: +# if exc.errno != errno.EEXIST: +# raise +# 
trainIndices, testIndices = classificationIndices +# trainLabels = DATASET.get("Labels").value[trainIndices] +# np.savetxt(directories + "train_labels.csv", trainLabels, delimiter=",") +# res, labelAnalysis = classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directories, args, classificationIndices, +# kFolds, +# statsIterRandomStates, hyperParamSearch, metrics, DATASET, viewsIndices, dataBaseTime, start, +# benchmark, views) +# logging.debug("Done:\t Benchmark classification") +# totalDur = time.time()-start +# m, s = divmod(totalDur, 60) +# h, m = divmod(m, 60) +# d, h = divmod(h, 24) +# # print "%d_%02d_%02d" % (h, m, s) +# logging.info("Info:\t Total duration : "+str(d)+ " days, "+str(h)+" hours, "+str(m)+" mins, "+str(int(s))+"secs.") +# +# if statsIter > 1: +# pass diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/__init__.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/__init__.py index e954a8f8d3cdc6598b4db7649a29d52c15e0b103..22ded243a535227876eb8471aafb7aefbe883660 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Metrics/__init__.py +++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/__init__.py @@ -24,9 +24,10 @@ Define a getConfig function """ import os + for module in os.listdir(os.path.dirname(os.path.realpath(__file__))): if module in ['__init__.py'] or module[-3:] != '.py': continue __import__(module[:-3], locals(), globals(), [], 1) pass -del os \ No newline at end of file +del os diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/accuracy_score.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/accuracy_score.py index 8676216759cfdcc297939c8b04e83b32340bb803..44d43f5d148a1a7af2fae17de4c71fe3542e5893 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Metrics/accuracy_score.py +++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/accuracy_score.py @@ -14,7 +14,7 @@ __status__ = "Prototype" # Production, Development, Prototype def score(y_true, y_pred, multiclass=False, **kwargs): """Arguments: y_true: real labels - y_pred predicted labels + y_pred: predicted labels Keyword Arguments: "0": weights to compute accuracy @@ -39,7 +39,8 @@ def get_scorer(**kwargs): sample_weight = kwargs["0"] except: sample_weight = None - return make_scorer(metric, greater_is_better=True, sample_weight=sample_weight) + return make_scorer(metric, greater_is_better=True, + sample_weight=sample_weight) def getConfig(**kwargs): @@ -47,5 +48,6 @@ def getConfig(**kwargs): sample_weight = kwargs["0"] except: sample_weight = None - configString = "Accuracy score using " + str(sample_weight) + " as sample_weights (higher is better)" + configString = "Accuracy score using " + str( + sample_weight) + " as sample_weights (higher is better)" return configString diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/f1_score.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/f1_score.py index 13633903807e07ccca84f0c3e5f81b79b51741ec..8316daada938e7bfec03100204c4de41578f6d2e 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Metrics/f1_score.py +++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/f1_score.py @@ -32,7 +32,8 @@ def score(y_true, y_pred, multiclass=False, **kwargs): else: average = "binary" - score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average) + score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, + pos_label=pos_label, average=average) return score @@ -54,7 +55,8 @@ def get_scorer(**kwargs): average = 
kwargs["3"] except: average = "binary" - return make_scorer(metric, greater_is_better=True, sample_weight=sample_weight, labels=labels, + return make_scorer(metric, greater_is_better=True, + sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average) @@ -75,7 +77,9 @@ def getConfig(**kwargs): average = kwargs["3"] except: average = "binary" - configString = "F1 score using " + str(sample_weight) + " as sample_weights, " + str(labels) + " as labels, " + str( + configString = "F1 score using " + str( + sample_weight) + " as sample_weights, " + str( + labels) + " as labels, " + str( pos_label) \ + " as pos_label, " + average + " as average (higher is better)" return configString diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/fbeta_score.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/fbeta_score.py index aa2c9298720e44c18b45277aaabb4732032f0a20..13dd603f0404ad69ebd4cdcc06fcadee0d61e5c8 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Metrics/fbeta_score.py +++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/fbeta_score.py @@ -30,7 +30,8 @@ def score(y_true, y_pred, multiclass=False, **kwargs): average = "micro" else: average = "binary" - score = metric(y_true, y_pred, beta, sample_weight=sample_weight, labels=labels, pos_label=pos_label, + score = metric(y_true, y_pred, beta, sample_weight=sample_weight, + labels=labels, pos_label=pos_label, average=average) return score @@ -56,7 +57,8 @@ def get_scorer(**kwargs): average = kwargs["4"] except: average = "binary" - return make_scorer(metric, greater_is_better=True, beta=beta, sample_weight=sample_weight, labels=labels, + return make_scorer(metric, greater_is_better=True, beta=beta, + sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average) @@ -81,7 +83,9 @@ def getConfig(**kwargs): average = kwargs["3"] except: average = "binary" - configString = "F-beta score using " + str(sample_weight) + " as sample_weights, " + str( + configString = "F-beta score using " + str( + sample_weight) + " as sample_weights, " + str( labels) + " as labels, " + str(pos_label) \ - + " as pos_label, " + average + " as average, " + str(beta) + " as beta (higher is better)" + + " as pos_label, " + average + " as average, " + str( + beta) + " as beta (higher is better)" return configString diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/hamming_loss.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/hamming_loss.py index 05d93da435f42d7086c05ee2c0ee85f54e789bac..e81c23fba2ed3207bcf388418bb717e8ed7a74b6 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Metrics/hamming_loss.py +++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/hamming_loss.py @@ -28,5 +28,6 @@ def getConfig(**kwargs): classes = kwargs["0"] except: classes = None - configString = "Hamming loss using " + str(classes) + " as classes (lower is better)" + configString = "Hamming loss using " + str( + classes) + " as classes (lower is better)" return configString diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/jaccard_similarity_score.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/jaccard_similarity_score.py index d885d596051a0116505011cb59878ec006b08c8d..2ac897684c92900211c8f89a06b665d34228d1b9 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Metrics/jaccard_similarity_score.py +++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/jaccard_similarity_score.py @@ -20,7 +20,8 @@ def get_scorer(**kwargs): sample_weight = kwargs["0"] except: 
sample_weight = None - return make_scorer(metric, greater_is_better=True, sample_weight=sample_weight) + return make_scorer(metric, greater_is_better=True, + sample_weight=sample_weight) def getConfig(**kwargs): @@ -28,5 +29,6 @@ def getConfig(**kwargs): sample_weight = kwargs["0"] except: sample_weight = None - configString = "Jaccard_similarity score using " + str(sample_weight) + " as sample_weights (higher is better)" + configString = "Jaccard_similarity score using " + str( + sample_weight) + " as sample_weights (higher is better)" return configString diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/log_loss.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/log_loss.py index 4a771e42e0a444b590f7340cf60b01b3924d863e..3db04eb031f40e0adceded8fcb133cec12ca580a 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Metrics/log_loss.py +++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/log_loss.py @@ -28,7 +28,8 @@ def get_scorer(**kwargs): eps = kwargs["1"] except: eps = 1e-15 - return make_scorer(metric, greater_is_better=False, sample_weight=sample_weight, eps=eps) + return make_scorer(metric, greater_is_better=False, + sample_weight=sample_weight, eps=eps) def getConfig(**kwargs): @@ -40,6 +41,7 @@ def getConfig(**kwargs): eps = kwargs["1"] except: eps = 1e-15 - configString = "Log loss using " + str(sample_weight) + " as sample_weights, " + str( + configString = "Log loss using " + str( + sample_weight) + " as sample_weights, " + str( eps) + " as eps (lower is better)" return configString diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/matthews_corrcoef.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/matthews_corrcoef.py index 5d69563f7fe4938ed77e3f205d76fd1dac7acc12..04731d4f0d994a850b96cd835438a77d0eda0144 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Metrics/matthews_corrcoef.py +++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/matthews_corrcoef.py @@ -1,5 +1,5 @@ -from sklearn.metrics import matthews_corrcoef as metric from sklearn.metrics import make_scorer +from sklearn.metrics import matthews_corrcoef as metric # Author-Info __author__ = "Baptiste Bauvin" diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/precision_score.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/precision_score.py index a80878415893e3886dbd15400b8aea17ded20e7e..0332de0e73b79f4bf7a27ae051bfcd87d080632d 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Metrics/precision_score.py +++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/precision_score.py @@ -1,5 +1,5 @@ -from sklearn.metrics import precision_score as metric from sklearn.metrics import make_scorer +from sklearn.metrics import precision_score as metric # Author-Info __author__ = "Baptiste Bauvin" @@ -26,7 +26,8 @@ def score(y_true, y_pred, multiclass=False, **kwargs): average = "micro" else: average = "binary" - score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average) + score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, + pos_label=pos_label, average=average) return score @@ -47,7 +48,9 @@ def get_scorer(**kwargs): average = kwargs["3"] except: average = "binary" - return make_scorer(metric, greater_is_better=True, sample_weight=sample_weight, labels=labels, pos_label=pos_label, + return make_scorer(metric, greater_is_better=True, + sample_weight=sample_weight, labels=labels, + pos_label=pos_label, average=average) @@ -68,7 +71,8 @@ def getConfig(**kwargs): 
average = kwargs["3"] except: average = "binary" - configString = "Precision score using " + str(sample_weight) + " as sample_weights, " + str( + configString = "Precision score using " + str( + sample_weight) + " as sample_weights, " + str( labels) + " as labels, " + str(pos_label) \ + " as pos_label, " + average + " as average (higher is better)" return configString diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/recall_score.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/recall_score.py index 7ce329a933ec3852fa1a3bccf1de6fa65dc90898..13061c964ef36b9008cf6decc87affa80b706f2f 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Metrics/recall_score.py +++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/recall_score.py @@ -1,5 +1,5 @@ -from sklearn.metrics import recall_score as metric from sklearn.metrics import make_scorer +from sklearn.metrics import recall_score as metric # Author-Info __author__ = "Baptiste Bauvin" @@ -26,7 +26,8 @@ def score(y_true, y_pred, multiclass=False, **kwargs): average = "micro" else: average = "binary" - score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, pos_label=pos_label, average=average) + score = metric(y_true, y_pred, sample_weight=sample_weight, labels=labels, + pos_label=pos_label, average=average) return score @@ -47,7 +48,9 @@ def get_scorer(**kwargs): average = kwargs["3"] except: average = "binary" - return make_scorer(metric, greater_is_better=True, sample_weight=sample_weight, labels=labels, pos_label=pos_label, + return make_scorer(metric, greater_is_better=True, + sample_weight=sample_weight, labels=labels, + pos_label=pos_label, average=average) @@ -68,7 +71,9 @@ def getConfig(**kwargs): average = kwargs["3"] except: average = "binary" - configString = "Recall score using " + str(sample_weight) + " as sample_weights, " + str( + configString = "Recall score using " + str( + sample_weight) + " as sample_weights, " + str( labels) + " as labels, " + str(pos_label) \ - + " as pos_label, " + average + " as average (higher is better)" + + " as pos_label, " + average + "as average (higher is " \ + "better) " return configString diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/roc_auc_score.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/roc_auc_score.py index c6bbfe2d43060c32d7abd34e73f11b89f606b922..e3d6ff4f624db92dfdeb8deb0b08d0d3c4113f57 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Metrics/roc_auc_score.py +++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/roc_auc_score.py @@ -1,5 +1,5 @@ -from sklearn.metrics import roc_auc_score as metric from sklearn.metrics import make_scorer +from sklearn.metrics import roc_auc_score as metric from sklearn.preprocessing import MultiLabelBinarizer # Author-Info @@ -37,7 +37,8 @@ def get_scorer(**kwargs): average = kwargs["1"] except: average = "micro" - return make_scorer(metric, greater_is_better=True, sample_weight=sample_weight, average=average) + return make_scorer(metric, greater_is_better=True, + sample_weight=sample_weight, average=average) def getConfig(**kwargs): diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/zero_one_loss.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/zero_one_loss.py index c946499b5cb1e6dc7c2c5037cc6fea0e7384794f..4c78adfc1e674628c300b20ac048bdef24855b96 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Metrics/zero_one_loss.py +++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/zero_one_loss.py @@ -1,5 +1,5 @@ -from sklearn.metrics 
import zero_one_loss as metric from sklearn.metrics import make_scorer +from sklearn.metrics import zero_one_loss as metric # Author-Info __author__ = "Baptiste Bauvin" @@ -20,7 +20,8 @@ def get_scorer(**kwargs): sample_weight = kwargs["0"] except: sample_weight = None - return make_scorer(metric, greater_is_better=False, sample_weight=sample_weight) + return make_scorer(metric, greater_is_better=False, + sample_weight=sample_weight) def getConfig(**kwargs): @@ -28,5 +29,6 @@ def getConfig(**kwargs): sample_weight = kwargs["0"] except: sample_weight = None - configString = "Zero_one loss using " + str(sample_weight) + " as sample_weights (lower is better)" + configString = "Zero_one loss using " + str( + sample_weight) + " as sample_weights (lower is better)" return configString diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py index 3ac3c046ed335487c7e43339241cd1381d0501f0..5573f6269d86f4c82cc169efebcfa010db40c2be 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py @@ -1,11 +1,12 @@ +import datetime +import sys + +import matplotlib.pyplot as plt import numpy as np from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin from sklearn.preprocessing import LabelEncoder from sklearn.tree import DecisionTreeClassifier from sklearn.utils.validation import check_is_fitted -import sys -import matplotlib.pyplot as plt -import datetime class DecisionStumpClassifier(BaseEstimator, ClassifierMixin): @@ -22,6 +23,7 @@ class DecisionStumpClassifier(BaseEstimator, ClassifierMixin): will predict the "negative" class (generally -1 or 0), and if 1, the stump will predict the second class (generally 1). """ + def __init__(self, attribute_index, threshold, direction=1): super(DecisionStumpClassifier, self).__init__() self.attribute_index = attribute_index @@ -42,7 +44,8 @@ class DecisionStumpClassifier(BaseEstimator, ClassifierMixin): self.classes_ = self.le_.classes_ if not len(self.classes_) == 2: - raise ValueError('DecisionStumpsVoter only supports binary classification') + raise ValueError( + 'DecisionStumpsVoter only supports binary classification') # assert len(self.classes_) == 2, "DecisionStumpsVoter only supports binary classification" return self @@ -62,8 +65,10 @@ class DecisionStumpClassifier(BaseEstimator, ClassifierMixin): """ check_is_fitted(self, 'classes_') - import pdb;pdb.set_trace() - return self.le_.inverse_transform(np.argmax(self.predict_proba(X), axis=1)) + import pdb; + pdb.set_trace() + return self.le_.inverse_transform( + np.argmax(self.predict_proba(X), axis=1)) def predict_proba(self, X): """Compute probabilities of possible outcomes for samples in X. 
@@ -83,7 +88,8 @@ class DecisionStumpClassifier(BaseEstimator, ClassifierMixin): check_is_fitted(self, 'classes_') X = np.asarray(X) probas = np.zeros((X.shape[0], 2)) - positive_class = np.argwhere(X[:, self.attribute_index] > self.threshold) + positive_class = np.argwhere( + X[:, self.attribute_index] > self.threshold) negative_class = np.setdiff1d(range(X.shape[0]), positive_class) probas[positive_class, 1] = 1.0 probas[negative_class, 0] = 1.0 @@ -109,11 +115,12 @@ class DecisionStumpClassifier(BaseEstimator, ClassifierMixin): """ - X=np.ones(X.shape) + X = np.ones(X.shape) check_is_fitted(self, 'classes_') X = np.asarray(X) probas = np.zeros((X.shape[0], 2)) - positive_class = np.argwhere(X[:, self.attribute_index] > self.threshold) + positive_class = np.argwhere( + X[:, self.attribute_index] > self.threshold) negative_class = np.setdiff1d(range(X.shape[0]), positive_class) probas[positive_class, 1] = 1.0 probas[negative_class, 0] = 1.0 @@ -139,6 +146,7 @@ class ClassifiersGenerator(BaseEstimator, TransformerMixin): Once fit, contains the voter functions. """ + def __init__(self, self_complemented=False): super(ClassifiersGenerator, self).__init__() self.self_complemented = self_complemented @@ -177,6 +185,7 @@ class ClassifiersGenerator(BaseEstimator, TransformerMixin): check_is_fitted(self, 'estimators_') return np.array([voter.predict(X) for voter in self.estimators_]).T + # class TreesClassifiersGenerator(ClassifiersGenerator): # """A generator to widen the voter's pool of our boosting algorithms. # """ @@ -191,16 +200,19 @@ class ClassifiersGenerator(BaseEstimator, TransformerMixin): class TreeClassifiersGenerator(ClassifiersGenerator): - def __init__(self, random_state=42, max_depth=2, self_complemented=True, criterion="gini", splitter="best", n_trees=100, distribution_type="uniform", low=0, high=10, attributes_ratio=0.6, examples_ratio=0.95): + def __init__(self, random_state=42, max_depth=2, self_complemented=True, + criterion="gini", splitter="best", n_trees=100, + distribution_type="uniform", low=0, high=10, + attributes_ratio=0.6, examples_ratio=0.95): super(TreeClassifiersGenerator, self).__init__(self_complemented) - self.max_depth=max_depth - self.criterion=criterion - self.splitter=splitter - self.n_trees=n_trees + self.max_depth = max_depth + self.criterion = criterion + self.splitter = splitter + self.n_trees = n_trees if type(random_state) is int: self.random_state = np.random.RandomState(random_state) else: - self.random_state=random_state + self.random_state = random_state self.distribution_type = distribution_type self.low = low self.high = high @@ -209,23 +221,31 @@ class TreeClassifiersGenerator(ClassifiersGenerator): def fit(self, X, y=None): estimators_ = [] - self.attribute_indices = np.array([self.sub_sample_attributes(X) for _ in range(self.n_trees)]) - self.example_indices = np.array([self.sub_sample_examples(X) for _ in range(self.n_trees)]) + self.attribute_indices = np.array( + [self.sub_sample_attributes(X) for _ in range(self.n_trees)]) + self.example_indices = np.array( + [self.sub_sample_examples(X) for _ in range(self.n_trees)]) for i in range(self.n_trees): - estimators_.append(DecisionTreeClassifier(criterion=self.criterion, splitter=self.splitter, max_depth=self.max_depth).fit(X[:, self.attribute_indices[i, :]][self.example_indices[i], :], y[self.example_indices[i, :]])) + estimators_.append(DecisionTreeClassifier(criterion=self.criterion, + splitter=self.splitter, + max_depth=self.max_depth).fit( + X[:, self.attribute_indices[i, 
:]][self.example_indices[i], :], + y[self.example_indices[i, :]])) self.estimators_ = np.asarray(estimators_) return self def sub_sample_attributes(self, X): n_attributes = X.shape[1] attributes_indices = np.arange(n_attributes) - kept_indices = self.random_state.choice(attributes_indices, size=int(self.attributes_ratio*n_attributes), replace=True) + kept_indices = self.random_state.choice(attributes_indices, size=int( + self.attributes_ratio * n_attributes), replace=True) return kept_indices def sub_sample_examples(self, X): n_examples = X.shape[0] examples_indices = np.arange(n_examples) - kept_indices = self.random_state.choice(examples_indices, size=int(self.examples_ratio*n_examples), replace=True) + kept_indices = self.random_state.choice(examples_indices, size=int( + self.examples_ratio * n_examples), replace=True) return kept_indices def choose(self, chosen_columns): @@ -234,8 +254,6 @@ class TreeClassifiersGenerator(ClassifiersGenerator): self.example_indices = self.example_indices[chosen_columns, :] - - class StumpsClassifiersGenerator(ClassifiersGenerator): """Decision Stump Voters transformer. @@ -248,7 +266,9 @@ class StumpsClassifiersGenerator(ClassifiersGenerator): Whether or not a binary complement voter must be generated for each voter. Defaults to False. """ - def __init__(self, n_stumps_per_attribute=10, self_complemented=False, check_diff=False): + + def __init__(self, n_stumps_per_attribute=10, self_complemented=False, + check_diff=False): super(StumpsClassifiersGenerator, self).__init__(self_complemented) self.n_stumps_per_attribute = n_stumps_per_attribute self.check_diff = check_diff @@ -271,7 +291,7 @@ class StumpsClassifiersGenerator(ClassifiersGenerator): minimums = np.min(X, axis=0) maximums = np.max(X, axis=0) if y.ndim > 1: - y = np.reshape(y, (y.shape[0], )) + y = np.reshape(y, (y.shape[0],)) ranges = (maximums - minimums) / (self.n_stumps_per_attribute + 1) if self.check_diff: nb_differents = [np.unique(col) for col in np.transpose(X)] @@ -279,33 +299,61 @@ class StumpsClassifiersGenerator(ClassifiersGenerator): for i in range(X.shape[1]): nb_different = nb_differents[i].shape[0] different = nb_differents[i] - if nb_different-1 < self.n_stumps_per_attribute: + if nb_different - 1 < self.n_stumps_per_attribute: self.estimators_ += [DecisionStumpClassifier(i, - (different[stump_number]+different[ - stump_number+1])/2, 1).fit(X, y) - for stump_number in range(int(nb_different)-1)] + (different[ + stump_number] + + different[ + stump_number + 1]) / 2, + 1).fit(X, y) + for stump_number in + range(int(nb_different) - 1)] if self.self_complemented: self.estimators_ += [DecisionStumpClassifier(i, - (different[stump_number] + different[ - stump_number + 1]) / 2, -1).fit(X, y) - for stump_number in range(int(nb_different)-1)] + (different[ + stump_number] + + different[ + stump_number + 1]) / 2, + -1).fit(X, + y) + for stump_number in + range(int(nb_different) - 1)] else: - self.estimators_ += [DecisionStumpClassifier(i, minimums[i] + ranges[i] * stump_number, 1).fit(X, y) - for stump_number in range(1, self.n_stumps_per_attribute + 1) - if ranges[i] != 0] + self.estimators_ += [DecisionStumpClassifier(i, + minimums[i] + + ranges[ + i] * stump_number, + 1).fit(X, y) + for stump_number in range(1, + self.n_stumps_per_attribute + 1) + if ranges[i] != 0] if self.self_complemented: - self.estimators_ += [DecisionStumpClassifier(i, minimums[i] + ranges[i] * stump_number, -1).fit(X, y) - for stump_number in range(1, self.n_stumps_per_attribute + 1) + self.estimators_ += 
[DecisionStumpClassifier(i, + minimums[ + i] + + ranges[ + i] * stump_number, + -1).fit(X, + y) + for stump_number in range(1, + self.n_stumps_per_attribute + 1) if ranges[i] != 0] else: - self.estimators_ = [DecisionStumpClassifier(i, minimums[i] + ranges[i] * stump_number, 1).fit(X, y) - for i in range(X.shape[1]) for stump_number in range(1, self.n_stumps_per_attribute + 1) + self.estimators_ = [DecisionStumpClassifier(i, minimums[i] + ranges[ + i] * stump_number, 1).fit(X, y) + for i in range(X.shape[1]) for stump_number in + range(1, self.n_stumps_per_attribute + 1) if ranges[i] != 0] if self.self_complemented: - self.estimators_ += [DecisionStumpClassifier(i, minimums[i] + ranges[i] * stump_number, -1).fit(X, y) - for i in range(X.shape[1]) for stump_number in range(1, self.n_stumps_per_attribute + 1) + self.estimators_ += [DecisionStumpClassifier(i, minimums[i] + + ranges[ + i] * stump_number, + -1).fit(X, y) + for i in range(X.shape[1]) for stump_number + in + range(1, self.n_stumps_per_attribute + 1) if ranges[i] != 0] self.estimators_ = np.asarray(self.estimators_) return self @@ -357,9 +405,6 @@ def _as_line_matrix(array_like): return matrix - - - def sign(array): """Computes the elementwise sign of all elements of an array. The sign function returns -1 if x <=0 and 1 if x > 0. Note that numpy's sign function can return 0, which is not desirable in most cases in Machine Learning algorithms. @@ -396,6 +441,7 @@ class ConvexProgram(object): subject to G*x <= h A*x = b """ + def __init__(self): self._quadratic_func = None self._linear_func = None @@ -419,10 +465,10 @@ class ConvexProgram(object): def quadratic_func(self, quad_matrix): quad_matrix = _as_matrix(quad_matrix) n_lines, n_columns = np.shape(quad_matrix) - assert(n_lines == n_columns) + assert (n_lines == n_columns) if self._linear_func is not None: - assert(np.shape(quad_matrix)[0] == self._n_variables) + assert (np.shape(quad_matrix)[0] == self._n_variables) else: self._n_variables = n_lines @@ -438,7 +484,7 @@ class ConvexProgram(object): lin_vector = _as_column_matrix(lin_vector) if self._quadratic_func is not None: - assert(np.shape(lin_vector)[0] == self._n_variables) + assert (np.shape(lin_vector)[0] == self._n_variables) else: self._n_variables = np.shape(lin_vector)[0] @@ -451,7 +497,8 @@ class ConvexProgram(object): self._assert_objective_function_is_set() - if 1 in np.shape(inequality_matrix) or len(np.shape(inequality_matrix)) == 1: + if 1 in np.shape(inequality_matrix) or len( + np.shape(inequality_matrix)) == 1: inequality_matrix = _as_line_matrix(inequality_matrix) else: inequality_matrix = _as_matrix(inequality_matrix) @@ -463,14 +510,16 @@ class ConvexProgram(object): if self._inequality_constraints_matrix is None: self._inequality_constraints_matrix = inequality_matrix else: - self._inequality_constraints_matrix = np.append(self._inequality_constraints_matrix, - inequality_matrix, axis=0) + self._inequality_constraints_matrix = np.append( + self._inequality_constraints_matrix, + inequality_matrix, axis=0) if self._inequality_constraints_values is None: self._inequality_constraints_values = inequality_values else: - self._inequality_constraints_values = np.append(self._inequality_constraints_values, - inequality_values, axis=0) + self._inequality_constraints_values = np.append( + self._inequality_constraints_values, + inequality_values, axis=0) def add_equality_constraints(self, equality_matrix, equality_values): if equality_matrix is None: @@ -478,7 +527,8 @@ class ConvexProgram(object): 
self._assert_objective_function_is_set() - if 1 in np.shape(equality_matrix) or len(np.shape(equality_matrix)) == 1: + if 1 in np.shape(equality_matrix) or len( + np.shape(equality_matrix)) == 1: equality_matrix = _as_line_matrix(equality_matrix) else: equality_matrix = _as_matrix(equality_matrix) @@ -490,24 +540,28 @@ class ConvexProgram(object): if self._equality_constraints_matrix is None: self._equality_constraints_matrix = equality_matrix else: - self._equality_constraints_matrix = np.append(self._equality_constraints_matrix, - equality_matrix, axis=0) + self._equality_constraints_matrix = np.append( + self._equality_constraints_matrix, + equality_matrix, axis=0) if self._equality_constraints_values is None: self._equality_constraints_values = equality_values else: - self._equality_constraints_values = np.append(self._equality_constraints_values, - equality_values, axis=0) + self._equality_constraints_values = np.append( + self._equality_constraints_values, + equality_values, axis=0) def add_lower_bound(self, lower_bound): if lower_bound is not None: self._assert_objective_function_is_set() - self._lower_bound_values = np.array([lower_bound] * self._n_variables) + self._lower_bound_values = np.array( + [lower_bound] * self._n_variables) def add_upper_bound(self, upper_bound): if upper_bound is not None: self._assert_objective_function_is_set() - self._upper_bound_values = np.array([upper_bound] * self._n_variables) + self._upper_bound_values = np.array( + [upper_bound] * self._n_variables) def _convert_bounds_to_inequality_constraints(self): self._assert_objective_function_is_set() @@ -547,21 +601,26 @@ class ConvexProgram(object): self._linear_func = cvxopt_matrix(np.zeros((self._n_variables, 1))) if self._inequality_constraints_matrix is not None: - self._inequality_constraints_matrix = cvxopt_matrix(self._inequality_constraints_matrix) + self._inequality_constraints_matrix = cvxopt_matrix( + self._inequality_constraints_matrix) if self._inequality_constraints_values is not None: - self._inequality_constraints_values = cvxopt_matrix(self._inequality_constraints_values) + self._inequality_constraints_values = cvxopt_matrix( + self._inequality_constraints_values) if self._equality_constraints_matrix is not None: - self._equality_constraints_matrix = cvxopt_matrix(self._equality_constraints_matrix) + self._equality_constraints_matrix = cvxopt_matrix( + self._equality_constraints_matrix) if self._equality_constraints_values is not None: - self._equality_constraints_values = cvxopt_matrix(self._equality_constraints_values) + self._equality_constraints_values = cvxopt_matrix( + self._equality_constraints_values) def _assert_objective_function_is_set(self): assert self._n_variables is not None - def solve(self, solver="cvxopt", feastol=1e-7, abstol=1e-7, reltol=1e-6, return_all_information=False): + def solve(self, solver="cvxopt", feastol=1e-7, abstol=1e-7, reltol=1e-6, + return_all_information=False): # Some solvers are very verbose, and we don't want them to pollute STDOUT or STDERR. 
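# --- Editor's note (illustration, not part of the original diff) -----------
# ConvexProgram wraps a quadratic program of the form
#   minimize 0.5 * x'Px + q'x   subject to   Gx <= h,  Ax = b,
# and ultimately hands it to CVXOPT. A minimal standalone sketch of that call,
# with a made-up toy problem (requires the `cvxopt` package):
import numpy as np
from cvxopt import matrix
from cvxopt.solvers import qp, options

options['show_progress'] = False                    # silence the solver, as the class does
P = matrix(np.array([[2.0, 0.0], [0.0, 2.0]]))      # quadratic term (must be PSD)
q = matrix(np.array([-2.0, -5.0]))                  # linear term
G = matrix(np.array([[-1.0, 0.0], [0.0, -1.0]]))    # x >= 0 rewritten as -x <= 0
h = matrix(np.zeros(2))
A = matrix(np.ones((1, 2)))                         # x1 + x2 = 1
b = matrix(np.ones(1))
solution = np.array(qp(P, q, G, h, A, b)['x']).ravel()
# ---------------------------------------------------------------------------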
original_stdout = sys.stdout @@ -591,8 +650,10 @@ class ConvexProgram(object): self._convert_to_cvxopt_matrices() if self._quadratic_func is not None: - ret = qp(self.quadratic_func, self.linear_func, self._inequality_constraints_matrix, - self._inequality_constraints_values, self._equality_constraints_matrix, + ret = qp(self.quadratic_func, self.linear_func, + self._inequality_constraints_matrix, + self._inequality_constraints_values, + self._equality_constraints_matrix, self._equality_constraints_values) else: @@ -602,8 +663,8 @@ class ConvexProgram(object): A=self._equality_constraints_matrix, b=self._equality_constraints_values) - #logging.info("Primal objective value = {}".format(ret['primal objective'])) - #logging.info("Dual objective value = {}".format(ret['dual objective'])) + # logging.info("Primal objective value = {}".format(ret['primal objective'])) + # logging.info("Dual objective value = {}".format(ret['dual objective'])) if not return_all_information: ret = np.asarray(np.array(ret['x']).T[0]) @@ -628,29 +689,39 @@ class ConvexProgram(object): if self.linear_func is not None: p.objective.set_linear(zip(names, - np.asarray(self.linear_func.T).reshape(self.n_variables,).tolist())) + np.asarray( + self.linear_func.T).reshape( + self.n_variables, ).tolist())) if self._inequality_constraints_matrix is not None: inequality_linear = [] for line in self._inequality_constraints_matrix: inequality_linear.append([names, line.tolist()[0]]) p.linear_constraints.add(lin_expr=inequality_linear, - rhs=np.asarray(self._inequality_constraints_values.T).tolist()[0], - senses="L"*len(self._inequality_constraints_values)) + rhs=np.asarray( + self._inequality_constraints_values.T).tolist()[ + 0], + senses="L" * len( + self._inequality_constraints_values)) if self._equality_constraints_matrix is not None: equality_linear = [] for line in self._equality_constraints_matrix: equality_linear.append([names, line.tolist()[0]]) p.linear_constraints.add(lin_expr=equality_linear, - rhs=np.asarray(self._equality_constraints_values.T).tolist()[0], - senses="E"*len(self._equality_constraints_values)) + rhs=np.asarray( + self._equality_constraints_values.T).tolist()[ + 0], + senses="E" * len( + self._equality_constraints_values)) if self._lower_bound_values is not None: - p.variables.set_lower_bounds(zip(names, self._lower_bound_values)) + p.variables.set_lower_bounds( + zip(names, self._lower_bound_values)) if self._upper_bound_values is not None: - p.variables.set_upper_bounds(zip(names, self._upper_bound_values)) + p.variables.set_upper_bounds( + zip(names, self._upper_bound_values)) p.solve() @@ -668,15 +739,18 @@ class ConvexProgram(object): q = model.new(self.n_variables) if self._inequality_constraints_matrix is not None: - model.constrain(self._inequality_constraints_matrix * q <= self._inequality_constraints_values) + model.constrain( + self._inequality_constraints_matrix * q <= self._inequality_constraints_values) if self._equality_constraints_matrix is not None: - model.constrain(self._equality_constraints_matrix * q == self._equality_constraints_values) + model.constrain( + self._equality_constraints_matrix * q == self._equality_constraints_values) if self._lower_bound_values is not None: model.constrain(q >= self._lower_bound_values) if self._upper_bound_values is not None: model.constrain(q <= self._upper_bound_values) - value = model.minimize(0.5 * q.T * self._quadratic_func * q + self.linear_func.T * q) + value = model.minimize( + 0.5 * q.T * self._quadratic_func * q + self.linear_func.T * q) 
if not return_all_information: ret = np.array(model[q]) @@ -753,20 +827,23 @@ class ConvexProgram(object): return signs -def get_accuracy_graph(plotted_data, classifier_name, file_name, name="Accuracies", bounds=None, bound_name=None, boosting_bound=None, set="train", zero_to_one=True): +def get_accuracy_graph(plotted_data, classifier_name, file_name, + name="Accuracies", bounds=None, bound_name=None, + boosting_bound=None, set="train", zero_to_one=True): if type(name) is not str: name = " ".join(name.getConfig().strip().split(" ")[:2]) f, ax = plt.subplots(nrows=1, ncols=1) if zero_to_one: - ax.set_ylim(bottom=0.0,top=1.0) - ax.set_title(name+" during "+set+" for "+classifier_name) + ax.set_ylim(bottom=0.0, top=1.0) + ax.set_title(name + " during " + set + " for " + classifier_name) x = np.arange(len(plotted_data)) scat = ax.scatter(x, np.array(plotted_data), marker=".") if bounds: if boosting_bound: scat2 = ax.scatter(x, boosting_bound, marker=".") scat3 = ax.scatter(x, np.array(bounds), marker=".", ) - ax.legend((scat, scat2, scat3), (name,"Boosting bound", bound_name)) + ax.legend((scat, scat2, scat3), + (name, "Boosting bound", bound_name)) else: scat2 = ax.scatter(x, np.array(bounds), marker=".", ) ax.legend((scat, scat2), @@ -782,9 +859,13 @@ class BaseBoost(object): def _collect_probas(self, X, sub_sampled=False): if self.estimators_generator.__class__.__name__ == "TreeClassifiersGenerator": - return np.asarray([clf.predict_proba(X[:,attribute_indices]) for clf, attribute_indices in zip(self.estimators_generator.estimators_, self.estimators_generator.attribute_indices)]) + return np.asarray([clf.predict_proba(X[:, attribute_indices]) for + clf, attribute_indices in + zip(self.estimators_generator.estimators_, + self.estimators_generator.attribute_indices)]) else: - return np.asarray([clf.predict_proba(X) for clf in self.estimators_generator.estimators_]) + return np.asarray([clf.predict_proba(X) for clf in + self.estimators_generator.estimators_]) def _binary_classification_matrix(self, X): probas = self._collect_probas(X) @@ -794,30 +875,39 @@ class BaseBoost(object): return (predicted_labels * values).T def _initialize_alphas(self, n_examples): - raise NotImplementedError("Alpha weights initialization function is not implemented.") + raise NotImplementedError( + "Alpha weights initialization function is not implemented.") def check_opposed_voters(self, ): nb_opposed = 0 oppposed = [] - for column in self.classification_matrix[:, self.chosen_columns_].transpose(): + for column in self.classification_matrix[:, + self.chosen_columns_].transpose(): for chosen_col in self.chosen_columns_: - if (-column.reshape((self.n_total_examples, 1)) == self.classification_matrix[:, chosen_col].reshape((self.n_total_examples, 1))).all(): - nb_opposed+=1 + if (-column.reshape((self.n_total_examples, + 1)) == self.classification_matrix[:, + chosen_col].reshape( + (self.n_total_examples, 1))).all(): + nb_opposed += 1 break - return int(nb_opposed/2) + return int(nb_opposed / 2) def getInterpretBase(classifier, directory, classifier_name, weights, break_cause=" the dual constrail was not violated"): - interpretString = "\t "+classifier_name+" permformed classification with weights : \n" + interpretString = "\t " + classifier_name + " permformed classification with weights : \n" # weights_sort = np.argsort(-weights) weights_sort = np.arange(weights.shape[0]) - interpretString += np.array2string(weights[weights_sort], precision=4, separator=',', suppress_small=True) - interpretString += "\n \t It 
generated {} columns by attributes and used {} iterations to converge, and selected {} couple(s) of opposed voters".format(classifier.n_stumps, + interpretString += np.array2string(weights[weights_sort], precision=4, + separator=',', suppress_small=True) + interpretString += "\n \t It generated {} columns by attributes and used {} iterations to converge, and selected {} couple(s) of opposed voters".format( + classifier.n_stumps, len(weights_sort), classifier.nb_opposed_voters) if max(weights) > 0.50: - interpretString += "\n \t The vote is useless in this context : voter nb {} is a dictator of weight > 0.50".format(classifier.chosen_columns_[np.argmax(np.array(weights))]) - if len(weights_sort) == classifier.n_max_iterations or len(weights) == classifier.n_total_hypotheses_: + interpretString += "\n \t The vote is useless in this context : voter nb {} is a dictator of weight > 0.50".format( + classifier.chosen_columns_[np.argmax(np.array(weights))]) + if len(weights_sort) == classifier.n_max_iterations or len( + weights) == classifier.n_total_hypotheses_: if len(weights) == classifier.n_max_iterations: interpretString += ", and used all available iterations, " else: @@ -830,14 +920,26 @@ def getInterpretBase(classifier, directory, classifier_name, weights, pass # interpretString += ", and the loop was broken because "+break_cause interpretString += "\n\t Selected voters : \n" - interpretString += np.array2string(np.array(classifier.chosen_columns_)[weights_sort]) - interpretString += "\n\t Trained in "+str(datetime.timedelta(seconds=classifier.train_time))+" and predicted in "+str(datetime.timedelta(seconds=classifier.predict_time))+"." + interpretString += np.array2string( + np.array(classifier.chosen_columns_)[weights_sort]) + interpretString += "\n\t Trained in " + str(datetime.timedelta( + seconds=classifier.train_time)) + " and predicted in " + str( + datetime.timedelta(seconds=classifier.predict_time)) + "." 
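# --- Editor's note (illustration, not part of the original diff) -----------
# BaseBoost._binary_classification_matrix (above) turns the voters' class
# probabilities into a signed matrix with one row per example and one column
# per voter: the sign is the predicted class (-1/+1), the magnitude the
# confidence. Synthetic sketch with two hypothetical depth-1 trees as voters:
import numpy as np
from sklearn.tree import DecisionTreeClassifier

rng = np.random.RandomState(42)
X = rng.rand(6, 3)
y = np.array([0, 1, 0, 1, 1, 0])
voters = [DecisionTreeClassifier(max_depth=1, random_state=i).fit(X, y)
          for i in range(2)]
probas = np.asarray([clf.predict_proba(X) for clf in voters])   # (n_voters, n_examples, 2)
labels = np.argmax(probas, axis=2)
labels[labels == 0] = -1
classification_matrix = (labels * np.max(probas, axis=2)).T     # (n_examples, n_voters)
# ---------------------------------------------------------------------------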
interpretString += "\n\t Selected columns : \n" - interpretString += np.array2string(classifier.classification_matrix[:, classifier.chosen_columns_], precision=4, - separator=',', suppress_small=True) - np.savetxt(directory + "voters.csv", classifier.classification_matrix[:, classifier.chosen_columns_], delimiter=',') + interpretString += np.array2string( + classifier.classification_matrix[:, classifier.chosen_columns_], + precision=4, + separator=',', suppress_small=True) + np.savetxt(directory + "voters.csv", + classifier.classification_matrix[:, classifier.chosen_columns_], + delimiter=',') np.savetxt(directory + "weights.csv", classifier.weights_, delimiter=',') - np.savetxt(directory + "times.csv", np.array([classifier.train_time, classifier.predict_time]), delimiter=',') - np.savetxt(directory + "sparsity.csv", np.array([len(weights_sort)]), delimiter=',') - get_accuracy_graph(classifier.train_metrics, classifier_name, directory + 'metrics.png', classifier.plotted_metric, classifier.bounds, "Boosting bound") + np.savetxt(directory + "times.csv", + np.array([classifier.train_time, classifier.predict_time]), + delimiter=',') + np.savetxt(directory + "sparsity.csv", np.array([len(weights_sort)]), + delimiter=',') + get_accuracy_graph(classifier.train_metrics, classifier_name, + directory + 'metrics.png', classifier.plotted_metric, + classifier.bounds, "Boosting bound") return interpretString diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py index 61ccc61640774011b7614fd086ac7043e5394a5e..82522b306b2ab41e111496655000f16a7165aa14 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CGDescUtils.py @@ -1,17 +1,19 @@ -import scipy import logging +import math +import time + import numpy as np import numpy.ma as ma -import math -from sklearn.utils.validation import check_is_fitted +import scipy from sklearn.base import BaseEstimator, ClassifierMixin -import time +from sklearn.utils.validation import check_is_fitted from .BoostUtils import StumpsClassifiersGenerator, sign, BaseBoost, \ getInterpretBase, get_accuracy_graph, TreeClassifiersGenerator -from ..MonoviewUtils import change_label_to_zero, change_label_to_minus +from ..MonoviewUtils import change_label_to_minus from ... import Metrics + # Used for QarBoost and CGreed class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): @@ -73,7 +75,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): def fit(self, X, y): - formatted_X, formatted_y = self.format_X_y(X, y) self.init_info_containers() @@ -91,16 +92,16 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): for k in range(min(n - 1, self.n_max_iterations - 1 if self.n_max_iterations is not None else np.inf)): - # Print dynamically the step and the error of the current classifier self.it = k print( - "Resp. bound : {}, {}; {}/{}, eps :{}".format(self.respected_bound, - self.bounds[-1] > self.train_metrics[-1], - k + 2, - self.n_max_iterations, - self.voter_perfs[-1]), + "Resp. 
bound : {}, {}; {}/{}, eps :{}".format( + self.respected_bound, + self.bounds[-1] > self.train_metrics[-1], + k + 2, + self.n_max_iterations, + self.voter_perfs[-1]), end="\r") sol, new_voter_index = self.choose_new_voter(y_kernel_matrix, formatted_y) @@ -117,10 +118,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.update_example_weights(formatted_y) - self.update_info_containers(formatted_y, voter_perf, k) - self.nb_opposed_voters = self.check_opposed_voters() self.estimators_generator.choose(self.chosen_columns_) @@ -160,33 +159,50 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.step_decisions = np.zeros(classification_matrix.shape) self.mincq_step_decisions = np.zeros(classification_matrix.shape) self.step_prod = np.zeros(classification_matrix.shape) - for weight_index in range(self.weights_.shape[0]-1): - margins = np.sum(classification_matrix[:, :weight_index+1]* self.weights_[:weight_index+1], axis=1) + for weight_index in range(self.weights_.shape[0] - 1): + margins = np.sum( + classification_matrix[:, :weight_index + 1] * self.weights_[ + :weight_index + 1], + axis=1) signs_array = np.array([int(x) for x in sign(margins)]) signs_array[signs_array == -1] = 0 self.step_decisions[:, weight_index] = signs_array - self.step_prod[:, weight_index] = np.sum(classification_matrix[:, :weight_index+1]* self.weights_[:weight_index+1], axis=1) + self.step_prod[:, weight_index] = np.sum( + classification_matrix[:, :weight_index + 1] * self.weights_[ + :weight_index + 1], + axis=1) if self.mincq_tracking: - if weight_index ==0: - self.mincq_step_decisions[:,weight_index] = signs_array + if weight_index == 0: + self.mincq_step_decisions[:, weight_index] = signs_array else: - mincq_margins = np.sum(self.mincq_learners[weight_index-1].majority_vote._weights*classification_matrix[:,:weight_index+1], axis=1) - mincq_signs_array = np.array([int(x) for x in sign(mincq_margins)]) + mincq_margins = np.sum(self.mincq_learners[ + weight_index - 1].majority_vote._weights * classification_matrix[ + :, + :weight_index + 1], + axis=1) + mincq_signs_array = np.array( + [int(x) for x in sign(mincq_margins)]) mincq_signs_array[mincq_signs_array == -1] = 0 - self.mincq_step_decisions[:, weight_index] = mincq_signs_array + self.mincq_step_decisions[:, + weight_index] = mincq_signs_array # self.mincq_step_cbounds = self.mincq_learners[weight_index-1].majority_vote.cbound_value() def update_info_containers(self, y, voter_perf, k): """Is used at each iteration to compute and store all the needed quantities for later analysis""" self.example_weights_.append(self.example_weights) - self.tau.append(np.sum(np.multiply(self.previous_vote, self.new_voter))/float(self.n_total_examples)) + self.tau.append( + np.sum(np.multiply(self.previous_vote, self.new_voter)) / float( + self.n_total_examples)) # print(np.sum(np.multiply(self.previous_vote, self.new_voter))/float(self.n_total_examples)) self.previous_vote += self.q * self.new_voter - self.norm.append(np.linalg.norm(self.previous_vote)**2) + self.norm.append(np.linalg.norm(self.previous_vote) ** 2) self.previous_votes.append(self.previous_vote) self.previous_margins.append( - np.sum(np.multiply(y, self.previous_vote))/float(self.n_total_examples)) - self.selected_margins.append(np.sum(np.multiply(y, self.new_voter))/float(self.n_total_examples)) + np.sum(np.multiply(y, self.previous_vote)) / float( + self.n_total_examples)) + self.selected_margins.append( + np.sum(np.multiply(y, self.new_voter)) 
/ float( + self.n_total_examples)) train_metric = self.plotted_metric.score(y, np.sign(self.previous_vote)) if self.use_r: bound = self.bounds[-1] * math.sqrt(1 - voter_perf ** 2) @@ -199,17 +215,20 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.train_metrics.append(train_metric) self.bounds.append(bound) - if self.mincq_tracking: # Used to compute the optimal c-bound distribution on the chose set + if self.mincq_tracking: # Used to compute the optimal c-bound distribution on the chose set from ...MonoviewClassifiers.MinCQ import MinCqLearner - mincq = MinCqLearner(10e-3, "stumps", n_stumps_per_attribute=1, self_complemented=False) + mincq = MinCqLearner(10e-3, "stumps", n_stumps_per_attribute=1, + self_complemented=False) training_set = self.classification_matrix[:, self.chosen_columns_] mincq.fit(training_set, y) mincq_pred = mincq.predict(training_set) self.mincq_learners.append(mincq) - self.mincq_train_metrics.append(self.plotted_metric.score(y, change_label_to_minus(mincq_pred))) + self.mincq_train_metrics.append( + self.plotted_metric.score(y, change_label_to_minus(mincq_pred))) self.mincq_weights.append(mincq.majority_vote._weights) - self.mincq_c_bounds.append(mincq.majority_vote.cbound_value(training_set, y.reshape((y.shape[0],)))) - + self.mincq_c_bounds.append( + mincq.majority_vote.cbound_value(training_set, + y.reshape((y.shape[0],)))) def compute_voter_weight(self, voter_perf, sol): """used to compute the voter's weight according to the specified method (edge or error) """ @@ -259,14 +278,14 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): if self.random_start: first_voter_index = self.random_state.choice( - np.where(np.sum(y_kernel_matrix, axis=0)>0)[0]) + np.where(np.sum(y_kernel_matrix, axis=0) > 0)[0]) else: first_voter_index, _ = self._find_best_weighted_margin( y_kernel_matrix) self.chosen_columns_.append(first_voter_index) self.new_voter = np.array(self.classification_matrix[:, - first_voter_index].reshape((m, 1)), copy=True) + first_voter_index].reshape((m, 1)), copy=True) self.previous_vote = self.new_voter self.norm.append(np.linalg.norm(self.previous_vote) ** 2) @@ -290,9 +309,9 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.update_example_weights(y) self.example_weights_.append(self.example_weights) - self.previous_margins.append( - np.sum(np.multiply(y, self.previous_vote))/float(self.n_total_examples)) + np.sum(np.multiply(y, self.previous_vote)) / float( + self.n_total_examples)) self.selected_margins.append(np.sum(np.multiply(y, self.previous_vote))) self.tau.append( np.sum(np.multiply(self.previous_vote, self.new_voter)) / float( @@ -313,8 +332,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): if self.mincq_tracking: self.mincq_train_metrics.append(train_metric) - - def format_X_y(self, X, y): """Formats the data : X -the examples- and y -the labels- to be used properly by the algorithm """ if scipy.sparse.issparse(X): @@ -332,7 +349,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): n_stumps_per_attribute=self.n_stumps, self_complemented=self.self_complemented) if self.estimators_generator is "Trees": - self.estimators_generator = TreeClassifiersGenerator(n_trees=self.n_stumps, max_depth=self.max_depth, + self.estimators_generator = TreeClassifiersGenerator( + n_trees=self.n_stumps, max_depth=self.max_depth, self_complemented=self.self_complemented) 
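# --- Editor's note (illustration, not part of the original diff) -----------
# The `use_r` branch earlier in this hunk multiplies the running bound by
# sqrt(1 - r_k**2). That has the shape of the classical boosting guarantee
# (training error of the vote bounded by prod_k sqrt(1 - r_k**2) for
# per-iteration edges r_k); whether that exact guarantee applies to this
# algorithm is an assumption of this note. Numeric sketch with made-up edges:
import numpy as np
edges = np.array([0.30, 0.25, 0.20, 0.15])
bounds = np.cumprod(np.sqrt(1.0 - edges ** 2))   # one bound value per iteration
# The product keeps shrinking as long as every edge is non-zero.
# ---------------------------------------------------------------------------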
self.estimators_generator.fit(X, y) self.classification_matrix = self._binary_classification_matrix(X) @@ -360,14 +378,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): self.respected_bound = True self.selected_margins = [] self.tau = [] - self.norm=[] + self.norm = [] self.mincq_train_metrics = [] self.mincq_c_bounds = [] self.mincq_weights = [] self.mincq_learners = [] self.mincq_step_decisions = [] - def _compute_epsilon(self, y): """Updating the error variable, the old fashioned way uses the whole majority vote to update the error""" ones_matrix = np.zeros(y.shape) @@ -408,31 +425,37 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): and select the one that has the smallest minimum""" m = y_kernel_matrix.shape[0] weighted_previous_sum = np.multiply(y, - self.previous_vote.reshape(m,1)) + self.previous_vote.reshape(m, 1)) margin_old = np.sum(weighted_previous_sum) if self.c_bound_sol: weighted_hypothesis = y_kernel_matrix else: - weighted_hypothesis = y_kernel_matrix * self.example_weights.reshape((m, 1)) + weighted_hypothesis = y_kernel_matrix * self.example_weights.reshape( + (m, 1)) - bad_margins = np.where(np.sum(weighted_hypothesis, axis=0)<=0.0)[0] + bad_margins = np.where(np.sum(weighted_hypothesis, axis=0) <= 0.0)[0] self.B2 = m - self.B1s = np.sum(2 * np.multiply(weighted_previous_sum, weighted_hypothesis), - axis=0) + self.B1s = np.sum( + 2 * np.multiply(weighted_previous_sum, weighted_hypothesis), + axis=0) self.B0 = np.sum(weighted_previous_sum ** 2) self.A2s = np.sum(weighted_hypothesis, axis=0) ** 2 self.A1s = np.sum(weighted_hypothesis, axis=0) * margin_old * 2 - self.A0 = margin_old**2 + self.A0 = margin_old ** 2 C2s = (self.A1s * self.B2 - self.A2s * self.B1s) C1s = 2 * (self.A0 * self.B2 - self.A2s * self.B0) C0s = self.A0 * self.B1s - self.A1s * self.B0 - sols = np.zeros(C0s.shape)-3 + sols = np.zeros(C0s.shape) - 3 # sols[np.where(C2s == 0)[0]] = C0s[np.where(C2s == 0)[0]] / C1s[np.where(C2s == 0)[0]] - sols[np.where(C2s != 0)[0]] = (-C1s[np.where(C2s != 0)[0]] + np.sqrt(C1s[np.where(C2s != 0)[0]] * C1s[np.where(C2s != 0)[0]] - 4 * C2s[np.where(C2s != 0)[0]] * C0s[np.where(C2s != 0)[0]])) / (2 * C2s[np.where(C2s != 0)[0]]) + sols[np.where(C2s != 0)[0]] = (-C1s[np.where(C2s != 0)[0]] + np.sqrt( + C1s[np.where(C2s != 0)[0]] * C1s[np.where(C2s != 0)[0]] - 4 * C2s[ + np.where(C2s != 0)[0]] * C0s[np.where(C2s != 0)[0]])) / ( + 2 * C2s[ + np.where(C2s != 0)[0]]) masked_c_bounds = self.make_masked_c_bounds(sols, bad_margins) if masked_c_bounds.mask.all(): @@ -441,8 +464,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): best_hyp_index = np.argmin(masked_c_bounds) self.c_bounds.append(masked_c_bounds[best_hyp_index]) - self.margins.append(math.sqrt(self.A2s[best_hyp_index]/m)) - self.disagreements.append(0.5*self.B1s[best_hyp_index]/m) + self.margins.append(math.sqrt(self.A2s[best_hyp_index] / m)) + self.disagreements.append(0.5 * self.B1s[best_hyp_index] / m) return sols[best_hyp_index], best_hyp_index def make_masked_c_bounds(self, sols, bad_margins): @@ -463,19 +486,20 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): def compute_c_bounds(self, sols): return 1 - (self.A2s * sols ** 2 + self.A1s * sols + self.A0) / (( - self.B2 * sols ** 2 + self.B1s * sols + self.B0) * self.n_total_examples) - + self.B2 * sols ** 2 + self.B1s * sols + self.B0) * self.n_total_examples) def _cbound(self, sol): """Computing the objective function""" return 1 
- (self.A2 * sol ** 2 + self.A1 * sol + self.A0) / (( - self.B2 * sol ** 2 + self.B1 * sol + self.B0) * self.n_total_examples) + self.B2 * sol ** 2 + self.B1 * sol + self.B0) * self.n_total_examples) def disagreement(self, sol): - return (self.B2 * sol ** 2 + self.B1 * sol + self.B0)/self.n_total_examples + return ( + self.B2 * sol ** 2 + self.B1 * sol + self.B0) / self.n_total_examples def margin(self, sol): - return (self.A2 * sol ** 2 + self.A1 * sol + self.A0)/self.n_total_examples + return ( + self.A2 * sol ** 2 + self.A1 * sol + self.A0) / self.n_total_examples def _best_sol(self, sols): """Return the best min in the two possible sols""" @@ -487,10 +511,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): return 1.0 / n_examples * np.ones((n_examples,)) def get_step_decision_test_graph(self, directory, y_test): - np.savetxt(directory + "y_test_step.csv", self.step_decisions, delimiter=',') + np.savetxt(directory + "y_test_step.csv", self.step_decisions, + delimiter=',') step_metrics = [] - for step_index in range(self.step_decisions.shape[1]-1): - step_metrics.append(self.plotted_metric.score(y_test, self.step_decisions[:, step_index])) + for step_index in range(self.step_decisions.shape[1] - 1): + step_metrics.append(self.plotted_metric.score(y_test, + self.step_decisions[:, + step_index])) step_metrics = np.array(step_metrics) np.savetxt(directory + "step_test_metrics.csv", step_metrics, delimiter=',') @@ -502,7 +529,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): step_mincq_test_metrics = [] for step_index in range(self.step_decisions.shape[1] - 1): step_mincq_test_metrics.append(self.plotted_metric.score(y_test, - self.mincq_step_decisions[:, + self.mincq_step_decisions[ + :, step_index])) np.savetxt(directory + "mincq_step_test_metrics.csv", step_mincq_test_metrics, @@ -514,9 +542,9 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): step_cbounds = [] for step_index in range(self.step_prod.shape[1]): - num = np.sum(y_test*self.step_prod[:, step_index])**2 - den = np.sum((self.step_prod[:, step_index])**2) - step_cbounds.append(1-num/(den*self.step_prod.shape[0])) + num = np.sum(y_test * self.step_prod[:, step_index]) ** 2 + den = np.sum((self.step_prod[:, step_index]) ** 2) + step_cbounds.append(1 - num / (den * self.step_prod.shape[0])) step_cbounds = np.array(step_cbounds) np.savetxt(directory + "step_test_c_bounds.csv", step_cbounds, delimiter=',') @@ -532,35 +560,47 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): # get_accuracy_graph(self.voter_perfs[:20], self.__class__.__name__, # directory + 'voter_perfs.png', "Rs") get_accuracy_graph(self.weights_, self.__class__.__name__, - directory+'vote_weights.png', "weights", zero_to_one=False) + directory + 'vote_weights.png', "weights", + zero_to_one=False) get_accuracy_graph(self.c_bounds, self.__class__.__name__, directory + 'c_bounds.png', "C-Bounds") if self.mincq_tracking: get_accuracy_graph(self.c_bounds, self.__class__.__name__, - directory + 'c_bounds_comparaison.png', "1-var mins", self.mincq_c_bounds, "MinCQ min", zero_to_one=False) + directory + 'c_bounds_comparaison.png', + "1-var mins", self.mincq_c_bounds, "MinCQ min", + zero_to_one=False) get_accuracy_graph(self.train_metrics, self.__class__.__name__, - directory + 'train_metrics_comparaison.png', self.plotted_metric, + directory + 'train_metrics_comparaison.png', + self.plotted_metric, self.mincq_train_metrics, "MinCQ metrics") 
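# --- Editor's note (illustration, not part of the original diff) -----------
# The step-wise quantity computed above, 1 - num / (den * n) with
# num = (sum_i y_i F(x_i))**2 and den = sum_i F(x_i)**2, is the empirical
# C-bound of the current vote F: one minus the squared first moment of the
# margin divided by its second moment. Tiny check with made-up margins:
import numpy as np
y = np.array([1.0, 1.0, -1.0, 1.0, -1.0])
F = np.array([0.6, 0.2, -0.4, 0.1, 0.3])         # hypothetical vote outputs
c_bound = 1.0 - (np.sum(y * F) ** 2) / (np.sum(F ** 2) * len(y))
# ---------------------------------------------------------------------------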
get_accuracy_graph(self.previous_margins, self.__class__.__name__, - directory + 'margins.png', "Margins", zero_to_one=False) + directory + 'margins.png', "Margins", + zero_to_one=False) get_accuracy_graph(self.selected_margins, self.__class__.__name__, - directory + 'selected_margins.png', "Selected Margins") + directory + 'selected_margins.png', + "Selected Margins") self.tau[0] = 0 get_accuracy_graph(self.tau, self.__class__.__name__, - directory + 'disagreements.png', "disagreements", zero_to_one=False) + directory + 'disagreements.png', "disagreements", + zero_to_one=False) get_accuracy_graph(self.train_metrics[:-1], self.__class__.__name__, - directory + 'c_bounds_train_metrics.png', self.plotted_metric, self.c_bounds, "C-Bound", self.bounds[:-1]) + directory + 'c_bounds_train_metrics.png', + self.plotted_metric, self.c_bounds, "C-Bound", + self.bounds[:-1]) get_accuracy_graph(self.norm, self.__class__.__name__, directory + 'norms.png', - "squared 2-norm",zero_to_one=False) - interpretString = getInterpretBase(self, directory, self.__class__.__name__, + "squared 2-norm", zero_to_one=False) + interpretString = getInterpretBase(self, directory, + self.__class__.__name__, self.weights_, self.break_cause) if self.save_train_data: - np.savetxt(directory+"x_train.csv", self.X_train, delimiter=',') - np.savetxt(directory+"y_train.csv", self.y_train, delimiter=',') - np.savetxt(directory + "raw_weights.csv", self.raw_weights, delimiter=',') + np.savetxt(directory + "x_train.csv", self.X_train, delimiter=',') + np.savetxt(directory + "y_train.csv", self.y_train, delimiter=',') + np.savetxt(directory + "raw_weights.csv", self.raw_weights, + delimiter=',') np.savetxt(directory + "c_bounds.csv", self.c_bounds, delimiter=',') - np.savetxt(directory + "train_metrics.csv", self.train_metrics, delimiter=',') + np.savetxt(directory + "train_metrics.csv", self.train_metrics, + delimiter=',') np.savetxt(directory + "margins.csv", self.previous_margins, delimiter=',') np.savetxt(directory + "disagreements.csv", self.tau, @@ -570,15 +610,16 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): if self.mincq_tracking: np.savetxt(directory + "mincq_cbounds.csv", self.mincq_c_bounds, delimiter=',') - np.savetxt(directory + "mincq_train_metrics.csv", self.mincq_train_metrics, + np.savetxt(directory + "mincq_train_metrics.csv", + self.mincq_train_metrics, delimiter=',') args_dict = dict( (arg_name, str(self.__dict__[arg_name])) for arg_name in self.printed_args_name_list) interpretString += "\n \n With arguments : \n" + u'\u2022 ' + ( - "\n" + u'\u2022 ').join(['%s: \t%s' % (key, value) - for (key, value) in - args_dict.items()]) + "\n" + u'\u2022 ').join(['%s: \t%s' % (key, value) + for (key, value) in + args_dict.items()]) if not self.respected_bound: interpretString += "\n\n The bound was not respected" diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py index 6560eb1a4eee3c279dad43cbbf4b1f0a07376d93..6af59377abf2b927f3a2a8d1f77bad34724c914b 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py @@ -1,20 +1,24 @@ -import scipy import logging -import numpy.ma as ma +import math +import time from collections import defaultdict -from sklearn.utils.validation import check_is_fitted + +import numpy as np +import numpy.ma as ma +import scipy 
from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.metrics import accuracy_score -import numpy as np -import time -import math +from sklearn.utils.validation import check_is_fitted -from .BoostUtils import StumpsClassifiersGenerator, ConvexProgram, sign, BaseBoost, TreeClassifiersGenerator +from .BoostUtils import StumpsClassifiersGenerator, ConvexProgram, sign, \ + BaseBoost, TreeClassifiersGenerator from ... import Metrics class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): - def __init__(self, mu=0.01, epsilon=1e-06, n_max_iterations=100, estimators_generator=None, dual_constraint_rhs=0, save_iteration_as_hyperparameter_each=None, random_state=None): + def __init__(self, mu=0.01, epsilon=1e-06, n_max_iterations=100, + estimators_generator=None, dual_constraint_rhs=0, + save_iteration_as_hyperparameter_each=None, random_state=None): super(ColumnGenerationClassifier, self).__init__() self.epsilon = epsilon self.n_max_iterations = n_max_iterations @@ -32,9 +36,12 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): y[y == 0] = -1 if self.estimators_generator is "Stumps": - self.estimators_generator = StumpsClassifiersGenerator(n_stumps_per_attribute=self.n_stumps, self_complemented=True) + self.estimators_generator = StumpsClassifiersGenerator( + n_stumps_per_attribute=self.n_stumps, self_complemented=True) elif self.estimators_generator is "Trees": - self.estimators_generator = TreeClassifiersGenerator(max_depth=self.max_depth, n_trees=self.n_stumps, self_complemented=True) + self.estimators_generator = TreeClassifiersGenerator( + max_depth=self.max_depth, n_trees=self.n_stumps, + self_complemented=True) self.estimators_generator.fit(X, y) self.classification_matrix = self._binary_classification_matrix(X) @@ -48,39 +55,47 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): self.n_total_examples = m self.train_shape = self.classification_matrix.shape - y_kernel_matrix = np.multiply(y.reshape((len(y), 1)), self.classification_matrix) + y_kernel_matrix = np.multiply(y.reshape((len(y), 1)), + self.classification_matrix) # Initialization alpha = self._initialize_alphas(m) self.initialize() self.train_metrics = [] self.gammas = [] - self.list_weights= [] + self.list_weights = [] self.bounds = [] self.previous_votes = [] # w = [0.5,0.5] - w= None + w = None self.collected_weight_vectors_ = {} self.collected_dual_constraint_violations_ = {} start = time.time() - for k in range(min(n, self.n_max_iterations if self.n_max_iterations is not None else np.inf)): + for k in range(min(n, + self.n_max_iterations if self.n_max_iterations is not None else np.inf)): # Find worst weak hypothesis given alpha. - h_values = ma.array(np.squeeze(np.array((alpha).T.dot(y_kernel_matrix).T)), fill_value=-np.inf) + h_values = ma.array( + np.squeeze(np.array((alpha).T.dot(y_kernel_matrix).T)), + fill_value=-np.inf) h_values[self.chosen_columns_] = ma.masked worst_h_index = ma.argmax(h_values) # Check for optimal solution. We ensure at least one complete iteration is done as the initialization # values might provide a degenerate initial solution. - if h_values[worst_h_index] <= self.dual_constraint_rhs + self.epsilon and len(self.chosen_columns_) > 0: + if h_values[ + worst_h_index] <= self.dual_constraint_rhs + self.epsilon and len( + self.chosen_columns_) > 0: break # Append the weak hypothesis. 
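# --- Editor's note (illustration, not part of the original diff) -----------
# Standalone sketch of the column-generation step used a few lines above:
# score every voter by its alpha-weighted edge sum_i alpha_i * y_i * h_j(x_i),
# mask the columns already in the vote, and take the argmax. Data is synthetic
# and the "already chosen" indices are made up.
import numpy as np
import numpy.ma as ma

rng = np.random.RandomState(0)
H = rng.choice([-1.0, 1.0], size=(20, 8))        # voter outputs, one column per voter
y = rng.choice([-1.0, 1.0], size=20)
alpha = np.ones(20) / 20                         # dual weights over examples
y_kernel_matrix = y.reshape(-1, 1) * H
edges = ma.array(alpha.dot(y_kernel_matrix), fill_value=-np.inf)
edges[[2, 5]] = ma.masked                        # pretend voters 2 and 5 were already chosen
worst_h_index = ma.argmax(edges)
# ---------------------------------------------------------------------------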
self.chosen_columns_.append(worst_h_index) - self.matrix_to_optimize = self.get_matrix_to_optimize(y_kernel_matrix,w) + self.matrix_to_optimize = self.get_matrix_to_optimize( + y_kernel_matrix, w) # Solve restricted master for new costs. - w, alpha = self._restricted_master_problem(previous_w=w, previous_alpha=alpha) + w, alpha = self._restricted_master_problem(previous_w=w, + previous_alpha=alpha) cbound = self.compute_empiric_cbound(w, y_kernel_matrix) self.c_bounds.append(cbound) self.list_weights.append(w) @@ -91,7 +106,8 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): signs_array = np.array([int(x) for x in sign(margins)]) self.train_metrics.append(self.plotted_metric.score(y, signs_array)) self.gammas.append(accuracy_score(y, signs_array)) - self.bounds.append(math.exp(-2 * np.sum(np.square(np.array(self.gammas))))) + self.bounds.append( + math.exp(-2 * np.sum(np.square(np.array(self.gammas))))) self.nb_opposed_voters = self.check_opposed_voters() self.compute_weights_(w) @@ -99,7 +115,7 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): self.estimators_generator.choose(self.chosen_columns_) end = time.time() - self.train_time = end-start + self.train_time = end - start y[y == -1] = 0 return self @@ -112,39 +128,52 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): X = np.array(X.todense()) classification_matrix = self._binary_classification_matrix(X) - margins = np.squeeze(np.asarray(np.dot(classification_matrix, self.weights_))) + margins = np.squeeze( + np.asarray(np.dot(classification_matrix, self.weights_))) signs_array = np.array([int(x) for x in sign(margins)]) signs_array[signs_array == -1] = 0 end = time.time() - self.predict_time = end-start + self.predict_time = end - start self.step_predict(classification_matrix) return signs_array def compute_empiric_cbound(self, w, y_kernel_matrix): - cbound = 1 - (1.0/self.n_total_examples) * (np.sum(np.average(y_kernel_matrix[:, self.chosen_columns_], axis=1, weights=w))**2/ - np.sum(np.average(y_kernel_matrix[:, self.chosen_columns_], axis=1, weights=w)**2)) + cbound = 1 - (1.0 / self.n_total_examples) * (np.sum( + np.average(y_kernel_matrix[:, self.chosen_columns_], axis=1, + weights=w)) ** 2 / + np.sum(np.average( + y_kernel_matrix[:, + self.chosen_columns_], + axis=1, + weights=w) ** 2)) return cbound def step_predict(self, classification_matrix): if classification_matrix.shape != self.train_shape: self.step_decisions = np.zeros(classification_matrix.shape) self.step_prod = np.zeros(classification_matrix.shape) - for weight_index in range(self.weights_.shape[0]-1): - margins = np.sum(classification_matrix[:, :weight_index+1]* self.list_weights[weight_index], axis=1) + for weight_index in range(self.weights_.shape[0] - 1): + margins = np.sum(classification_matrix[:, :weight_index + 1] * + self.list_weights[weight_index], axis=1) signs_array = np.array([int(x) for x in sign(margins)]) signs_array[signs_array == -1] = 0 self.step_decisions[:, weight_index] = signs_array - self.step_prod[:, weight_index] = np.sum(classification_matrix[:, :weight_index+1]* self.weights_[:weight_index+1], axis=1) + self.step_prod[:, weight_index] = np.sum( + classification_matrix[:, :weight_index + 1] * self.weights_[ + :weight_index + 1], + axis=1) def initialize(self): pass - def update_values(self, h_values=None, worst_h_index=None, alpha=None, w=None): + def update_values(self, h_values=None, worst_h_index=None, alpha=None, + w=None): pass - def 
get_margins(self,w): - margins = np.squeeze(np.asarray(np.dot(self.classification_matrix[:, self.chosen_columns_], w))) + def get_margins(self, w): + margins = np.squeeze(np.asarray( + np.dot(self.classification_matrix[:, self.chosen_columns_], w))) return margins def compute_weights_(self, w=None): @@ -170,20 +199,28 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): m_ones = np.ones((n_examples, 1)) qp_a = np.vstack((np.hstack((-self.matrix_to_optimize, m_eye)), - np.hstack((np.ones((1, n_hypotheses)), np.zeros((1, n_examples)))))) + np.hstack((np.ones((1, n_hypotheses)), + np.zeros((1, n_examples)))))) qp_b = np.vstack((np.zeros((n_examples, 1)), np.array([1.0]).reshape((1, 1)))) - qp_g = np.vstack((np.hstack((-np.eye(n_hypotheses), np.zeros((n_hypotheses, n_examples)))), - np.hstack((np.zeros((1, n_hypotheses)), - 1.0 / n_examples * m_ones.T)))) + qp_g = np.vstack((np.hstack( + (-np.eye(n_hypotheses), np.zeros((n_hypotheses, n_examples)))), + np.hstack((np.zeros((1, n_hypotheses)), + - 1.0 / n_examples * m_ones.T)))) qp_h = np.vstack((np.zeros((n_hypotheses, 1)), np.array([-self.mu]).reshape((1, 1)))) qp = ConvexProgram() - qp.quadratic_func = 2.0 / n_examples * np.vstack((np.hstack((np.zeros((n_hypotheses, n_hypotheses)), np.zeros((n_hypotheses, n_examples)))), - np.hstack((np.zeros((n_examples, n_hypotheses)), m_eye)))) + qp.quadratic_func = 2.0 / n_examples * np.vstack((np.hstack((np.zeros( + (n_hypotheses, n_hypotheses)), np.zeros( + (n_hypotheses, n_examples)))), + np.hstack((np.zeros(( + n_examples, + n_hypotheses)), + m_eye)))) qp.add_equality_constraints(qp_a, qp_b) qp.add_inequality_constraints(qp_g, qp_h) @@ -192,7 +229,8 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): qp.initial_values = np.append(previous_w, [0]) try: - solver_result = qp.solve(abstol=1e-10, reltol=1e-10, feastol=1e-10, return_all_information=True) + solver_result = qp.solve(abstol=1e-10, reltol=1e-10, feastol=1e-10, + return_all_information=True) w = np.asarray(np.array(solver_result['x']).T[0])[:n_hypotheses] # The alphas are the Lagrange multipliers associated with the equality constraints (returned as the y vector in CVXOPT). 
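# --- Editor's note (illustration, not part of the original diff) -----------
# The comment above reads the alphas back from CVXOPT's 'y' vector: in
# cvxopt.solvers.qp, the key 'y' holds the Lagrange multipliers of the
# equality constraints A x = b. Toy problem only, unrelated to the platform's
# restricted master problem:
import numpy as np
from cvxopt import matrix
from cvxopt.solvers import qp, options

options['show_progress'] = False
P = matrix(np.eye(2))
q = matrix(np.zeros(2))
G = matrix(-np.eye(2))                           # x >= -10, just to keep the problem bounded
h = matrix(10.0 * np.ones(2))
A = matrix(np.ones((1, 2)))                      # x1 + x2 = 1
b = matrix(np.array([1.0]))
result = qp(P, q, G, h, A, b)
x_opt = np.array(result['x']).ravel()            # primal solution
eq_multipliers = np.array(result['y']).ravel()   # multipliers for A x = b
# ---------------------------------------------------------------------------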
@@ -206,7 +244,8 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): # logging.info('Updating dual constraint rhs: {}'.format(self.dual_constraint_rhs)) except: - logging.warning('QP Solving failed at iteration {}.'.format(n_hypotheses)) + logging.warning( + 'QP Solving failed at iteration {}.'.format(n_hypotheses)) if previous_w is not None: w = np.append(previous_w, [0]) else: @@ -222,7 +261,6 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): def _initialize_alphas(self, n_examples): return 1.0 / n_examples * np.ones((n_examples,)) - # class CqBoostClassifier(ColumnGenerationClassifier): # def __init__(self, mu=0.001, epsilon=1e-08, n_max_iterations=None, estimators_generator=None, save_iteration_as_hyperparameter_each=None): # super(CqBoostClassifier, self).__init__(epsilon, n_max_iterations, estimators_generator, dual_constraint_rhs=0, diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/MinCQUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/MinCQUtils.py index 0e8479a394c1e0d51820eae2e43322e8ebdf26d7..c1dfedad46ccda63a5044f5b258bc68c48e9837c 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/MinCQUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/MinCQUtils.py @@ -7,18 +7,18 @@ Related papers: """ from __future__ import print_function, division, absolute_import + from operator import xor import numpy as np -from sklearn.utils.validation import check_X_y from sklearn.ensemble import VotingClassifier from sklearn.manifold import SpectralEmbedding -from sklearn.utils.graph import graph_laplacian from sklearn.preprocessing import LabelEncoder +from sklearn.utils.graph import graph_laplacian +from sklearn.utils.validation import check_X_y - -from .BoostUtils import ConvexProgram, StumpsClassifiersGenerator -from ..MonoviewUtils import BaseMonoviewClassifier, CustomUniform, change_label_to_zero, change_label_to_minus +from .BoostUtils import ConvexProgram +from ..MonoviewUtils import change_label_to_zero, change_label_to_minus class MinCqClassifier(VotingClassifier): @@ -33,11 +33,14 @@ class MinCqClassifier(VotingClassifier): The fixed value of the first moment of the margin. """ - def __init__(self, estimators_generator=None, estimators=None, mu=0.001, omega=0.5, use_binary=False, zeta=0, gamma=1, n_neighbors=5): + + def __init__(self, estimators_generator=None, estimators=None, mu=0.001, + omega=0.5, use_binary=False, zeta=0, gamma=1, n_neighbors=5): if estimators is None: estimators = [] - super().__init__(estimators=estimators, voting='soft', flatten_transform=False) + super().__init__(estimators=estimators, voting='soft', + flatten_transform=False) self.estimators_generator = estimators_generator self.mu = mu self.omega = omega @@ -64,7 +67,8 @@ class MinCqClassifier(VotingClassifier): """ # Validations assert 0 < self.mu <= 1, "MinCqClassifier: mu parameter must be in (0, 1]" - assert xor(bool(self.estimators_generator), bool(self.estimators)), "MinCqClassifier: exactly one of estimator_generator or estimators must be used." + assert xor(bool(self.estimators_generator), bool( + self.estimators)), "MinCqClassifier: exactly one of estimator_generator or estimators must be used." X, y = check_X_y(X, change_label_to_minus(y)) # Fit the estimators using VotingClassifier's fit method. 
This will also fit a LabelEncoder that can be @@ -80,7 +84,7 @@ class MinCqClassifier(VotingClassifier): else: self.le_ = LabelEncoder() self.le_.fit(y) - self.clean_me =True + self.clean_me = True if isinstance(y, np.ma.MaskedArray): transformed_y = np.ma.MaskedArray(self.le_.transform(y), y.mask) @@ -89,7 +93,8 @@ class MinCqClassifier(VotingClassifier): transformed_y = y self.estimators_generator.fit(X, transformed_y) - self.estimators = [('ds{}'.format(i), estimator) for i, estimator in enumerate(self.estimators_generator.estimators_)] + self.estimators = [('ds{}'.format(i), estimator) for i, estimator in + enumerate(self.estimators_generator.estimators_)] super().fit(X, y) # Preparation and resolution of the quadratic program @@ -100,7 +105,13 @@ class MinCqClassifier(VotingClassifier): # print(self.weights.shape) # print(np.unique(self.weights)[0:10]) # import pdb;pdb.set_trace() - self.train_cbound = 1 - (1.0/X.shape[0])*(np.sum(np.multiply(change_label_to_minus(y), np.average(self._binary_classification_matrix(X), axis=1, weights=self.weights)))**2)/(np.sum(np.average(self._binary_classification_matrix(X), axis=1, weights=self.weights)**2)) + self.train_cbound = 1 - (1.0 / X.shape[0]) * (np.sum( + np.multiply(change_label_to_minus(y), + np.average(self._binary_classification_matrix(X), + axis=1, weights=self.weights))) ** 2) / ( + np.sum(np.average( + self._binary_classification_matrix(X), + axis=1, weights=self.weights) ** 2)) return self def _binary_classification_matrix(self, X): @@ -139,7 +150,8 @@ class MinCqClassifier(VotingClassifier): binary_labels = np.copy(y) binary_labels[y == 0] = -1 - multi_matrix = binary_labels.reshape((len(binary_labels), 1)) * classification_matrix + multi_matrix = binary_labels.reshape( + (len(binary_labels), 1)) * classification_matrix else: multi_matrix = self._multiclass_classification_matrix(X, y) @@ -153,7 +165,8 @@ class MinCqClassifier(VotingClassifier): objective_vector = None # Equality constraints (first moment of the margin equal to mu, Q sums to one) - equality_matrix = np.vstack((yf.reshape((1, n_voters)), np.ones((1, n_voters)))) + equality_matrix = np.vstack( + (yf.reshape((1, n_voters)), np.ones((1, n_voters)))) equality_vector = np.array([self.mu, 1.0]) # Lower and upper bounds, no quasi-uniformity. @@ -164,7 +177,9 @@ class MinCqClassifier(VotingClassifier): # upper_bound = 2.0/n_voters upper_bound = None - weights = self._solve_qp(objective_matrix, objective_vector, equality_matrix, equality_vector, lower_bound, upper_bound) + weights = self._solve_qp(objective_matrix, objective_vector, + equality_matrix, equality_vector, lower_bound, + upper_bound) # Keep learning information for further use. self.learner_info_ = {} @@ -172,12 +187,14 @@ class MinCqClassifier(VotingClassifier): # We count the number of non-zero weights, including the implicit voters. # TODO: Verify how we define non-zero weights here, could be if the weight is near 1/2n. 
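The expression assigned to self.train_cbound in the fit() hunk above is the empirical C-bound of the weighted majority vote: one minus the squared first moment of the margin divided by its second moment, with the margin of example i defined as y_i times the weighted average of the voters' outputs. A small self-contained numpy sketch of that computation; the vote matrix, labels and weights below are toy values, not repository data.

import numpy as np

votes = np.array([[ 1,  1, -1],   # votes[i, j] = h_j(x_i), in {-1, +1}
                  [ 1, -1,  1],
                  [-1,  1,  1],
                  [ 1,  1,  1]], dtype=float)
y = np.array([1, 1, -1, 1], dtype=float)   # labels in {-1, +1}
q = np.array([0.5, 0.25, 0.25])            # voter weights, summing to one

weighted_vote = np.average(votes, axis=1, weights=q)
first_moment = (y * weighted_vote).mean()      # mean margin
second_moment = (weighted_vote ** 2).mean()    # mean squared vote
c_bound = 1.0 - first_moment ** 2 / second_moment
print(first_moment, second_moment, c_bound)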
n_nonzero_weights = np.sum(np.asarray(weights) > 1e-12) - n_nonzero_weights += np.sum(np.asarray(weights) < 1.0 / len(self.estimators_) - 1e-12) + n_nonzero_weights += np.sum( + np.asarray(weights) < 1.0 / len(self.estimators_) - 1e-12) self.learner_info_.update(n_nonzero_weights=n_nonzero_weights) return weights - def _solve_qp(self, objective_matrix, objective_vector, equality_matrix, equality_vector, lower_bound, upper_bound): + def _solve_qp(self, objective_matrix, objective_vector, equality_matrix, + equality_vector, lower_bound, upper_bound): try: qp = ConvexProgram() qp.quadratic_func, qp.linear_func = objective_matrix, objective_vector @@ -199,6 +216,7 @@ class RegularizedBinaryMinCqClassifier(MinCqClassifier): [2] Risk Bounds for the Majority Vote: From a PAC-Bayesian Analysis to a Learning Algorithm (Germain et al., 2015) """ + def fit(self, X, y): import time beg = time.time() @@ -207,9 +225,11 @@ class RegularizedBinaryMinCqClassifier(MinCqClassifier): # Validations if isinstance(y, np.ma.MaskedArray): - assert len(self.classes_[np.where(np.logical_not(self.classes_.mask))]) == 2, "RegularizedBinaryMinCqClassifier: only supports binary classification." + assert len(self.classes_[np.where(np.logical_not( + self.classes_.mask))]) == 2, "RegularizedBinaryMinCqClassifier: only supports binary classification." else: - assert len(self.classes_), "RegularizedBinaryMinCqClassifier: only supports binary classification." + assert len( + self.classes_), "RegularizedBinaryMinCqClassifier: only supports binary classification." # Then we "reverse" the negative weights and their associated voter's output. for i, weight in enumerate(self.weights): @@ -217,8 +237,8 @@ class RegularizedBinaryMinCqClassifier(MinCqClassifier): # logger.debug("Reversing decision of a binary voter") self.weights[i] *= -1 self.estimators_[i].reverse_decision() - end=time.time() - self.train_time = end-beg + end = time.time() + self.train_time = end - beg return self def _solve(self, X, y): @@ -232,11 +252,13 @@ class RegularizedBinaryMinCqClassifier(MinCqClassifier): if self.zeta == 0: np.transpose(classification_matrix) - ftf = np.dot(np.transpose(classification_matrix),classification_matrix) + ftf = np.dot(np.transpose(classification_matrix), + classification_matrix) else: I = np.eye(n_examples) L = build_laplacian(X, n_neighbors=self.n_neighbors) - ftf = classification_matrix.T.dot(I + (self.zeta / n_examples) * L).dot(classification_matrix) + ftf = classification_matrix.T.dot( + I + (self.zeta / n_examples) * L).dot(classification_matrix) # We use {-1, 1} labels. binary_labels = np.ma.copy(y) @@ -264,19 +286,21 @@ class RegularizedBinaryMinCqClassifier(MinCqClassifier): upper_bound = 1.0 / n_voters try: - weights = self._solve_qp(objective_matrix, objective_vector, equality_matrix, equality_vector, lower_bound, upper_bound) + weights = self._solve_qp(objective_matrix, objective_vector, + equality_matrix, equality_vector, + lower_bound, upper_bound) except ValueError as e: if "domain error" in e.args: weights = np.ones(len(self.estimators_)) - # Keep learning information for further use. self.learner_info_ = {} # We count the number of non-zero weights, including the implicit voters. # TODO: Verify how we define non-zero weights here, could be if the weight is near 1/2n. 
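When zeta is non-zero, the _solve hunk above regularises the voters' Gram matrix with a graph Laplacian, ftf = Fᵀ (I + (zeta / n) L) F. The platform obtains L through build_laplacian (defined a little further down, using SpectralEmbedding); the sketch below is only an assumed stand-in that builds a k-nearest-neighbour affinity with sklearn and takes its Laplacian with scipy, on toy data.

import numpy as np
from scipy.sparse.csgraph import laplacian
from sklearn.neighbors import kneighbors_graph

rng = np.random.RandomState(0)
X = rng.rand(30, 4)               # toy inputs
F = np.sign(rng.randn(30, 6))     # toy classification matrix, one column per voter
zeta, n_neighbors = 0.5, 5
n_examples = X.shape[0]

# k-NN affinity, symmetrised, and its graph Laplacian L = D - W.
W = kneighbors_graph(X, n_neighbors=n_neighbors, mode='connectivity', include_self=False)
W = 0.5 * (W + W.T)
L = laplacian(W).toarray()

# Regularised Gram matrix, mirroring ftf in _solve when zeta != 0.
I = np.eye(n_examples)
ftf = F.T.dot(I + (zeta / n_examples) * L).dot(F)
print(ftf.shape)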
n_nonzero_weights = np.sum(np.asarray(weights) > 1e-12) - n_nonzero_weights += np.sum(np.asarray(weights) < 1.0 / len(self.estimators_) - 1e-12) + n_nonzero_weights += np.sum( + np.asarray(weights) < 1.0 / len(self.estimators_) - 1e-12) self.learner_info_.update(n_nonzero_weights=n_nonzero_weights) # Conversion of the weights of the n first voters to weights on the implicit 2n voters. @@ -284,6 +308,7 @@ class RegularizedBinaryMinCqClassifier(MinCqClassifier): # return np.array([2 * q - 1.0 / len(self.estimators_) for q in weights]) return np.array(weights) + def build_laplacian(X, n_neighbors=None): clf = SpectralEmbedding(n_neighbors=n_neighbors) clf.fit(X) diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/PregenUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/PregenUtils.py index 6011f7c6560d246634ad13df8632eb0168a18231..8e0d327205036ed3fbbfa030e6a455704f6721bc 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/PregenUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/PregenUtils.py @@ -1,6 +1,6 @@ - +from .BoostUtils import StumpsClassifiersGenerator, BaseBoost, \ + TreeClassifiersGenerator from ..MonoviewUtils import change_label_to_minus -from .BoostUtils import StumpsClassifiersGenerator, BaseBoost, TreeClassifiersGenerator class PregenClassifier(BaseBoost): @@ -13,9 +13,10 @@ class PregenClassifier(BaseBoost): n_stumps_per_attribute=self.n_stumps, self_complemented=self.self_complemented) elif generator is "Trees": - self.estimators_generator = TreeClassifiersGenerator(n_trees=self.n_stumps, max_depth=self.max_depth) + self.estimators_generator = TreeClassifiersGenerator( + n_trees=self.n_stumps, max_depth=self.max_depth) self.estimators_generator.fit(X, neg_y) else: - neg_y=None + neg_y = None classification_matrix = self._binary_classification_matrix(X) return classification_matrix, neg_y diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/SVCClassifier.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/SVCClassifier.py index 6778caad9d7ee9a6795e88d0cbf74b96be3a8dc2..76220f211d6669fe10c2bc15632e0daacda0e092 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/SVCClassifier.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/SVCClassifier.py @@ -3,7 +3,8 @@ from sklearn.svm import SVC class SVCClassifier(SVC): - def __init__(self, random_state=None, kernel='rbf', C=1.0, degree=3, **kwargs): + def __init__(self, random_state=None, kernel='rbf', C=1.0, degree=3, + **kwargs): super(SVCClassifier, self).__init__( C=C, kernel=kernel, @@ -11,7 +12,7 @@ class SVCClassifier(SVC): probability=True, max_iter=1000, random_state=random_state - ) + ) self.classed_params = [] self.weird_strings = {} @@ -21,4 +22,4 @@ class SVCClassifier(SVC): def getInterpret(self, directory, y_test): interpretString = "" - return interpretString \ No newline at end of file + return interpretString diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pyx b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pyx index 349c696e33b4ab83cab050876bb596e6376ce730..8e50ea220339c5d49d4327202517d51eea707a86 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pyx +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pyx @@ -1,582 +1,610 @@ -# cython: cdivision=True -# cython: boundscheck=False -# 
cython: wraparound=False - -# Authors: Gilles Louppe <g.louppe@gmail.com> -# Peter Prettenhofer <peter.prettenhofer@gmail.com> -# Brian Holt <bdholt1@gmail.com> -# Noel Dawe <noel@dawe.me> -# Satrajit Gosh <satrajit.ghosh@gmail.com> -# Lars Buitinck -# Arnaud Joly <arnaud.v.joly@gmail.com> -# Joel Nothman <joel.nothman@gmail.com> -# Fares Hedayati <fares.hedayati@gmail.com> -# Jacob Schreiber <jmschreiber91@gmail.com> -# Nelson Liu <nelson@nelsonliu.me> -# -# License: BSD 3 clause - -from libc.stdlib cimport calloc -from libc.stdlib cimport free -from libc.string cimport memcpy -from libc.string cimport memset -from libc.math cimport fabs -from libc.stdlib cimport malloc -from libc.stdlib cimport realloc -# from libc.math cimport log as ln - -import numpy as np -cimport numpy as np -np.import_array() -# from sklearn.tree._criterion cimport Criterion, ClassificationCriterion - -cdef realloc_ptr safe_realloc(realloc_ptr* p, size_t nelems) nogil except *: - # sizeof(realloc_ptr[0]) would be more like idiomatic C, but causes Cython - # 0.20.1 to crash. - cdef size_t nbytes = nelems * sizeof(p[0][0]) - if nbytes / sizeof(p[0][0]) != nelems: - # Overflow in the multiplication - with gil: - raise MemoryError("could not allocate (%d * %d) bytes" - % (nelems, sizeof(p[0][0]))) - cdef realloc_ptr tmp = <realloc_ptr>realloc(p[0], nbytes) - if tmp == NULL: - with gil: - raise MemoryError("could not allocate %d bytes" % nbytes) - p[0] = tmp - return tmp # for - - -cdef inline np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size): - """Return copied data as 1D numpy array of intp's.""" - cdef np.npy_intp shape[1] - shape[0] = <np.npy_intp> size - return np.PyArray_SimpleNewFromData(1, shape, np.NPY_INTP, data).copy() - - -cdef class CustomCriterion: - """Interface for impurity criteria. - This object stores methods on how to calculate how good a split is using - different metrics. - """ - - def __dealloc__(self): - """Destructor.""" - - free(self.sum_total) - free(self.sum_left) - free(self.sum_right) - - def __getstate__(self): - return {} - - def __setstate__(self, d): - pass - - cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, - double weighted_n_samples, SIZE_t* samples, SIZE_t start, - SIZE_t end) nogil except -1: - """Placeholder for a method which will initialize the criterion. - Returns -1 in case of failure to allocate memory (and raise MemoryError) - or 0 otherwise. - Parameters - ---------- - y : array-like, dtype=DOUBLE_t - y is a buffer that can store values for n_outputs target variables - y_stride : SIZE_t - y_stride is used to index the kth output value as follows: - y[i, k] = y[i * y_stride + k] - sample_weight : array-like, dtype=DOUBLE_t - The weight of each sample - weighted_n_samples : DOUBLE_t - The total weight of the samples being considered - samples : array-like, dtype=DOUBLE_t - Indices of the samples in X and y, where samples[start:end] - correspond to the samples in this node - start : SIZE_t - The first sample to be used on this node - end : SIZE_t - The last sample used on this node - """ - - pass -# -# cdef int reset(self) nogil except -1: -# """Reset the criterion at pos=start. -# This method must be implemented by the subclass. -# """ -# -# pass -# -# cdef int reverse_reset(self) nogil except -1: -# """Reset the criterion at pos=end. -# This method must be implemented by the subclass. -# """ -# pass -# -# cdef int update(self, SIZE_t new_pos) nogil except -1: -# """Updated statistics by moving samples[pos:new_pos] to the left child. 
-# This updates the collected statistics by moving samples[pos:new_pos] -# from the right child to the left child. It must be implemented by -# the subclass. -# Parameters -# ---------- -# new_pos : SIZE_t -# New starting index position of the samples in the right child -# """ -# -# pass -# -# cdef double node_impurity(self) nogil: -# """Placeholder for calculating the impurity of the node. -# Placeholder for a method which will evaluate the impurity of -# the current node, i.e. the impurity of samples[start:end]. This is the -# primary function of the criterion class. -# """ -# -# pass -# -# cdef void children_impurity(self, double* impurity_left, -# double* impurity_right) nogil: -# """Placeholder for calculating the impurity of children. -# Placeholder for a method which evaluates the impurity in -# children nodes, i.e. the impurity of samples[start:pos] + the impurity -# of samples[pos:end]. -# Parameters -# ---------- -# impurity_left : double pointer -# The memory address where the impurity of the left child should be -# stored. -# impurity_right : double pointer -# The memory address where the impurity of the right child should be -# stored -# """ -# -# pass -# -# cdef void node_value(self, double* dest) nogil: -# """Placeholder for storing the node value. -# Placeholder for a method which will compute the node value -# of samples[start:end] and save the value into dest. -# Parameters -# ---------- -# dest : double pointer -# The memory address where the node value should be stored. -# """ -# -# pass -# -# cdef double proxy_impurity_improvement(self) nogil: -# """Compute a proxy of the impurity reduction -# This method is used to speed up the search for the best split. -# It is a proxy quantity such that the split that maximizes this value -# also maximizes the impurity improvement. It neglects all constant terms -# of the impurity decrease for a given split. -# The absolute impurity improvement is only computed by the -# impurity_improvement method once the best split has been found. -# """ -# cdef double impurity_left -# cdef double impurity_right -# self.children_impurity(&impurity_left, &impurity_right) -# -# return (- self.weighted_n_right * impurity_right -# - self.weighted_n_left * impurity_left) -# -# cdef double impurity_improvement(self, double impurity) nogil: -# """Compute the improvement in impurity -# This method computes the improvement in impurity when a split occurs. 
-# The weighted impurity improvement equation is the following: -# N_t / N * (impurity - N_t_R / N_t * right_impurity -# - N_t_L / N_t * left_impurity) -# where N is the total number of samples, N_t is the number of samples -# at the current node, N_t_L is the number of samples in the left child, -# and N_t_R is the number of samples in the right child, -# Parameters -# ---------- -# impurity : double -# The initial impurity of the node before the split -# Return -# ------ -# double : improvement in impurity after the split occurs -# """ -# -# cdef double impurity_left -# cdef double impurity_right -# -# self.children_impurity(&impurity_left, &impurity_right) -# -# return ((self.weighted_n_node_samples / self.weighted_n_samples) * -# (impurity - (self.weighted_n_right / -# self.weighted_n_node_samples * impurity_right) -# - (self.weighted_n_left / -# self.weighted_n_node_samples * impurity_left))) -# -# -# cdef class CustomClassificationCriterion(Criterion): -# """Abstract criterion for classification.""" -# -# def __cinit__(self, SIZE_t n_outputs, -# np.ndarray[SIZE_t, ndim=1] n_classes): -# """Initialize attributes for this criterion. -# Parameters -# ---------- -# n_outputs : SIZE_t -# The number of targets, the dimensionality of the prediction -# n_classes : numpy.ndarray, dtype=SIZE_t -# The number of unique classes in each target -# """ -# -# self.y = NULL -# self.y_stride = 0 -# self.sample_weight = NULL -# -# self.samples = NULL -# self.start = 0 -# self.pos = 0 -# self.end = 0 -# -# self.n_outputs = n_outputs -# self.n_samples = 0 -# self.n_node_samples = 0 -# self.weighted_n_node_samples = 0.0 -# self.weighted_n_left = 0.0 -# self.weighted_n_right = 0.0 -# -# # Count labels for each output -# self.sum_total = NULL -# self.sum_left = NULL -# self.sum_right = NULL -# self.n_classes = NULL -# -# safe_realloc(&self.n_classes, n_outputs) -# -# cdef SIZE_t k = 0 -# cdef SIZE_t sum_stride = 0 -# -# # For each target, set the number of unique classes in that target, -# # and also compute the maximal stride of all targets -# for k in range(n_outputs): -# self.n_classes[k] = n_classes[k] -# -# if n_classes[k] > sum_stride: -# sum_stride = n_classes[k] -# -# self.sum_stride = sum_stride -# -# cdef SIZE_t n_elements = n_outputs * sum_stride -# self.sum_total = <double*> calloc(n_elements, sizeof(double)) -# self.sum_left = <double*> calloc(n_elements, sizeof(double)) -# self.sum_right = <double*> calloc(n_elements, sizeof(double)) -# -# if (self.sum_total == NULL or -# self.sum_left == NULL or -# self.sum_right == NULL): -# raise MemoryError() +# # cython: cdivision=True +# # cython: boundscheck=False +# # cython: wraparound=False +# +# # Authors: Gilles Louppe <g.louppe@gmail.com> +# # Peter Prettenhofer <peter.prettenhofer@gmail.com> +# # Brian Holt <bdholt1@gmail.com> +# # Noel Dawe <noel@dawe.me> +# # Satrajit Gosh <satrajit.ghosh@gmail.com> +# # Lars Buitinck +# # Arnaud Joly <arnaud.v.joly@gmail.com> +# # Joel Nothman <joel.nothman@gmail.com> +# # Fares Hedayati <fares.hedayati@gmail.com> +# # Jacob Schreiber <jmschreiber91@gmail.com> +# # Nelson Liu <nelson@nelsonliu.me> +# # +# # License: BSD 3 clause +# +# calloc +# +# free +# +# memcpy +# +# memset +# +# fabs +# +# malloc +# +# realloc +# # from libc.math cimport log as ln +# +# import numpy as np +# +# cimport +# numpy as np +# np.import_array() +# # from sklearn.tree._criterion cimport Criterion, ClassificationCriterion +# +# cdef +# realloc_ptr +# safe_realloc(realloc_ptr * p, size_t +# nelems) nogil except *: +# # 
sizeof(realloc_ptr[0]) would be more like idiomatic C, but causes Cython +# # 0.20.1 to crash. +# cdef +# size_t +# nbytes = nelems * sizeof(p[0][0]) +# if nbytes / sizeof(p[0][0]) != nelems: +# # Overflow in the multiplication +# with gil: +# raise MemoryError("could not allocate (%d * %d) bytes" +# % (nelems, sizeof(p[0][0]))) +# cdef +# realloc_ptr +# tmp = < realloc_ptr > realloc(p[0], nbytes) +# if tmp == NULL: +# with gil: +# raise MemoryError("could not allocate %d bytes" % nbytes) +# p[0] = tmp +# return tmp # for +# +# cdef +# inline +# np.ndarray +# sizet_ptr_to_ndarray(SIZE_t * data, SIZE_t +# size): +# """Return copied data as 1D numpy array of intp's.""" +# cdef +# np.npy_intp +# shape[1] +# shape[0] = < np.npy_intp > size +# return np.PyArray_SimpleNewFromData(1, shape, np.NPY_INTP, data).copy() +# +# cdef +# +# +# class CustomCriterion: +# """Interface for impurity criteria. +# This object stores methods on how to calculate how good a split is using +# different metrics. +# """ # # def __dealloc__(self): # """Destructor.""" -# free(self.n_classes) -# -# def __reduce__(self): -# return (type(self), -# (self.n_outputs, -# sizet_ptr_to_ndarray(self.n_classes, self.n_outputs)), -# self.__getstate__()) -# -# cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, -# DOUBLE_t* sample_weight, double weighted_n_samples, -# SIZE_t* samples, SIZE_t start, SIZE_t end) nogil except -1: -# """Initialize the criterion at node samples[start:end] and -# children samples[start:start] and samples[start:end]. -# Returns -1 in case of failure to allocate memory (and raise MemoryError) -# or 0 otherwise. -# Parameters -# ---------- -# y : array-like, dtype=DOUBLE_t -# The target stored as a buffer for memory efficiency -# y_stride : SIZE_t -# The stride between elements in the buffer, important if there -# are multiple targets (multi-output) -# sample_weight : array-like, dtype=DTYPE_t -# The weight of each sample -# weighted_n_samples : SIZE_t -# The total weight of all samples -# samples : array-like, dtype=SIZE_t -# A mask on the samples, showing which ones we want to use -# start : SIZE_t -# The first sample to use in the mask -# end : SIZE_t -# The last sample to use in the mask -# """ -# -# self.y = y -# self.y_stride = y_stride -# self.sample_weight = sample_weight -# self.samples = samples -# self.start = start -# self.end = end -# self.n_node_samples = end - start -# self.weighted_n_samples = weighted_n_samples -# self.weighted_n_node_samples = 0.0 -# -# cdef SIZE_t* n_classes = self.n_classes -# cdef double* sum_total = self.sum_total # -# cdef SIZE_t i -# cdef SIZE_t p -# cdef SIZE_t k -# cdef SIZE_t c -# cdef DOUBLE_t w = 1.0 -# cdef SIZE_t offset = 0 +# free(self.sum_total) +# free(self.sum_left) +# free(self.sum_right) # -# for k in range(self.n_outputs): -# memset(sum_total + offset, 0, n_classes[k] * sizeof(double)) -# offset += self.sum_stride +# def __getstate__(self): +# return {} # -# for p in range(start, end): -# i = samples[p] -# -# # w is originally set to be 1.0, meaning that if no sample weights -# # are given, the default weight of each sample is 1.0 -# if sample_weight != NULL: -# w = sample_weight[i] -# -# # Count weighted class frequency for each target -# for k in range(self.n_outputs): -# c = <SIZE_t> y[i * y_stride + k] -# sum_total[k * self.sum_stride + c] += w -# -# self.weighted_n_node_samples += w -# -# # Reset to pos=start -# self.reset() -# return 0 -# -# cdef int reset(self) nogil except -1: -# """Reset the criterion at pos=start -# Returns -1 in case of failure 
to allocate memory (and raise MemoryError) -# or 0 otherwise. -# """ -# self.pos = self.start -# -# self.weighted_n_left = 0.0 -# self.weighted_n_right = self.weighted_n_node_samples -# -# cdef double* sum_total = self.sum_total -# cdef double* sum_left = self.sum_left -# cdef double* sum_right = self.sum_right -# -# cdef SIZE_t* n_classes = self.n_classes -# cdef SIZE_t k -# -# for k in range(self.n_outputs): -# memset(sum_left, 0, n_classes[k] * sizeof(double)) -# memcpy(sum_right, sum_total, n_classes[k] * sizeof(double)) -# -# sum_total += self.sum_stride -# sum_left += self.sum_stride -# sum_right += self.sum_stride -# return 0 -# -# cdef int reverse_reset(self) nogil except -1: -# """Reset the criterion at pos=end -# Returns -1 in case of failure to allocate memory (and raise MemoryError) -# or 0 otherwise. -# """ -# self.pos = self.end -# -# self.weighted_n_left = self.weighted_n_node_samples -# self.weighted_n_right = 0.0 -# -# cdef double* sum_total = self.sum_total -# cdef double* sum_left = self.sum_left -# cdef double* sum_right = self.sum_right -# -# cdef SIZE_t* n_classes = self.n_classes -# cdef SIZE_t k -# -# for k in range(self.n_outputs): -# memset(sum_right, 0, n_classes[k] * sizeof(double)) -# memcpy(sum_left, sum_total, n_classes[k] * sizeof(double)) -# -# sum_total += self.sum_stride -# sum_left += self.sum_stride -# sum_right += self.sum_stride -# return 0 -# -# cdef int update(self, SIZE_t new_pos) nogil except -1: -# """Updated statistics by moving samples[pos:new_pos] to the left child. -# Returns -1 in case of failure to allocate memory (and raise MemoryError) -# or 0 otherwise. -# Parameters -# ---------- -# new_pos : SIZE_t -# The new ending position for which to move samples from the right -# child to the left child. -# """ -# cdef DOUBLE_t* y = self.y -# cdef SIZE_t pos = self.pos -# cdef SIZE_t end = self.end -# -# cdef double* sum_left = self.sum_left -# cdef double* sum_right = self.sum_right -# cdef double* sum_total = self.sum_total -# -# cdef SIZE_t* n_classes = self.n_classes -# cdef SIZE_t* samples = self.samples -# cdef DOUBLE_t* sample_weight = self.sample_weight -# -# cdef SIZE_t i -# cdef SIZE_t p -# cdef SIZE_t k -# cdef SIZE_t c -# cdef SIZE_t label_index -# cdef DOUBLE_t w = 1.0 -# -# # Update statistics up to new_pos -# # -# # Given that -# # sum_left[x] + sum_right[x] = sum_total[x] -# # and that sum_total is known, we are going to update -# # sum_left from the direction that require the least amount -# # of computations, i.e. from pos to new_pos or from end to new_po. 
-# -# if (new_pos - pos) <= (end - new_pos): -# for p in range(pos, new_pos): -# i = samples[p] -# -# if sample_weight != NULL: -# w = sample_weight[i] -# -# for k in range(self.n_outputs): -# label_index = (k * self.sum_stride + -# <SIZE_t> y[i * self.y_stride + k]) -# sum_left[label_index] += w -# -# self.weighted_n_left += w -# -# else: -# self.reverse_reset() -# -# for p in range(end - 1, new_pos - 1, -1): -# i = samples[p] -# -# if sample_weight != NULL: -# w = sample_weight[i] -# -# for k in range(self.n_outputs): -# label_index = (k * self.sum_stride + -# <SIZE_t> y[i * self.y_stride + k]) -# sum_left[label_index] -= w -# -# self.weighted_n_left -= w -# -# # Update right part statistics -# self.weighted_n_right = self.weighted_n_node_samples - self.weighted_n_left -# for k in range(self.n_outputs): -# for c in range(n_classes[k]): -# sum_right[c] = sum_total[c] - sum_left[c] -# -# sum_right += self.sum_stride -# sum_left += self.sum_stride -# sum_total += self.sum_stride -# -# self.pos = new_pos -# return 0 -# -# cdef double node_impurity(self) nogil: -# pass -# -# cdef void children_impurity(self, double* impurity_left, -# double* impurity_right) nogil: +# def __setstate__(self, d): # pass # -# cdef void node_value(self, double* dest) nogil: -# """Compute the node value of samples[start:end] and save it into dest. -# Parameters -# ---------- -# dest : double pointer -# The memory address which we will save the node value into. -# """ -# -# cdef double* sum_total = self.sum_total -# cdef SIZE_t* n_classes = self.n_classes -# cdef SIZE_t k -# -# for k in range(self.n_outputs): -# memcpy(dest, sum_total, n_classes[k] * sizeof(double)) -# dest += self.sum_stride -# sum_total += self.sum_stride -# -# cdef class CCriterion(CustomClassificationCriterion): -# r"""Cross Entropy impurity criterion. -# This handles cases where the target is a classification taking values -# 0, 1, ... K-2, K-1. If node m represents a region Rm with Nm observations, -# then let -# count_k = 1 / Nm \sum_{x_i in Rm} I(yi = k) -# be the proportion of class k observations in node m. -# The cross-entropy is then defined as -# cross-entropy = -\sum_{k=0}^{K-1} count_k log(count_k) +# cdef +# int +# init(self, DOUBLE_t * y, SIZE_t +# y_stride, DOUBLE_t * sample_weight, +# double +# weighted_n_samples, SIZE_t * samples, SIZE_t +# start, +# SIZE_t +# end) nogil except -1: +# """Placeholder for a method which will initialize the criterion. +# Returns -1 in case of failure to allocate memory (and raise MemoryError) +# or 0 otherwise. +# Parameters +# ---------- +# y : array-like, dtype=DOUBLE_t +# y is a buffer that can store values for n_outputs target variables +# y_stride : SIZE_t +# y_stride is used to index the kth output value as follows: +# y[i, k] = y[i * y_stride + k] +# sample_weight : array-like, dtype=DOUBLE_t +# The weight of each sample +# weighted_n_samples : DOUBLE_t +# The total weight of the samples being considered +# samples : array-like, dtype=DOUBLE_t +# Indices of the samples in X and y, where samples[start:end] +# correspond to the samples in this node +# start : SIZE_t +# The first sample to be used on this node +# end : SIZE_t +# The last sample used on this node # """ # -# cdef double node_impurity(self) nogil: -# """Evaluate the impurity of the current node, i.e. 
the impurity of -# samples[start:end], using the cross-entropy criterion.""" -# -# # cdef SIZE_t* n_classes = self.n_classes -# # cdef double* sum_total = self.sum_total -# # cdef double entropy = 0.0 -# # cdef double count_k -# # cdef SIZE_t k -# # cdef SIZE_t c -# # -# # for k in range(self.n_outputs): -# # for c in range(n_classes[k]): -# # count_k = sum_total[c] -# # if count_k > 0.0: -# # count_k /= self.weighted_n_node_samples -# # entropy -= count_k * log(count_k) -# # -# # sum_total += self.sum_stride -# -# return 1.0 -# -# cdef void children_impurity(self, double* impurity_left, -# double* impurity_right) nogil: -# """Evaluate the impurity in children nodes -# i.e. the impurity of the left child (samples[start:pos]) and the -# impurity the right child (samples[pos:end]). -# Parameters -# ---------- -# impurity_left : double pointer -# The memory address to save the impurity of the left node -# impurity_right : double pointer -# The memory address to save the impurity of the right node -# """ -# -# # cdef SIZE_t* n_classes = self.n_classes -# # cdef double* sum_left = self.sum_left -# # cdef double* sum_right = self.sum_right -# # cdef double entropy_left = 0.0 -# # cdef double entropy_right = 0.0 -# # cdef double count_k -# # cdef SIZE_t k -# # cdef SIZE_t c -# # -# # for k in range(self.n_outputs): -# # for c in range(n_classes[k]): -# # count_k = sum_left[c] -# # if count_k > 0.0: -# # count_k /= self.weighted_n_left -# # entropy_left -= count_k * log(count_k) -# # -# # count_k = sum_right[c] -# # if count_k > 0.0: -# # count_k /= self.weighted_n_right -# # entropy_right -= count_k * log(count_k) -# # -# # sum_left += self.sum_stride -# # sum_right += self.sum_stride -# # -# # impurity_left[0] = entropy_left / self.n_outputs -# # impurity_right[0] = entropy_right / self.n_outputs \ No newline at end of file +# pass +# # +# # cdef int reset(self) nogil except -1: +# # """Reset the criterion at pos=start. +# # This method must be implemented by the subclass. +# # """ +# # +# # pass +# # +# # cdef int reverse_reset(self) nogil except -1: +# # """Reset the criterion at pos=end. +# # This method must be implemented by the subclass. +# # """ +# # pass +# # +# # cdef int update(self, SIZE_t new_pos) nogil except -1: +# # """Updated statistics by moving samples[pos:new_pos] to the left child. +# # This updates the collected statistics by moving samples[pos:new_pos] +# # from the right child to the left child. It must be implemented by +# # the subclass. +# # Parameters +# # ---------- +# # new_pos : SIZE_t +# # New starting index position of the samples in the right child +# # """ +# # +# # pass +# # +# # cdef double node_impurity(self) nogil: +# # """Placeholder for calculating the impurity of the node. +# # Placeholder for a method which will evaluate the impurity of +# # the current node, i.e. the impurity of samples[start:end]. This is the +# # primary function of the criterion class. +# # """ +# # +# # pass +# # +# # cdef void children_impurity(self, double* impurity_left, +# # double* impurity_right) nogil: +# # """Placeholder for calculating the impurity of children. +# # Placeholder for a method which evaluates the impurity in +# # children nodes, i.e. the impurity of samples[start:pos] + the impurity +# # of samples[pos:end]. +# # Parameters +# # ---------- +# # impurity_left : double pointer +# # The memory address where the impurity of the left child should be +# # stored. 
+# # impurity_right : double pointer +# # The memory address where the impurity of the right child should be +# # stored +# # """ +# # +# # pass +# # +# # cdef void node_value(self, double* dest) nogil: +# # """Placeholder for storing the node value. +# # Placeholder for a method which will compute the node value +# # of samples[start:end] and save the value into dest. +# # Parameters +# # ---------- +# # dest : double pointer +# # The memory address where the node value should be stored. +# # """ +# # +# # pass +# # +# # cdef double proxy_impurity_improvement(self) nogil: +# # """Compute a proxy of the impurity reduction +# # This method is used to speed up the search for the best split. +# # It is a proxy quantity such that the split that maximizes this value +# # also maximizes the impurity improvement. It neglects all constant terms +# # of the impurity decrease for a given split. +# # The absolute impurity improvement is only computed by the +# # impurity_improvement method once the best split has been found. +# # """ +# # cdef double impurity_left +# # cdef double impurity_right +# # self.children_impurity(&impurity_left, &impurity_right) +# # +# # return (- self.weighted_n_right * impurity_right +# # - self.weighted_n_left * impurity_left) +# # +# # cdef double impurity_improvement(self, double impurity) nogil: +# # """Compute the improvement in impurity +# # This method computes the improvement in impurity when a split occurs. +# # The weighted impurity improvement equation is the following: +# # N_t / N * (impurity - N_t_R / N_t * right_impurity +# # - N_t_L / N_t * left_impurity) +# # where N is the total number of samples, N_t is the number of samples +# # at the current node, N_t_L is the number of samples in the left child, +# # and N_t_R is the number of samples in the right child, +# # Parameters +# # ---------- +# # impurity : double +# # The initial impurity of the node before the split +# # Return +# # ------ +# # double : improvement in impurity after the split occurs +# # """ +# # +# # cdef double impurity_left +# # cdef double impurity_right +# # +# # self.children_impurity(&impurity_left, &impurity_right) +# # +# # return ((self.weighted_n_node_samples / self.weighted_n_samples) * +# # (impurity - (self.weighted_n_right / +# # self.weighted_n_node_samples * impurity_right) +# # - (self.weighted_n_left / +# # self.weighted_n_node_samples * impurity_left))) +# # +# # +# # cdef class CustomClassificationCriterion(Criterion): +# # """Abstract criterion for classification.""" +# # +# # def __cinit__(self, SIZE_t n_outputs, +# # np.ndarray[SIZE_t, ndim=1] n_classes): +# # """Initialize attributes for this criterion. 
+# # Parameters +# # ---------- +# # n_outputs : SIZE_t +# # The number of targets, the dimensionality of the prediction +# # n_classes : numpy.ndarray, dtype=SIZE_t +# # The number of unique classes in each target +# # """ +# # +# # self.y = NULL +# # self.y_stride = 0 +# # self.sample_weight = NULL +# # +# # self.samples = NULL +# # self.start = 0 +# # self.pos = 0 +# # self.end = 0 +# # +# # self.n_outputs = n_outputs +# # self.n_samples = 0 +# # self.n_node_samples = 0 +# # self.weighted_n_node_samples = 0.0 +# # self.weighted_n_left = 0.0 +# # self.weighted_n_right = 0.0 +# # +# # # Count labels for each output +# # self.sum_total = NULL +# # self.sum_left = NULL +# # self.sum_right = NULL +# # self.n_classes = NULL +# # +# # safe_realloc(&self.n_classes, n_outputs) +# # +# # cdef SIZE_t k = 0 +# # cdef SIZE_t sum_stride = 0 +# # +# # # For each target, set the number of unique classes in that target, +# # # and also compute the maximal stride of all targets +# # for k in range(n_outputs): +# # self.n_classes[k] = n_classes[k] +# # +# # if n_classes[k] > sum_stride: +# # sum_stride = n_classes[k] +# # +# # self.sum_stride = sum_stride +# # +# # cdef SIZE_t n_elements = n_outputs * sum_stride +# # self.sum_total = <double*> calloc(n_elements, sizeof(double)) +# # self.sum_left = <double*> calloc(n_elements, sizeof(double)) +# # self.sum_right = <double*> calloc(n_elements, sizeof(double)) +# # +# # if (self.sum_total == NULL or +# # self.sum_left == NULL or +# # self.sum_right == NULL): +# # raise MemoryError() +# # +# # def __dealloc__(self): +# # """Destructor.""" +# # free(self.n_classes) +# # +# # def __reduce__(self): +# # return (type(self), +# # (self.n_outputs, +# # sizet_ptr_to_ndarray(self.n_classes, self.n_outputs)), +# # self.__getstate__()) +# # +# # cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, +# # DOUBLE_t* sample_weight, double weighted_n_samples, +# # SIZE_t* samples, SIZE_t start, SIZE_t end) nogil except -1: +# # """Initialize the criterion at node samples[start:end] and +# # children samples[start:start] and samples[start:end]. +# # Returns -1 in case of failure to allocate memory (and raise MemoryError) +# # or 0 otherwise. 
+# # Parameters +# # ---------- +# # y : array-like, dtype=DOUBLE_t +# # The target stored as a buffer for memory efficiency +# # y_stride : SIZE_t +# # The stride between elements in the buffer, important if there +# # are multiple targets (multi-output) +# # sample_weight : array-like, dtype=DTYPE_t +# # The weight of each sample +# # weighted_n_samples : SIZE_t +# # The total weight of all samples +# # samples : array-like, dtype=SIZE_t +# # A mask on the samples, showing which ones we want to use +# # start : SIZE_t +# # The first sample to use in the mask +# # end : SIZE_t +# # The last sample to use in the mask +# # """ +# # +# # self.y = y +# # self.y_stride = y_stride +# # self.sample_weight = sample_weight +# # self.samples = samples +# # self.start = start +# # self.end = end +# # self.n_node_samples = end - start +# # self.weighted_n_samples = weighted_n_samples +# # self.weighted_n_node_samples = 0.0 +# # +# # cdef SIZE_t* n_classes = self.n_classes +# # cdef double* sum_total = self.sum_total +# # +# # cdef SIZE_t i +# # cdef SIZE_t p +# # cdef SIZE_t k +# # cdef SIZE_t c +# # cdef DOUBLE_t w = 1.0 +# # cdef SIZE_t offset = 0 +# # +# # for k in range(self.n_outputs): +# # memset(sum_total + offset, 0, n_classes[k] * sizeof(double)) +# # offset += self.sum_stride +# # +# # for p in range(start, end): +# # i = samples[p] +# # +# # # w is originally set to be 1.0, meaning that if no sample weights +# # # are given, the default weight of each sample is 1.0 +# # if sample_weight != NULL: +# # w = sample_weight[i] +# # +# # # Count weighted class frequency for each target +# # for k in range(self.n_outputs): +# # c = <SIZE_t> y[i * y_stride + k] +# # sum_total[k * self.sum_stride + c] += w +# # +# # self.weighted_n_node_samples += w +# # +# # # Reset to pos=start +# # self.reset() +# # return 0 +# # +# # cdef int reset(self) nogil except -1: +# # """Reset the criterion at pos=start +# # Returns -1 in case of failure to allocate memory (and raise MemoryError) +# # or 0 otherwise. +# # """ +# # self.pos = self.start +# # +# # self.weighted_n_left = 0.0 +# # self.weighted_n_right = self.weighted_n_node_samples +# # +# # cdef double* sum_total = self.sum_total +# # cdef double* sum_left = self.sum_left +# # cdef double* sum_right = self.sum_right +# # +# # cdef SIZE_t* n_classes = self.n_classes +# # cdef SIZE_t k +# # +# # for k in range(self.n_outputs): +# # memset(sum_left, 0, n_classes[k] * sizeof(double)) +# # memcpy(sum_right, sum_total, n_classes[k] * sizeof(double)) +# # +# # sum_total += self.sum_stride +# # sum_left += self.sum_stride +# # sum_right += self.sum_stride +# # return 0 +# # +# # cdef int reverse_reset(self) nogil except -1: +# # """Reset the criterion at pos=end +# # Returns -1 in case of failure to allocate memory (and raise MemoryError) +# # or 0 otherwise. 
+# # """ +# # self.pos = self.end +# # +# # self.weighted_n_left = self.weighted_n_node_samples +# # self.weighted_n_right = 0.0 +# # +# # cdef double* sum_total = self.sum_total +# # cdef double* sum_left = self.sum_left +# # cdef double* sum_right = self.sum_right +# # +# # cdef SIZE_t* n_classes = self.n_classes +# # cdef SIZE_t k +# # +# # for k in range(self.n_outputs): +# # memset(sum_right, 0, n_classes[k] * sizeof(double)) +# # memcpy(sum_left, sum_total, n_classes[k] * sizeof(double)) +# # +# # sum_total += self.sum_stride +# # sum_left += self.sum_stride +# # sum_right += self.sum_stride +# # return 0 +# # +# # cdef int update(self, SIZE_t new_pos) nogil except -1: +# # """Updated statistics by moving samples[pos:new_pos] to the left child. +# # Returns -1 in case of failure to allocate memory (and raise MemoryError) +# # or 0 otherwise. +# # Parameters +# # ---------- +# # new_pos : SIZE_t +# # The new ending position for which to move samples from the right +# # child to the left child. +# # """ +# # cdef DOUBLE_t* y = self.y +# # cdef SIZE_t pos = self.pos +# # cdef SIZE_t end = self.end +# # +# # cdef double* sum_left = self.sum_left +# # cdef double* sum_right = self.sum_right +# # cdef double* sum_total = self.sum_total +# # +# # cdef SIZE_t* n_classes = self.n_classes +# # cdef SIZE_t* samples = self.samples +# # cdef DOUBLE_t* sample_weight = self.sample_weight +# # +# # cdef SIZE_t i +# # cdef SIZE_t p +# # cdef SIZE_t k +# # cdef SIZE_t c +# # cdef SIZE_t label_index +# # cdef DOUBLE_t w = 1.0 +# # +# # # Update statistics up to new_pos +# # # +# # # Given that +# # # sum_left[x] + sum_right[x] = sum_total[x] +# # # and that sum_total is known, we are going to update +# # # sum_left from the direction that require the least amount +# # # of computations, i.e. from pos to new_pos or from end to new_po. +# # +# # if (new_pos - pos) <= (end - new_pos): +# # for p in range(pos, new_pos): +# # i = samples[p] +# # +# # if sample_weight != NULL: +# # w = sample_weight[i] +# # +# # for k in range(self.n_outputs): +# # label_index = (k * self.sum_stride + +# # <SIZE_t> y[i * self.y_stride + k]) +# # sum_left[label_index] += w +# # +# # self.weighted_n_left += w +# # +# # else: +# # self.reverse_reset() +# # +# # for p in range(end - 1, new_pos - 1, -1): +# # i = samples[p] +# # +# # if sample_weight != NULL: +# # w = sample_weight[i] +# # +# # for k in range(self.n_outputs): +# # label_index = (k * self.sum_stride + +# # <SIZE_t> y[i * self.y_stride + k]) +# # sum_left[label_index] -= w +# # +# # self.weighted_n_left -= w +# # +# # # Update right part statistics +# # self.weighted_n_right = self.weighted_n_node_samples - self.weighted_n_left +# # for k in range(self.n_outputs): +# # for c in range(n_classes[k]): +# # sum_right[c] = sum_total[c] - sum_left[c] +# # +# # sum_right += self.sum_stride +# # sum_left += self.sum_stride +# # sum_total += self.sum_stride +# # +# # self.pos = new_pos +# # return 0 +# # +# # cdef double node_impurity(self) nogil: +# # pass +# # +# # cdef void children_impurity(self, double* impurity_left, +# # double* impurity_right) nogil: +# # pass +# # +# # cdef void node_value(self, double* dest) nogil: +# # """Compute the node value of samples[start:end] and save it into dest. +# # Parameters +# # ---------- +# # dest : double pointer +# # The memory address which we will save the node value into. 
+# # """ +# # +# # cdef double* sum_total = self.sum_total +# # cdef SIZE_t* n_classes = self.n_classes +# # cdef SIZE_t k +# # +# # for k in range(self.n_outputs): +# # memcpy(dest, sum_total, n_classes[k] * sizeof(double)) +# # dest += self.sum_stride +# # sum_total += self.sum_stride +# # +# # cdef class CCriterion(CustomClassificationCriterion): +# # r"""Cross Entropy impurity criterion. +# # This handles cases where the target is a classification taking values +# # 0, 1, ... K-2, K-1. If node m represents a region Rm with Nm observations, +# # then let +# # count_k = 1 / Nm \sum_{x_i in Rm} I(yi = k) +# # be the proportion of class k observations in node m. +# # The cross-entropy is then defined as +# # cross-entropy = -\sum_{k=0}^{K-1} count_k log(count_k) +# # """ +# # +# # cdef double node_impurity(self) nogil: +# # """Evaluate the impurity of the current node, i.e. the impurity of +# # samples[start:end], using the cross-entropy criterion.""" +# # +# # # cdef SIZE_t* n_classes = self.n_classes +# # # cdef double* sum_total = self.sum_total +# # # cdef double entropy = 0.0 +# # # cdef double count_k +# # # cdef SIZE_t k +# # # cdef SIZE_t c +# # # +# # # for k in range(self.n_outputs): +# # # for c in range(n_classes[k]): +# # # count_k = sum_total[c] +# # # if count_k > 0.0: +# # # count_k /= self.weighted_n_node_samples +# # # entropy -= count_k * log(count_k) +# # # +# # # sum_total += self.sum_stride +# # +# # return 1.0 +# # +# # cdef void children_impurity(self, double* impurity_left, +# # double* impurity_right) nogil: +# # """Evaluate the impurity in children nodes +# # i.e. the impurity of the left child (samples[start:pos]) and the +# # impurity the right child (samples[pos:end]). +# # Parameters +# # ---------- +# # impurity_left : double pointer +# # The memory address to save the impurity of the left node +# # impurity_right : double pointer +# # The memory address to save the impurity of the right node +# # """ +# # +# # # cdef SIZE_t* n_classes = self.n_classes +# # # cdef double* sum_left = self.sum_left +# # # cdef double* sum_right = self.sum_right +# # # cdef double entropy_left = 0.0 +# # # cdef double entropy_right = 0.0 +# # # cdef double count_k +# # # cdef SIZE_t k +# # # cdef SIZE_t c +# # # +# # # for k in range(self.n_outputs): +# # # for c in range(n_classes[k]): +# # # count_k = sum_left[c] +# # # if count_k > 0.0: +# # # count_k /= self.weighted_n_left +# # # entropy_left -= count_k * log(count_k) +# # # +# # # count_k = sum_right[c] +# # # if count_k > 0.0: +# # # count_k /= self.weighted_n_right +# # # entropy_right -= count_k * log(count_k) +# # # +# # # sum_left += self.sum_stride +# # # sum_right += self.sum_stride +# # # +# # # impurity_left[0] = entropy_left / self.n_outputs +# # # impurity_right[0] = entropy_right / self.n_outputs diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py index 156c187a8e13c5071085a833a81db9637b031099..67abed095bdbbca5a452fdfd62fd0808abcb440c 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py @@ -2,47 +2,50 @@ """ Execution: Script to perform a MonoView classification """ +import errno +import logging # To create Log-Files # Import built-in modules import os # to geth path of the running script import time # for time calculations -import errno +import h5py # Import 3rd party modules 
import numpy as np # for reading CSV-files and Series -import logging # To create Log-Files -import h5py +from . import MonoviewUtils +from .analyzeResult import execute # Import own modules from .. import MonoviewClassifiers -from .analyzeResult import execute from ..utils.Dataset import getValue, extractSubset -from . import MonoviewUtils -from ..utils.GetMultiviewDb import TanhNormalizer # Author-Info __author__ = "Nikolas Huelsmann, Baptiste BAUVIN" __status__ = "Prototype" # Production, Development, Prototype -# __date__ = 2016 - 03 - 25 +# __date__ = 2016 - 03 - 25 -def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, KFolds, datasetFileIndex, databaseType, - path, randomState, labels, hyperParamSearch="randomizedSearch", - metrics=[["accuracy_score", None]], nIter=30, **args): +def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, + KFolds, datasetFileIndex, databaseType, + path, randomState, labels, + hyperParamSearch="randomizedSearch", + metrics=[["accuracy_score", None]], nIter=30, + **args): DATASET = h5py.File(path + name + str(datasetFileIndex) + ".hdf5", "r") neededViewIndex = args["viewIndex"] X = DATASET.get("View" + str(neededViewIndex)) Y = labels - return ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, 1, databaseType, path, + return ExecMonoview(directory, X, Y, name, labelsNames, + classificationIndices, KFolds, 1, databaseType, path, randomState, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=nIter, **args) -def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, nbCores, databaseType, path, +def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, + KFolds, nbCores, databaseType, path, randomState, hyperParamSearch="randomizedSearch", metrics=[["accuracy_score", None]], nIter=30, **args): - logging.debug("Start:\t Loading data") kwargs, \ t_start, \ @@ -51,39 +54,46 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol X, \ learningRate, \ labelsString, \ - outputFileName = initConstants(args, X, classificationIndices, labelsNames, name, directory) + outputFileName = initConstants(args, X, classificationIndices, labelsNames, + name, directory) logging.debug("Done:\t Loading data") - logging.debug("Info:\t Classification - Database:" + str(name) + " Feature:" + str(feat) + " train ratio:" - + str(learningRate) + ", CrossValidation k-folds: " + str(KFolds.n_splits) + ", cores:" - + str(nbCores) + ", algorithm : " + CL_type) + logging.debug( + "Info:\t Classification - Database:" + str(name) + " Feature:" + str( + feat) + " train ratio:" + + str(learningRate) + ", CrossValidation k-folds: " + str( + KFolds.n_splits) + ", cores:" + + str(nbCores) + ", algorithm : " + CL_type) logging.debug("Start:\t Determine Train/Test split") - X_train, y_train, X_test, y_test, X_test_multiclass = initTrainTest(X, Y, classificationIndices) + X_train, y_train, X_test, y_test, X_test_multiclass = initTrainTest(X, Y, + classificationIndices) - logging.debug("Info:\t Shape X_train:" + str(X_train.shape) + ", Length of y_train:" + str(len(y_train))) - logging.debug("Info:\t Shape X_test:" + str(X_test.shape) + ", Length of y_test:" + str(len(y_test))) + logging.debug("Info:\t Shape X_train:" + str( + X_train.shape) + ", Length of y_train:" + str(len(y_train))) + logging.debug("Info:\t Shape X_test:" + str( + X_test.shape) + ", Length of y_test:" + str(len(y_test))) logging.debug("Done:\t Determine Train/Test split") 
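Stripped of the platform's helpers (initConstants, initTrainTest, getHPs, the result analysis), the monoview flow around this point reduces to: take one view, split it with the provided indices, fit the chosen monoview classifier, predict on both splits, and rebuild a full-length prediction vector the way full_labels_pred is filled a few lines below. A rough, self-contained sketch with toy data, where a plain sklearn DecisionTreeClassifier stands in for the platform's classifier wrappers.

import numpy as np
from sklearn.tree import DecisionTreeClassifier

rng = np.random.RandomState(42)
X = rng.rand(100, 8)                      # one "view": examples x features
Y = (X[:, 0] > 0.5).astype(int)
train_idx = np.arange(0, 70)              # plays the role of classificationIndices[0]
test_idx = np.arange(70, 90)              # plays the role of classificationIndices[1]

X_train, y_train = X[train_idx], Y[train_idx]
X_test, y_test = X[test_idx], Y[test_idx]

clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Full-length prediction vector with a sentinel for examples in neither split.
full_labels_pred = np.zeros(Y.shape, dtype=int) - 100
full_labels_pred[train_idx] = clf.predict(X_train)
full_labels_pred[test_idx] = clf.predict(X_test)
print((full_labels_pred == -100).sum(), "examples left unpredicted")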
logging.debug("Start:\t Generate classifier args") classifierModule = getattr(MonoviewClassifiers, CL_type) clKWARGS, testFoldsPreds = getHPs(classifierModule, hyperParamSearch, - nIter, CL_type, X_train, y_train, - randomState, outputFileName, - KFolds, nbCores, metrics, kwargs) + nIter, CL_type, X_train, y_train, + randomState, outputFileName, + KFolds, nbCores, metrics, kwargs) logging.debug("Done:\t Generate classifier args") logging.debug("Start:\t Training") classifier = getattr(classifierModule, CL_type)(randomState, **clKWARGS) - classifier.fit(X_train, y_train) # NB_CORES=nbCores, + classifier.fit(X_train, y_train) # NB_CORES=nbCores, logging.debug("Done:\t Training") logging.debug("Start:\t Predicting") y_train_pred = classifier.predict(X_train) y_test_pred = classifier.predict(X_test) - full_labels_pred = np.zeros(Y.shape, dtype=int)-100 + full_labels_pred = np.zeros(Y.shape, dtype=int) - 100 for trainIndex, index in enumerate(classificationIndices[0]): full_labels_pred[index] = y_train_pred[trainIndex] for testIndex, index in enumerate(classificationIndices[1]): @@ -95,27 +105,31 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol logging.debug("Done:\t Predicting") t_end = time.time() - t_start - logging.debug("Info:\t Time for training and predicting: " + str(t_end) + "[s]") + logging.debug( + "Info:\t Time for training and predicting: " + str(t_end) + "[s]") logging.debug("Start:\t Getting Results") stringAnalysis, \ imagesAnalysis, \ metricsScores = execute(name, classificationIndices, KFolds, nbCores, - hyperParamSearch, metrics, nIter, feat, CL_type, - clKWARGS, labelsNames, X.shape, - y_train, y_train_pred, y_test, y_test_pred, t_end, - randomState, classifier, outputFileName) + hyperParamSearch, metrics, nIter, feat, CL_type, + clKWARGS, labelsNames, X.shape, + y_train, y_train_pred, y_test, y_test_pred, t_end, + randomState, classifier, outputFileName) # cl_desc = [value for key, value in sorted(clKWARGS.items())] logging.debug("Done:\t Getting Results") logging.debug("Start:\t Saving preds") - saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, y_train, imagesAnalysis, y_test) + saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, + y_train, imagesAnalysis, y_test) logging.info("Done:\t Saving Results") viewIndex = args["viewIndex"] if testFoldsPreds is None: testFoldsPreds = y_train_pred - return MonoviewUtils.MonoviewResult(viewIndex, CL_type, feat, metricsScores, full_labels_pred, clKWARGS, y_test_multiclass_pred, testFoldsPreds) + return MonoviewUtils.MonoviewResult(viewIndex, CL_type, feat, metricsScores, + full_labels_pred, clKWARGS, + y_test_multiclass_pred, testFoldsPreds) # return viewIndex, [CL_type, feat, metricsScores, full_labels_pred, clKWARGS, y_test_multiclass_pred, testFoldsPreds] @@ -131,12 +145,14 @@ def initConstants(args, X, classificationIndices, labelsNames, name, directory): feat = X.attrs["name"] CL_type = kwargs["CL_type"] X = getValue(X) - learningRate = float(len(classificationIndices[0])) / (len(classificationIndices[0]) + len(classificationIndices[1])) + learningRate = float(len(classificationIndices[0])) / ( + len(classificationIndices[0]) + len(classificationIndices[1])) labelsString = "-".join(labelsNames) CL_type_string = CL_type timestr = time.strftime("%Y_%m_%d-%H_%M_%S") outputFileName = directory + CL_type_string + "/" + feat + "/" + timestr + "-Results-" + CL_type_string + "-" + labelsString + \ - '-learnRate_{0:.2f}'.format(learningRate) + '-' + name 
+ "-" + feat + "-" + '-learnRate_{0:.2f}'.format( + learningRate) + '-' + name + "-" + feat + "-" if not os.path.exists(os.path.dirname(outputFileName)): try: os.makedirs(os.path.dirname(outputFileName)) @@ -150,7 +166,7 @@ def initTrainTest(X, Y, classificationIndices): trainIndices, testIndices, testIndicesMulticlass = classificationIndices X_train = extractSubset(X, trainIndices) X_test = extractSubset(X, testIndices) - if np.array(testIndicesMulticlass).size != 0 : + if np.array(testIndicesMulticlass).size != 0: X_test_multiclass = extractSubset(X, testIndicesMulticlass) else: X_test_multiclass = [] @@ -159,15 +175,24 @@ def initTrainTest(X, Y, classificationIndices): return X_train, y_train, X_test, y_test, X_test_multiclass -def getHPs(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train, randomState, +def getHPs(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train, + randomState, outputFileName, KFolds, nbCores, metrics, kwargs): if hyperParamSearch != "None": - logging.debug("Start:\t " + hyperParamSearch + " best settings with " + str(nIter) + " iterations for " + CL_type) + logging.debug( + "Start:\t " + hyperParamSearch + " best settings with " + str( + nIter) + " iterations for " + CL_type) classifierHPSearch = getattr(MonoviewUtils, hyperParamSearch) - clKWARGS, testFoldsPreds = classifierHPSearch(X_train, y_train, randomState, - outputFileName, classifierModule, CL_type, - KFolds=KFolds, nbCores=nbCores, - metric=metrics[0], nIter=nIter, classifier_KWARGS=kwargs[CL_type + "KWARGS"]) + clKWARGS, testFoldsPreds = classifierHPSearch(X_train, y_train, + randomState, + outputFileName, + classifierModule, CL_type, + KFolds=KFolds, + nbCores=nbCores, + metric=metrics[0], + nIter=nIter, + classifier_KWARGS=kwargs[ + CL_type + "KWARGS"]) logging.debug("Done:\t " + hyperParamSearch + " best settings") else: clKWARGS = kwargs[CL_type + "KWARGS"] @@ -175,14 +200,18 @@ def getHPs(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train, return clKWARGS, testFoldsPreds -def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, y_train, imagesAnalysis, y_test): +def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, + y_train, imagesAnalysis, y_test): logging.info(stringAnalysis) outputTextFile = open(outputFileName + 'summary.txt', 'w') outputTextFile.write(stringAnalysis) outputTextFile.close() - np.savetxt(outputFileName + "full_pred.csv", full_labels_pred.astype(np.int16), delimiter=",") - np.savetxt(outputFileName + "train_pred.csv", y_train_pred.astype(np.int16), delimiter=",") - np.savetxt(outputFileName + "train_labels.csv", y_train.astype(np.int16), delimiter=",") + np.savetxt(outputFileName + "full_pred.csv", + full_labels_pred.astype(np.int16), delimiter=",") + np.savetxt(outputFileName + "train_pred.csv", y_train_pred.astype(np.int16), + delimiter=",") + np.savetxt(outputFileName + "train_labels.csv", y_train.astype(np.int16), + delimiter=",") np.savetxt(outputFileName + "test_labels.csv", y_test.astype(np.int16), delimiter=",") @@ -190,14 +219,14 @@ def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, for imageName in imagesAnalysis: if os.path.isfile(outputFileName + imageName + ".png"): for i in range(1, 20): - testFileName = outputFileName + imageName + "-" + str(i) + ".png" + testFileName = outputFileName + imageName + "-" + str( + i) + ".png" if not os.path.isfile(testFileName): imagesAnalysis[imageName].savefig(testFileName) break - 
imagesAnalysis[imageName].savefig(outputFileName + imageName + '.png') - - + imagesAnalysis[imageName].savefig( + outputFileName + imageName + '.png') if __name__ == '__main__': @@ -215,30 +244,39 @@ if __name__ == '__main__': formatter_class=argparse.ArgumentDefaultsHelpFormatter) groupStandard = parser.add_argument_group('Standard arguments') - groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console') + groupStandard.add_argument('-log', action='store_true', + help='Use option to activate Logging to Console') groupStandard.add_argument('--name', metavar='STRING', action='store', help='Name of Database', default='Plausible') groupStandard.add_argument('--cl_name', metavar='STRING', action='store', - help='THe name of the monoview classifier to use', default='DecisionTree') + help='THe name of the monoview classifier to use', + default='DecisionTree') groupStandard.add_argument('--view', metavar='STRING', action='store', help='Name of the view used', default='View0') groupStandard.add_argument('--pathF', metavar='STRING', action='store', - help='Path to the database hdf5 file', default='../../../Data/Plausible') + help='Path to the database hdf5 file', + default='../../../Data/Plausible') groupStandard.add_argument('--directory', metavar='STRING', action='store', help='Path of the output directory', default='') - groupStandard.add_argument('--labelsNames', metavar='STRING', action='store', nargs='+', - help='Name of the labels used for classification', default=['Yes', 'No']) - groupStandard.add_argument('--classificationIndices', metavar='STRING', action='store', + groupStandard.add_argument('--labelsNames', metavar='STRING', + action='store', nargs='+', + help='Name of the labels used for classification', + default=['Yes', 'No']) + groupStandard.add_argument('--classificationIndices', metavar='STRING', + action='store', help='Path to the classificationIndices pickle file', default='') groupStandard.add_argument('--KFolds', metavar='STRING', action='store', help='Path to the kFolds pickle file', default='') - groupStandard.add_argument('--nbCores', metavar='INT', action='store', help='Number of cores, -1 for all', + groupStandard.add_argument('--nbCores', metavar='INT', action='store', + help='Number of cores, -1 for all', type=int, default=1) groupStandard.add_argument('--randomState', metavar='INT', action='store', - help='Seed for the random state or pickable randomstate file', default=42) - groupStandard.add_argument('--hyperParamSearch', metavar='STRING', action='store', + help='Seed for the random state or pickable randomstate file', + default=42) + groupStandard.add_argument('--hyperParamSearch', metavar='STRING', + action='store', help='The type of method used to search the best set of hyper parameters', default='randomizedSearch') groupStandard.add_argument('--metrics', metavar='STRING', action='store', @@ -248,7 +286,8 @@ if __name__ == '__main__': help='Path to the pickle file containing the key-words arguments used for classification', default='') groupStandard.add_argument('--nIter', metavar='INT', action='store', - help='Number of itetarion in hyper parameter search', type=int, + help='Number of itetarion in hyper parameter search', + type=int, default=10) args = parser.parse_args() @@ -275,12 +314,12 @@ if __name__ == '__main__': databaseType = None - # Extract the data using MPI X, Y = Dataset.getMonoviewShared(path, name, viewName) # Init log - logFileName = time.strftime("%Y_%m_%d-%H_%M_%S") + "-" + name + "-"+ viewName +"-" 
+ classifierName +'-LOG' + logFileName = time.strftime( + "%Y_%m_%d-%H_%M_%S") + "-" + name + "-" + viewName + "-" + classifierName + '-LOG' if not os.path.exists(os.path.dirname(directory + logFileName)): try: os.makedirs(os.path.dirname(directory + logFileName)) @@ -296,23 +335,25 @@ if __name__ == '__main__': break else: logFile += ".log" - logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logFile, level=logging.DEBUG, + logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', + filename=logFile, level=logging.DEBUG, filemode='w') if args.log: logging.getLogger().addHandler(logging.StreamHandler()) # Computing on multiple cores - res = ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFolds, nbCores, databaseType, path, - randomState, hyperParamSearch=hyperParamSearch, - metrics=metrics, nIter=nIter, **kwargs) + res = ExecMonoview(directory, X, Y, name, labelsNames, + classificationIndices, KFolds, nbCores, databaseType, + path, + randomState, hyperParamSearch=hyperParamSearch, + metrics=metrics, nIter=nIter, **kwargs) with open(directory + "res.pickle", "wb") as handle: pickle.dump(res, handle) - # Pickle the res in a file to be reused. # Go put a token in the token files without breaking everything. # Need to write a function to be able to know the timeu sed # for a monoview experimentation approximately and the ressource it uses to write automatically the file in the shell - # it will have to be a not-too close approx as the taskswont be long and Ram-o-phage \ No newline at end of file + # it will have to be a not-too close approx as the taskswont be long and Ram-o-phage diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExportResults.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExportResults.py index a5e34ddaf537e2667d0df827840cff5571081239..ba1a9088fcfd4aeec3606e70117d4b244a5f9530 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExportResults.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExportResults.py @@ -6,16 +6,15 @@ import os # for iteration throug directories import string # to generate a range of letters +import matplotlib.pyplot as plt # for Plots +import numpy as np # for Numpy Arrays # Import 3rd party modules import pandas as pd # for Series and DataFrames -import numpy as np # for Numpy Arrays -import matplotlib.pyplot as plt # for Plots -from scipy.interpolate import interp1d # to Interpolate Data -import matplotlib - # matplotlib.use('Agg') -from matplotlib.offsetbox import AnchoredOffsetbox, TextArea, HPacker # to generate the Annotations in plot +from matplotlib.offsetbox import AnchoredOffsetbox, TextArea, \ + HPacker # to generate the Annotations in plot from pylab import rcParams # to change size of plot +from scipy.interpolate import interp1d # to Interpolate Data from sklearn import metrics # For stastics on classification # Import own modules @@ -50,7 +49,8 @@ def exportNumpyToCSV(numpyArray, directory, filename, format): for i in range(1, 20): testFileName = filename + "-" + str(i) + ".csv" if not os.path.isfile(directory + testFileName): - np.savetxt(directory + testFileName, numpyArray, delimiter=";", fmt=format) + np.savetxt(directory + testFileName, numpyArray, delimiter=";", + fmt=format) break else: @@ -60,7 +60,8 @@ def exportNumpyToCSV(numpyArray, directory, filename, format): #### Rendering of results ### Rendering of Score and Time -def showScoreTime(directory, filename, store, resScore, resTime, rangeX, parameter, feat_desc, 
cl_desc, fig_desc, +def showScoreTime(directory, filename, store, resScore, resTime, rangeX, + parameter, feat_desc, cl_desc, fig_desc, y_desc1, y_desc2): # Determine interpolated functions @@ -93,8 +94,9 @@ def showScoreTime(directory, filename, store, resScore, resTime, rangeX, paramet letters = string.lowercase[0:len(rangeX)] legend = "" - for act_x, act_score, act_time, act_feat_desc, letter, act_cl_desc in zip(rangeX, resScore, resTime, feat_desc, - letters, cl_desc): + for act_x, act_score, act_time, act_feat_desc, letter, act_cl_desc in zip( + rangeX, resScore, resTime, feat_desc, + letters, cl_desc): # Add a letter (a,b,c,..) to each DataPoint ax1.annotate(letter, xy=(act_x, act_score), xytext=(act_x, act_score)) ax2.annotate(letter, xy=(act_x, act_time), xytext=(act_x, act_time)) @@ -151,7 +153,8 @@ def calcScorePerClass(np_labels, np_output): score = [] for i in pd_label_test.unique(): - matches = sum(pd_label_test[pd_label_test == i] == pd_output[pd_label_test[pd_label_test == i].index]) + matches = sum(pd_label_test[pd_label_test == i] == pd_output[ + pd_label_test[pd_label_test == i].index]) count = float(len(pd_label_test[pd_label_test == i])) score.append(matches / count) @@ -165,7 +168,8 @@ def showResults(directory, filename, db, feat, score): plt.bar(range(0, len(score)), score * 100, 1) plt.xlabel('ClassLabels') plt.ylabel('Precision in %') - plt.title('Results of ' + feat + '-Classification\n for ' + db + ' Database') + plt.title( + 'Results of ' + feat + '-Classification\n for ' + db + ' Database') plt.axis([0, len(score), 0, 100]) plt.xticks(range(0, len(score), 5)) @@ -184,7 +188,6 @@ def showResults(directory, filename, db, feat, score): plt.close() - # instead of saving - decomment plt.show() # plt.show() @@ -195,11 +198,13 @@ def accuracy_score(y_test, y_test_pred): # Function to calculate a report of classifiaction and store it -def classification_report_df(directory, filename, y_test, y_test_pred, labels, target_names): +def classification_report_df(directory, filename, y_test, y_test_pred, labels, + target_names): # Calculate the metrics - precision, recall, f1score, support = metrics.precision_recall_fscore_support(y_test, y_test_pred, beta=1.0, - labels=labels, pos_label=None, - average=None) + precision, recall, f1score, support = metrics.precision_recall_fscore_support( + y_test, y_test_pred, beta=1.0, + labels=labels, pos_label=None, + average=None) # turn result into DataFrame scores_df = pd.DataFrame(data=[precision, recall, f1score, support]) @@ -221,7 +226,8 @@ def confusion_matrix_df(directory, filename, y_test, y_test_pred, target_names): y_pred = pd.Series(y_test_pred, name='Predicted') # Calculate confusion matrix - df_confusion = pd.crosstab(y_actu, y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True) + df_confusion = pd.crosstab(y_actu, y_pred, rownames=['Actual'], + colnames=['Predicted'], margins=True) # Normalization of confusion matrix df_conf_norm = df_confusion / df_confusion.sum(axis=1) @@ -230,14 +236,14 @@ def confusion_matrix_df(directory, filename, y_test, y_test_pred, target_names): # Add Row: Actual / Column: Predicted into first cell [0,0] - # Store result as CSV exportPandasToCSV(df_conf_norm, directory, filename) return df_conf_norm -def plot_confusion_matrix(directory, filename, df_confusion, title='Confusion matrix', cmap=plt.cm.gray_r): +def plot_confusion_matrix(directory, filename, df_confusion, + title='Confusion matrix', cmap=plt.cm.gray_r): plt.matshow(df_confusion, cmap=cmap) # imshow # plt.title(title) 
plt.colorbar() @@ -278,19 +284,24 @@ def classification_stats(directory, filename, scores_df, acc): worst10 = list(worst10.index) # Ratio of classes with F1-Score==0 of all classes - ratio_zero = float(float(len(scores_df[scores_df.F1 == 0])) / float(len(scores_df))) + ratio_zero = float( + float(len(scores_df[scores_df.F1 == 0])) / float(len(scores_df))) # Mean of F1-Score of top 10 classes by F1-Score - mean_10 = np.mean(scores_df.sort_values(["F1"], ascending=False).head(10).F1) + mean_10 = np.mean( + scores_df.sort_values(["F1"], ascending=False).head(10).F1) # Mean of F1-Score of top 20 classes by F1-Score - mean_20 = np.mean(scores_df.sort_values(["F1"], ascending=False).head(20).F1) + mean_20 = np.mean( + scores_df.sort_values(["F1"], ascending=False).head(20).F1) # Mean of F1-Score of top 30 classes by F1-Score - mean_30 = np.mean(scores_df.sort_values(["F1"], ascending=False).head(30).F1) + mean_30 = np.mean( + scores_df.sort_values(["F1"], ascending=False).head(30).F1) # Create DataFrame with stats - d = {'Statistic': ['Accuracy score on test', 'Top 10 classes by F1-Score', 'Worst 10 classes by F1-Score', + d = {'Statistic': ['Accuracy score on test', 'Top 10 classes by F1-Score', + 'Worst 10 classes by F1-Score', 'Ratio of classes with F1-Score==0 of all classes', 'Mean of F1-Score of top 10 classes by F1-Score', 'Mean of F1-Score of top 20 classes by F1-Score', diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py index 5e80bbc76f20a29ae8e44f1847ec0fde2f919092..daa2fff718d2d083adfc89b985b9c1e973356cda 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py @@ -1,9 +1,10 @@ -from sklearn.model_selection import RandomizedSearchCV -from scipy.stats import uniform, randint -import numpy as np +import pickle + import matplotlib.pyplot as plt +import numpy as np from matplotlib.ticker import FuncFormatter -import pickle +from scipy.stats import uniform, randint +from sklearn.model_selection import RandomizedSearchCV from .. 
import Metrics from ..utils import HyperParameterSearch @@ -11,27 +12,41 @@ from ..utils import HyperParameterSearch # Author-Info __author__ = "Nikolas Huelsmann, Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype + + # __date__ = 2016 - 03 - 25 -def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierModule, CL_type, KFolds = 4, nbCores = 1, - metric = ["accuracy_score", None], nIter = 30, classifier_KWARGS=None): - estimator = getattr(classifierModule, CL_type)(randomState, **classifier_KWARGS) +def randomizedSearch(X_train, y_train, randomState, outputFileName, + classifierModule, CL_type, KFolds=4, nbCores=1, + metric=["accuracy_score", None], nIter=30, + classifier_KWARGS=None): + estimator = getattr(classifierModule, CL_type)(randomState, + **classifier_KWARGS) params_dict = estimator.genDistribs() if params_dict: metricModule = getattr(Metrics, metric[0]) if metric[1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) + metricKWARGS = dict((index, metricConfig) for index, metricConfig in + enumerate(metric[1])) else: metricKWARGS = {} scorer = metricModule.get_scorer(**metricKWARGS) nb_possible_combinations = compute_possible_combinations(params_dict) - min_list = np.array([min(nb_possible_combination, nIter) for nb_possible_combination in nb_possible_combinations]) - randomSearch = RandomizedSearchCV(estimator, n_iter=int(np.sum(min_list)), param_distributions=params_dict, refit=True, - n_jobs=nbCores, scoring=scorer, cv=KFolds, random_state=randomState) + min_list = np.array( + [min(nb_possible_combination, nIter) for nb_possible_combination in + nb_possible_combinations]) + randomSearch = RandomizedSearchCV(estimator, + n_iter=int(np.sum(min_list)), + param_distributions=params_dict, + refit=True, + n_jobs=nbCores, scoring=scorer, + cv=KFolds, random_state=randomState) detector = randomSearch.fit(X_train, y_train) - bestParams = dict((key, value) for key, value in estimator.genBestParams(detector).items() if key is not "random_state") + bestParams = dict((key, value) for key, value in + estimator.genBestParams(detector).items() if + key is not "random_state") scoresArray = detector.cv_results_['mean_test_score'] params = estimator.genParamsFromDetector(detector) @@ -44,19 +59,21 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierMo testFoldsPreds = genTestFoldsPreds(X_train, y_train, KFolds, best_estimator) return bestParams, testFoldsPreds + def change_label_to_minus(y): minus_y = np.copy(y) - minus_y[np.where(y==0)]=-1 + minus_y[np.where(y == 0)] = -1 return minus_y + def change_label_to_zero(y): zeroed_y = np.copy(y) - zeroed_y[np.where(y==-1)]=0 + zeroed_y[np.where(y == -1)] = 0 return zeroed_y def compute_possible_combinations(params_dict): - n_possibs = np.ones(len(params_dict))*np.inf + n_possibs = np.ones(len(params_dict)) * np.inf for value_index, value in enumerate(params_dict.values()): if type(value) == list: n_possibs[value_index] = len(value) @@ -69,13 +86,14 @@ def genTestFoldsPreds(X_train, y_train, KFolds, estimator): testFoldsPreds = [] trainIndex = np.arange(len(y_train)) folds = KFolds.split(trainIndex, y_train) - foldLengths = np.zeros(KFolds.n_splits,dtype=int) + foldLengths = np.zeros(KFolds.n_splits, dtype=int) for foldIndex, (trainIndices, testIndices) in enumerate(folds): foldLengths[foldIndex] = len(testIndices) estimator.fit(X_train[trainIndices], y_train[trainIndices]) 
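        # Note: within each fold the estimator is fit and then evaluated on the same
        # trainIndices, so these are in-sample predictions even though the list is
        # named testFoldsPreds (the fold's testIndices are only used for foldLengths
        # above); the predictions are later truncated to the shortest test-fold length.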
testFoldsPreds.append(estimator.predict(X_train[trainIndices])) minFoldLength = foldLengths.min() - testFoldsPreds = np.array([testFoldPreds[:minFoldLength] for testFoldPreds in testFoldsPreds]) + testFoldsPreds = np.array( + [testFoldPreds[:minFoldLength] for testFoldPreds in testFoldsPreds]) return testFoldsPreds @@ -83,6 +101,7 @@ class CustomRandint: """Used as a distribution returning a integer between low and high-1. It can be used with a multiplier agrument to be able to perform more complex generation for example 10 e -(randint)""" + def __init__(self, low=0, high=0, multiplier=""): self.randint = randint(low, high) self.multiplier = multiplier @@ -95,13 +114,14 @@ class CustomRandint: return randinteger def get_nb_possibilities(self): - return self.randint.b-self.randint.a + return self.randint.b - self.randint.a class CustomUniform: """Used as a distribution returning a float between loc and loc + scale.. It can be used with a multiplier agrument to be able to perform more complex generation for example 10 e -(float)""" + def __init__(self, loc=0, state=1, multiplier=""): self.uniform = uniform(loc, state) self.multiplier = multiplier @@ -117,34 +137,43 @@ class CustomUniform: class BaseMonoviewClassifier(object): def genBestParams(self, detector): - return dict((param_name, detector.best_params_[param_name]) for param_name in self.param_names) + return dict( + (param_name, detector.best_params_[param_name]) for param_name in + self.param_names) def genParamsFromDetector(self, detector): if self.classed_params: - classed_dict = dict((classed_param, get_names(detector.cv_results_["param_"+classed_param])) + classed_dict = dict((classed_param, get_names( + detector.cv_results_["param_" + classed_param])) for classed_param in self.classed_params) if self.param_names: - return [(param_name, np.array(detector.cv_results_["param_"+param_name])) - if param_name not in self.classed_params else (param_name, classed_dict[param_name]) - for param_name in self.param_names] + return [(param_name, + np.array(detector.cv_results_["param_" + param_name])) + if param_name not in self.classed_params else ( + param_name, classed_dict[param_name]) + for param_name in self.param_names] else: return [()] def genDistribs(self): - return dict((param_name, distrib) for param_name, distrib in zip(self.param_names, self.distribs)) + return dict((param_name, distrib) for param_name, distrib in + zip(self.param_names, self.distribs)) def getConfig(self): if self.param_names: - return "\n\t\t- "+self.__class__.__name__+ "with "+ ", ".join([ param_name+" : " + self.to_str(param_name) for param_name in self.param_names]) + return "\n\t\t- " + self.__class__.__name__ + "with " + ", ".join( + [param_name + " : " + self.to_str(param_name) for param_name in + self.param_names]) else: - return "\n\t\t- "+self.__class__.__name__+ "with no config." + return "\n\t\t- " + self.__class__.__name__ + "with no config." 
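# Illustrative sketch of how these pieces fit together (toy names, only an assumption
# about usage, not code from this module's callers): each monoview classifier declares
# parallel param_names / distribs lists, genDistribs() zips them into a
# param_distributions dict, and randomizedSearch() above hands that dict to
# scikit-learn's RandomizedSearchCV, which samples values through each distribution's
# rvs() method. DecisionTreeClassifier and "max_depth" are placeholders for any
# classifier following the same convention; CustomRandint is the class defined above.
from sklearn.model_selection import RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier

param_names = ["max_depth"]
distribs = [CustomRandint(low=1, high=10)]      # any object exposing rvs() works here
params_dict = dict(zip(param_names, distribs))  # same shape as genDistribs() returns
search = RandomizedSearchCV(DecisionTreeClassifier(), param_distributions=params_dict,
                            n_iter=5, cv=3, refit=True)
# Calling search.fit(X_train, y_train) would then explore max_depth values drawn from
# CustomRandint.rvs(), mirroring what randomizedSearch() does for each classifier.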
def to_str(self, param_name): if param_name in self.weird_strings: if self.weird_strings[param_name] == "class_name": return self.get_params()[param_name].__class__.__name__ else: - return self.weird_strings[param_name](self.get_params()[param_name]) + return self.weird_strings[param_name]( + self.get_params()[param_name]) else: return str(self.get_params()[param_name]) @@ -152,7 +181,8 @@ class BaseMonoviewClassifier(object): """Used to generate a graph and a pickle dictionary representing feature importances""" featureImportances = self.feature_importances_ sortedArgs = np.argsort(-featureImportances) - featureImportancesSorted = featureImportances[sortedArgs][:nb_considered_feats] + featureImportancesSorted = featureImportances[sortedArgs][ + :nb_considered_feats] featureIndicesSorted = sortedArgs[:nb_considered_feats] fig, ax = plt.subplots() x = np.arange(len(featureIndicesSorted)) @@ -163,15 +193,18 @@ class BaseMonoviewClassifier(object): fig.savefig(directory + "feature_importances.png") plt.close() featuresImportancesDict = dict((featureIndex, featureImportance) - for featureIndex, featureImportance in enumerate(featureImportances) + for featureIndex, featureImportance in + enumerate(featureImportances) if featureImportance != 0) with open(directory + 'feature_importances.pickle', 'wb') as handle: pickle.dump(featuresImportancesDict, handle) interpretString = "Feature importances : \n" - for featureIndex, featureImportance in zip(featureIndicesSorted, featureImportancesSorted): + for featureIndex, featureImportance in zip(featureIndicesSorted, + featureImportancesSorted): if featureImportance > 0: interpretString += "- Feature index : " + str(featureIndex) + \ - ", feature importance : " + str(featureImportance) + "\n" + ", feature importance : " + str( + featureImportance) + "\n" return interpretString def get_name_for_fusion(self): @@ -181,13 +214,15 @@ class BaseMonoviewClassifier(object): def get_names(classed_list): return np.array([object_.__class__.__name__ for object_ in classed_list]) + def percent(x, pos): """Used to print percentage of importance on the y axis""" return '%1.1f %%' % (x * 100) class MonoviewResult(object): - def __init__(self, view_index, classifier_name, view_name, metrics_scores, full_labels_pred, + def __init__(self, view_index, classifier_name, view_name, metrics_scores, + full_labels_pred, classifier_config, y_test_multiclass_pred, test_folds_preds): self.view_index = view_index self.classifier_name = classifier_name @@ -199,9 +234,7 @@ class MonoviewResult(object): self.test_folds_preds = test_folds_preds def get_classifier_name(self): - return self.classifier_name+"-"+self.view_name - - + return self.classifier_name + "-" + self.view_name # def isUseful(labelSupports, index, CLASS_LABELS, labelDict): # if labelSupports[labelDict[CLASS_LABELS[index]]] != 0: @@ -444,27 +477,26 @@ class MonoviewResult(object): # return description, KNN_detector +# def calcClassifRandomForest(X_train, X_test, y_test, y_train, num_estimators): +# from sklearn.grid_search import ParameterGrid +# param_rf = { 'classifier__n_estimators': num_estimators} +# forest = RandomForestClassifier() +# +# bestgrid=0; +# for g in ParameterGrid(grid): +# forest.set_params(**g) +# forest.fit(X_train,y_train) +# score = forest.score(X_test, y_test) +# +# if score > best_score: +# best_score = score +# best_grid = g +# +# rf_detector = RandomForestClassifier() +# rf_detector.set_params(**best_grid) +# rf_detector.fit(X_train,y_train) + +# #desc_estimators = best_grid +# description = 
"Classif_" + "RF" + "-" + "CV_" + "NO" + "-" + "Trees_" + str(best_grid) - # def calcClassifRandomForest(X_train, X_test, y_test, y_train, num_estimators): - # from sklearn.grid_search import ParameterGrid - # param_rf = { 'classifier__n_estimators': num_estimators} - # forest = RandomForestClassifier() - # - # bestgrid=0; - # for g in ParameterGrid(grid): - # forest.set_params(**g) - # forest.fit(X_train,y_train) - # score = forest.score(X_test, y_test) - # - # if score > best_score: - # best_score = score - # best_grid = g - # - # rf_detector = RandomForestClassifier() - # rf_detector.set_params(**best_grid) - # rf_detector.fit(X_train,y_train) - - # #desc_estimators = best_grid - # description = "Classif_" + "RF" + "-" + "CV_" + "NO" + "-" + "Trees_" + str(best_grid) - - # return (description, rf_detector) +# return (description, rf_detector) diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/analyzeResult.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/analyzeResult.py index daf8dd09f7e58a5bdf70d1bd0d536f0b59847076..e293bfc97e898981431d2a982ee80909138d13be 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/analyzeResult.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/analyzeResult.py @@ -1,27 +1,31 @@ from datetime import timedelta as hms -import numpy as np -from .. import MonoviewClassifiers from .. import Metrics -def getDBConfigString(name, feat, classificationIndices, shape, classLabelsNames, KFolds): - learningRate = float(len(classificationIndices[0])) / (len(classificationIndices[0]) + len(classificationIndices[1])) +def getDBConfigString(name, feat, classificationIndices, shape, + classLabelsNames, KFolds): + learningRate = float(len(classificationIndices[0])) / ( + len(classificationIndices[0]) + len(classificationIndices[1])) dbConfigString = "Database configuration : \n" dbConfigString += "\t- Database name : " + name + "\n" - dbConfigString += "\t- View name : " + feat + "\t View shape : " + str(shape) + "\n" + dbConfigString += "\t- View name : " + feat + "\t View shape : " + str( + shape) + "\n" dbConfigString += "\t- Learning Rate : " + str(learningRate) + "\n" dbConfigString += "\t- Labels used : " + ", ".join(classLabelsNames) + "\n" - dbConfigString += "\t- Number of cross validation folds : " + str(KFolds.n_splits) + "\n\n" + dbConfigString += "\t- Number of cross validation folds : " + str( + KFolds.n_splits) + "\n\n" return dbConfigString -def getClassifierConfigString(gridSearch, nbCores, nIter, clKWARGS, classifier, directory, y_test): +def getClassifierConfigString(gridSearch, nbCores, nIter, clKWARGS, classifier, + directory, y_test): classifierConfigString = "Classifier configuration : \n" classifierConfigString += "\t- " + classifier.getConfig()[5:] + "\n" classifierConfigString += "\t- Executed on " + str(nbCores) + " core(s) \n" if gridSearch: - classifierConfigString += "\t- Got configuration using randomized search with " + str(nIter) + " iterations \n" + classifierConfigString += "\t- Got configuration using randomized search with " + str( + nIter) + " iterations \n" classifierConfigString += "\n\n" classifierInterpretString = classifier.getInterpret(directory, y_test) return classifierConfigString, classifierInterpretString @@ -30,32 +34,41 @@ def getClassifierConfigString(gridSearch, nbCores, nIter, clKWARGS, classifier, def getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred): metricModule = getattr(Metrics, metric[0]) if metric[1] is not None: - metricKWARGS = dict((index, 
metricConfig) for index, metricConfig in enumerate(metric[1])) + metricKWARGS = dict((index, metricConfig) for index, metricConfig in + enumerate(metric[1])) else: metricKWARGS = {} metricScoreTrain = metricModule.score(y_train, y_train_pred) metricScoreTest = metricModule.score(y_test, y_test_pred) - metricScoreString = "\tFor " + metricModule.getConfig(**metricKWARGS) + " : " + metricScoreString = "\tFor " + metricModule.getConfig( + **metricKWARGS) + " : " metricScoreString += "\n\t\t- Score on train : " + str(metricScoreTrain) metricScoreString += "\n\t\t- Score on test : " + str(metricScoreTest) metricScoreString += "\n" return metricScoreString, [metricScoreTrain, metricScoreTest] -def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics, nIter, feat, CL_type, clKWARGS, classLabelsNames, - shape, y_train, y_train_pred, y_test, y_test_pred, time, randomState, classifier, directory): +def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics, nIter, + feat, CL_type, clKWARGS, classLabelsNames, + shape, y_train, y_train_pred, y_test, y_test_pred, time, + randomState, classifier, directory): metricsScores = {} metricModule = getattr(Metrics, metrics[0][0]) trainScore = metricModule.score(y_train, y_train_pred) testScore = metricModule.score(y_test, y_test_pred) stringAnalysis = "Classification on " + name + " database for " + feat + " with " + CL_type + ".\n\n" - stringAnalysis += metrics[0][0] + " on train : " + str(trainScore) + "\n" + metrics[0][0] + " on test : " + str( + stringAnalysis += metrics[0][0] + " on train : " + str(trainScore) + "\n" + \ + metrics[0][0] + " on test : " + str( testScore) + "\n\n" - stringAnalysis += getDBConfigString(name, feat, learningRate, shape, classLabelsNames, KFolds) - classifierConfigString, classifierIntepretString = getClassifierConfigString(gridSearch, nbCores, nIter, clKWARGS, classifier, directory, y_test) + stringAnalysis += getDBConfigString(name, feat, learningRate, shape, + classLabelsNames, KFolds) + classifierConfigString, classifierIntepretString = getClassifierConfigString( + gridSearch, nbCores, nIter, clKWARGS, classifier, directory, y_test) stringAnalysis += classifierConfigString for metric in metrics: - metricString, metricScore = getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred) + metricString, metricScore = getMetricScore(metric, y_train, + y_train_pred, y_test, + y_test_pred) stringAnalysis += metricString metricsScores[metric[0]] = metricScore # stringAnalysis += getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred) diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py index 805275332bfc6b38beafd1e8292cbab04098bd5e..1965fa6855ce670e74f6b6a7ce3fcdddc1a864fe 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -1,13 +1,12 @@ +import time + +import numpy as np from sklearn.ensemble import AdaBoostClassifier from sklearn.tree import DecisionTreeClassifier -import numpy as np -import time -from sklearn.metrics import accuracy_score -from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier -from ..Monoview.Additions.BoostUtils import get_accuracy_graph from .. 
import Metrics from ..Monoview.Additions.BoostUtils import get_accuracy_graph +from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier # Author-Info __author__ = "Baptiste Bauvin" @@ -23,10 +22,11 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): n_estimators=n_estimators, base_estimator=base_estimator, algorithm="SAMME" - ) + ) self.param_names = ["n_estimators", "base_estimator"] self.classed_params = ["base_estimator"] - self.distribs = [CustomRandint(low=1, high=500), [DecisionTreeClassifier(max_depth=1)]] + self.distribs = [CustomRandint(low=1, high=500), + [DecisionTreeClassifier(max_depth=1)]] self.weird_strings = {"base_estimator": "class_name"} self.plotted_metric = Metrics.zero_one_loss self.plotted_metric_name = "zero_one_loss" @@ -36,11 +36,16 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): begin = time.time() super(Adaboost, self).fit(X, y, sample_weight=sample_weight) end = time.time() - self.train_time = end-begin + self.train_time = end - begin self.train_shape = X.shape - self.base_predictions = np.array([estim.predict(X) for estim in self.estimators_]) - self.metrics = np.array([self.plotted_metric.score(pred, y) for pred in self.staged_predict(X)]) - self.bounds = np.array([np.prod(np.sqrt(1-4*np.square(0.5-self.estimator_errors_[:i+1]))) for i in range(self.estimator_errors_.shape[0])]) + self.base_predictions = np.array( + [estim.predict(X) for estim in self.estimators_]) + self.metrics = np.array([self.plotted_metric.score(pred, y) for pred in + self.staged_predict(X)]) + self.bounds = np.array([np.prod( + np.sqrt(1 - 4 * np.square(0.5 - self.estimator_errors_[:i + 1]))) + for i in + range(self.estimator_errors_.shape[0])]) def canProbas(self): """Used to know if the classifier can return label probabilities""" @@ -52,21 +57,32 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): end = time.time() self.pred_time = end - begin if X.shape != self.train_shape: - self.step_predictions = np.array([step_pred for step_pred in self.staged_predict(X)]) + self.step_predictions = np.array( + [step_pred for step_pred in self.staged_predict(X)]) return pred def getInterpret(self, directory, y_test): interpretString = "" interpretString += self.getFeatureImportance(directory) interpretString += "\n\n Estimator error | Estimator weight\n" - interpretString += "\n".join([str(error) +" | "+ str(weight/sum(self.estimator_weights_)) for error, weight in zip(self.estimator_errors_, self.estimator_weights_)]) - step_test_metrics = np.array([self.plotted_metric.score(y_test, step_pred) for step_pred in self.step_predictions]) - get_accuracy_graph(step_test_metrics, "Adaboost", directory + "test_metrics.png", + interpretString += "\n".join( + [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for + error, weight in + zip(self.estimator_errors_, self.estimator_weights_)]) + step_test_metrics = np.array( + [self.plotted_metric.score(y_test, step_pred) for step_pred in + self.step_predictions]) + get_accuracy_graph(step_test_metrics, "Adaboost", + directory + "test_metrics.png", self.plotted_metric_name, set="test") - get_accuracy_graph(self.metrics, "Adaboost", directory+"metrics.png", self.plotted_metric_name, bounds=list(self.bounds), bound_name="boosting bound") - np.savetxt(directory + "test_metrics.csv", step_test_metrics, delimiter=',') + get_accuracy_graph(self.metrics, "Adaboost", directory + "metrics.png", + self.plotted_metric_name, bounds=list(self.bounds), + bound_name="boosting bound") + np.savetxt(directory + 
"test_metrics.csv", step_test_metrics, + delimiter=',') np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',') - np.savetxt(directory + "times.csv", np.array([self.train_time, self.pred_time]), delimiter=',') + np.savetxt(directory + "times.csv", + np.array([self.train_time, self.pred_time]), delimiter=',') return interpretString diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostGraalpy.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostGraalpy.py index bbad3086e4e115da5db25ecfa39c0a6f2951929b..d7ddcab5b48dd543cd45612261c1412cd6b928ad 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostGraalpy.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostGraalpy.py @@ -1,11 +1,15 @@ import logging + import numpy as np from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.validation import check_is_fitted -from ..Monoview.MonoviewUtils import CustomUniform, CustomRandint, BaseMonoviewClassifier, change_label_to_minus, change_label_to_zero -from ..Monoview.Additions.BoostUtils import getInterpretBase, StumpsClassifiersGenerator, BaseBoost from ..Metrics import zero_one_loss +from ..Monoview.Additions.BoostUtils import StumpsClassifiersGenerator, \ + BaseBoost +from ..Monoview.MonoviewUtils import CustomRandint, \ + BaseMonoviewClassifier, change_label_to_minus, change_label_to_zero + class AdaBoostGP(BaseEstimator, ClassifierMixin, BaseBoost): """Scikit-Learn compatible AdaBoost classifier. Original code by Pascal Germain, adapted by Jean-Francis Roy. @@ -24,9 +28,13 @@ class AdaBoostGP(BaseEstimator, ClassifierMixin, BaseBoost): A function to call at each iteration that is supplied learning information. Defaults to None. """ - def __init__(self, n_iterations=200, iterations_to_collect_as_hyperparameters=True, classifiers_generator=None, callback_function=None, n_stumps=10, self_complemented=True): + + def __init__(self, n_iterations=200, + iterations_to_collect_as_hyperparameters=True, + classifiers_generator=None, callback_function=None, + n_stumps=10, self_complemented=True): self.n_iterations = n_iterations - self.n_stumps=n_stumps + self.n_stumps = n_stumps self.iterations_to_collect_as_hyperparameters = iterations_to_collect_as_hyperparameters self.estimators_generator = classifiers_generator self.callback_function = callback_function @@ -50,7 +58,9 @@ class AdaBoostGP(BaseEstimator, ClassifierMixin, BaseBoost): y_neg = change_label_to_minus(y) if self.estimators_generator is None: - self.estimators_generator = StumpsClassifiersGenerator(n_stumps_per_attribute=self.n_stumps, self_complemented=self.self_complemented) + self.estimators_generator = StumpsClassifiersGenerator( + n_stumps_per_attribute=self.n_stumps, + self_complemented=self.self_complemented) # Step 1: We fit the classifiers generator and get its classification matrix. self.estimators_generator.fit(X, y_neg) @@ -70,7 +80,8 @@ class AdaBoostGP(BaseEstimator, ClassifierMixin, BaseBoost): for t in range(self.n_iterations): # Step 4: We find the classifier that maximizes the success, weighted by the sample weights. 
- classifier_successes = np.dot(classification_matrix.T, sample_weights * y_neg) + classifier_successes = np.dot(classification_matrix.T, + sample_weights * y_neg) best_voter_index = np.argmax(classifier_successes) success = classifier_successes[best_voter_index] @@ -88,7 +99,8 @@ class AdaBoostGP(BaseEstimator, ClassifierMixin, BaseBoost): # logging.debug("{} : {}".format(t, str(alpha))) # Step 6: We update the sample weights. - sample_weights *= np.exp(-1 * alpha * y_neg * classification_matrix[:, best_voter_index]) + sample_weights *= np.exp( + -1 * alpha * y_neg * classification_matrix[:, best_voter_index]) normalization_constant = sample_weights.sum() sample_weights = sample_weights / normalization_constant @@ -104,11 +116,13 @@ class AdaBoostGP(BaseEstimator, ClassifierMixin, BaseBoost): self.losses.append(loss) if self.callback_function is not None: - self.callback_function(t, alpha_weights, normalization_constant, self.estimators_generator, self.weights_) + self.callback_function(t, alpha_weights, normalization_constant, + self.estimators_generator, self.weights_) self.weights_ = alpha_weights / np.sum(alpha_weights) self.losses = np.array(self.losses) - self.learner_info_ = {'n_nonzero_weights': np.sum(self.weights_ > 1e-12)} + self.learner_info_ = { + 'n_nonzero_weights': np.sum(self.weights_ > 1e-12)} return self @@ -133,22 +147,26 @@ class AdaBoostGP(BaseEstimator, ClassifierMixin, BaseBoost): self.test_preds = [] for weight_vector in self.collected_weight_vectors_: preds = np.sum(np.multiply(classification_matrix, - weight_vector), axis=1) + weight_vector), axis=1) self.test_preds.append(change_label_to_zero(np.sign(preds))) self.test_preds = np.array(self.test_preds) - margins = np.squeeze(np.asarray(np.dot(classification_matrix, self.weights_))) - return change_label_to_zero(np.array([int(x) for x in np.sign(margins)])) + margins = np.squeeze( + np.asarray(np.dot(classification_matrix, self.weights_))) + return change_label_to_zero( + np.array([int(x) for x in np.sign(margins)])) class AdaboostGraalpy(AdaBoostGP, BaseMonoviewClassifier): - def __init__(self, random_state=None, n_iterations=200, n_stumps=1, **kwargs): + def __init__(self, random_state=None, n_iterations=200, n_stumps=1, + **kwargs): super(AdaboostGraalpy, self).__init__( n_iterations=n_iterations, n_stumps=n_stumps ) - self.param_names = ["n_iterations","n_stumps", "random_state"] - self.distribs = [CustomRandint(low=1, high=500), [n_stumps], [random_state]] + self.param_names = ["n_iterations", "n_stumps", "random_state"] + self.distribs = [CustomRandint(low=1, high=500), [n_stumps], + [random_state]] self.classed_params = [] self.weird_strings = {} self.n_stumps = n_stumps @@ -168,8 +186,8 @@ class AdaboostGraalpy(AdaBoostGP, BaseMonoviewClassifier): step_metrics = [] for step_index in range(self.test_preds.shape[0] - 1): step_metrics.append(zero_one_loss.score(y_test, - self.test_preds[step_index, - :])) + self.test_preds[step_index, + :])) step_metrics = np.array(step_metrics) np.savetxt(directory + "step_test_metrics.csv", step_metrics, delimiter=',') @@ -179,7 +197,7 @@ class AdaboostGraalpy(AdaBoostGP, BaseMonoviewClassifier): def formatCmdArgs(args): """Used to format kwargs for the parsed args""" kwargsDict = {"n_iterations": args.AdG_n_iter, - "n_stumps": args.AdG_stumps,} + "n_stumps": args.AdG_stumps, } return kwargsDict @@ -187,5 +205,5 @@ def paramsToSet(nIter, randomState): """Used for weighted linear early fusion to generate random search sets""" paramsSet = [] for _ in range(nIter): - 
paramsSet.append({"n_iterations": randomState.randint(1, 500),}) + paramsSet.append({"n_iterations": randomState.randint(1, 500), }) return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregen.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregen.py index e378fd05604275b1728fdf061875f9e808d12eef..6393b19d998158b7957e56a7290d0afb2e3bad6b 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregen.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregen.py @@ -1,52 +1,64 @@ +import time + +import numpy as np from sklearn.ensemble import AdaBoostClassifier from sklearn.tree import DecisionTreeClassifier -import numpy as np -import time -from sklearn.metrics import accuracy_score -from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier, change_label_to_minus, change_label_to_zero -from ..Monoview.Additions.BoostUtils import get_accuracy_graph from .. import Metrics -from ..Monoview.Additions.BoostUtils import get_accuracy_graph, StumpsClassifiersGenerator, BaseBoost +from ..Monoview.Additions.BoostUtils import get_accuracy_graph from ..Monoview.Additions.PregenUtils import PregenClassifier +from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier, \ + change_label_to_zero # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier, PregenClassifier): +class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier, + PregenClassifier): def __init__(self, random_state=None, n_estimators=50, - base_estimator=None, n_stumps=1, self_complemeted=True , **kwargs): + base_estimator=None, n_stumps=1, self_complemeted=True, + **kwargs): super(AdaboostPregen, self).__init__( random_state=random_state, n_estimators=n_estimators, base_estimator=base_estimator, algorithm="SAMME" - ) - self.param_names = ["n_estimators", "base_estimator", "n_stumps", "random_state"] + ) + self.param_names = ["n_estimators", "base_estimator", "n_stumps", + "random_state"] self.classed_params = ["base_estimator"] - self.distribs = [CustomRandint(low=1, high=500), [DecisionTreeClassifier(max_depth=1)], [n_stumps], [random_state]] + self.distribs = [CustomRandint(low=1, high=500), + [DecisionTreeClassifier(max_depth=1)], [n_stumps], + [random_state]] self.weird_strings = {"base_estimator": "class_name"} self.plotted_metric = Metrics.zero_one_loss self.plotted_metric_name = "zero_one_loss" self.step_predictions = None self.estimators_generator = "Stumps" - self.n_stumps=n_stumps - self.self_complemented=self_complemeted + self.n_stumps = n_stumps + self.self_complemented = self_complemeted def fit(self, X, y, sample_weight=None): begin = time.time() pregen_X, pregen_y = self.pregen_voters(X, y) - super(AdaboostPregen, self).fit(pregen_X, pregen_y, sample_weight=sample_weight) + super(AdaboostPregen, self).fit(pregen_X, pregen_y, + sample_weight=sample_weight) end = time.time() - self.train_time = end-begin + self.train_time = end - begin self.train_shape = pregen_X.shape - self.base_predictions = np.array([change_label_to_zero(estim.predict(pregen_X)) for estim in self.estimators_]) - self.metrics = np.array([self.plotted_metric.score(change_label_to_zero(pred), y) for pred in self.staged_predict(pregen_X)]) - self.bounds = np.array([np.prod(np.sqrt(1-4*np.square(0.5-self.estimator_errors_[:i+1]))) for i in 
range(self.estimator_errors_.shape[0])]) - + self.base_predictions = np.array( + [change_label_to_zero(estim.predict(pregen_X)) for estim in + self.estimators_]) + self.metrics = np.array( + [self.plotted_metric.score(change_label_to_zero(pred), y) for pred + in self.staged_predict(pregen_X)]) + self.bounds = np.array([np.prod( + np.sqrt(1 - 4 * np.square(0.5 - self.estimator_errors_[:i + 1]))) + for i in + range(self.estimator_errors_.shape[0])]) def canProbas(self): """Used to know if the classifier can return label probabilities""" @@ -59,7 +71,9 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier, PregenClassifie end = time.time() self.pred_time = end - begin if pregen_X.shape != self.train_shape: - self.step_predictions = np.array([change_label_to_zero(step_pred) for step_pred in self.staged_predict(pregen_X)]) + self.step_predictions = np.array( + [change_label_to_zero(step_pred) for step_pred in + self.staged_predict(pregen_X)]) return change_label_to_zero(pred) # def set_params(self, **params): @@ -68,19 +82,29 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier, PregenClassifie # self.n_stumps_per_attribute = params["n_tumps"] # return self - def getInterpret(self, directory, y_test): interpretString = "" interpretString += self.getFeatureImportance(directory) interpretString += "\n\n Estimator error | Estimator weight\n" - interpretString += "\n".join([str(error) +" | "+ str(weight/sum(self.estimator_weights_)) for error, weight in zip(self.estimator_errors_, self.estimator_weights_)]) - step_test_metrics = np.array([self.plotted_metric.score(y_test, step_pred) for step_pred in self.step_predictions]) - get_accuracy_graph(step_test_metrics, "AdaboostPregen", directory + "test_metrics.png", + interpretString += "\n".join( + [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for + error, weight in + zip(self.estimator_errors_, self.estimator_weights_)]) + step_test_metrics = np.array( + [self.plotted_metric.score(y_test, step_pred) for step_pred in + self.step_predictions]) + get_accuracy_graph(step_test_metrics, "AdaboostPregen", + directory + "test_metrics.png", self.plotted_metric_name, set="test") - get_accuracy_graph(self.metrics, "AdaboostPregen", directory+"metrics.png", self.plotted_metric_name, bounds=list(self.bounds), bound_name="boosting bound") - np.savetxt(directory + "test_metrics.csv", step_test_metrics, delimiter=',') + get_accuracy_graph(self.metrics, "AdaboostPregen", + directory + "metrics.png", self.plotted_metric_name, + bounds=list(self.bounds), + bound_name="boosting bound") + np.savetxt(directory + "test_metrics.csv", step_test_metrics, + delimiter=',') np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',') - np.savetxt(directory + "times.csv", np.array([self.train_time, self.pred_time]), delimiter=',') + np.savetxt(directory + "times.csv", + np.array([self.train_time, self.pred_time]), delimiter=',') return interpretString # def pregen_voters(self, X, y=None): @@ -95,11 +119,12 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier, PregenClassifie # neg_y=None # classification_matrix = self._binary_classification_matrix(X) + def formatCmdArgs(args): """Used to format kwargs for the parsed args""" kwargsDict = {'n_estimators': args.AdP_n_est, 'base_estimator': DecisionTreeClassifier(max_depth=1), - 'n_stumps':args.AdP_stumps} + 'n_stumps': args.AdP_stumps} return kwargsDict diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregen10.py 
b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregen10.py index 4e43632507c22f702ecd582cc26f80db212aab8f..7d6763e8f752d666828fa9f59b02dd5ef350acaa 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregen10.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregen10.py @@ -1,4 +1,5 @@ from sklearn.tree import DecisionTreeClassifier + from .AdaboostPregen import AdaboostPregen # Author-Info @@ -9,14 +10,17 @@ __status__ = "Prototype" # Production, Development, Prototype class AdaboostPregen10(AdaboostPregen): def __init__(self, random_state=None, n_estimators=50, - base_estimator=None, n_stumps=1, self_complemeted=True , **kwargs): + base_estimator=None, n_stumps=1, self_complemeted=True, + **kwargs): super(AdaboostPregen10, self).__init__( random_state=random_state, n_estimators=100, base_estimator=base_estimator, n_stumps=10, self_complemeted=self_complemeted - ) + ) + + def formatCmdArgs(args): """Used to format kwargs for the parsed args""" kwargsDict = {'n_estimators': args.AdP_n_est, diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregenTree.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregenTree.py index 2d52f3e9b87481be0dcbf96edb0402b1c06c075a..7e4d67377fd43eae8e50cf5a5a5f95599e52fede 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregenTree.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregenTree.py @@ -1,54 +1,67 @@ +import time + +import numpy as np from sklearn.ensemble import AdaBoostClassifier from sklearn.tree import DecisionTreeClassifier -import numpy as np -import time -from sklearn.metrics import accuracy_score -from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier, change_label_to_minus, change_label_to_zero -from ..Monoview.Additions.BoostUtils import get_accuracy_graph from .. 
import Metrics -from ..Monoview.Additions.BoostUtils import get_accuracy_graph, StumpsClassifiersGenerator, BaseBoost +from ..Monoview.Additions.BoostUtils import get_accuracy_graph from ..Monoview.Additions.PregenUtils import PregenClassifier +from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier, \ + change_label_to_zero # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -class AdaboostPregenTree(AdaBoostClassifier, BaseMonoviewClassifier, PregenClassifier): +class AdaboostPregenTree(AdaBoostClassifier, BaseMonoviewClassifier, + PregenClassifier): def __init__(self, random_state=None, n_estimators=50, - base_estimator=None, n_stumps=1, self_complemeted=True, max_depth=2, **kwargs): + base_estimator=None, n_stumps=1, self_complemeted=True, + max_depth=2, **kwargs): super(AdaboostPregenTree, self).__init__( random_state=random_state, n_estimators=n_estimators, base_estimator=base_estimator, algorithm="SAMME" - ) - self.param_names = ["n_estimators", "base_estimator", "n_stumps", "random_state", "max_depth"] + ) + self.param_names = ["n_estimators", "base_estimator", "n_stumps", + "random_state", "max_depth"] self.classed_params = ["base_estimator"] - self.distribs = [CustomRandint(low=1, high=500), [DecisionTreeClassifier(max_depth=1)], [n_stumps], [random_state], [max_depth]] + self.distribs = [CustomRandint(low=1, high=500), + [DecisionTreeClassifier(max_depth=1)], [n_stumps], + [random_state], [max_depth]] self.weird_strings = {"base_estimator": "class_name"} self.plotted_metric = Metrics.zero_one_loss self.plotted_metric_name = "zero_one_loss" self.step_predictions = None self.estimators_generator = "Trees" - self.n_stumps=n_stumps + self.n_stumps = n_stumps self.max_depth = max_depth - self.self_complemented=self_complemeted + self.self_complemented = self_complemeted self.random_state = random_state def fit(self, X, y, sample_weight=None): - pregen_X, pregen_y = self.pregen_voters(X, y, generator=self.estimators_generator) + pregen_X, pregen_y = self.pregen_voters(X, y, + generator=self.estimators_generator) begin = time.time() - super(AdaboostPregenTree, self).fit(pregen_X, pregen_y, sample_weight=sample_weight) + super(AdaboostPregenTree, self).fit(pregen_X, pregen_y, + sample_weight=sample_weight) end = time.time() - self.train_time = end-begin + self.train_time = end - begin self.train_shape = pregen_X.shape - self.base_predictions = np.array([change_label_to_zero(estim.predict(pregen_X)) for estim in self.estimators_]) - self.metrics = np.array([self.plotted_metric.score(change_label_to_zero(pred), y) for pred in self.staged_predict(pregen_X)]) - self.bounds = np.array([np.prod(np.sqrt(1-4*np.square(0.5-self.estimator_errors_[:i+1]))) for i in range(self.estimator_errors_.shape[0])]) - + self.base_predictions = np.array( + [change_label_to_zero(estim.predict(pregen_X)) for estim in + self.estimators_]) + self.metrics = np.array( + [self.plotted_metric.score(change_label_to_zero(pred), y) for pred + in self.staged_predict(pregen_X)]) + self.bounds = np.array([np.prod( + np.sqrt(1 - 4 * np.square(0.5 - self.estimator_errors_[:i + 1]))) + for i in + range(self.estimator_errors_.shape[0])]) def canProbas(self): """Used to know if the classifier can return label probabilities""" @@ -61,31 +74,43 @@ class AdaboostPregenTree(AdaBoostClassifier, BaseMonoviewClassifier, PregenClass end = time.time() self.pred_time = end - begin if pregen_X.shape != self.train_shape: - self.step_predictions = 
np.array([change_label_to_zero(step_pred) for step_pred in self.staged_predict(pregen_X)])
+        self.step_predictions = np.array(
+            [change_label_to_zero(step_pred) for step_pred in
+             self.staged_predict(pregen_X)])
         return change_label_to_zero(pred)
-
-
     def getInterpret(self, directory, y_test):
         interpretString = ""
         interpretString += self.getFeatureImportance(directory)
         interpretString += "\n\n Estimator error | Estimator weight\n"
-        interpretString += "\n".join([str(error) +" | "+ str(weight/sum(self.estimator_weights_)) for error, weight in zip(self.estimator_errors_, self.estimator_weights_)])
-        step_test_metrics = np.array([self.plotted_metric.score(y_test, step_pred) for step_pred in self.step_predictions])
-        get_accuracy_graph(step_test_metrics, "AdaboostPregen", directory + "test_metrics.png",
+        interpretString += "\n".join(
+            [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for
+             error, weight in
+             zip(self.estimator_errors_, self.estimator_weights_)])
+        step_test_metrics = np.array(
+            [self.plotted_metric.score(y_test, step_pred) for step_pred in
+             self.step_predictions])
+        get_accuracy_graph(step_test_metrics, "AdaboostPregen",
+                           directory + "test_metrics.png",
                            self.plotted_metric_name, set="test")
-        get_accuracy_graph(self.metrics, "AdaboostPregen", directory+"metrics.png", self.plotted_metric_name, bounds=list(self.bounds), bound_name="boosting bound")
-        np.savetxt(directory + "test_metrics.csv", step_test_metrics, delimiter=',')
+        get_accuracy_graph(self.metrics, "AdaboostPregen",
+                           directory + "metrics.png", self.plotted_metric_name,
+                           bounds=list(self.bounds),
+                           bound_name="boosting bound")
+        np.savetxt(directory + "test_metrics.csv", step_test_metrics,
+                   delimiter=',')
         np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',')
-        np.savetxt(directory + "times.csv", np.array([self.train_time, self.pred_time]), delimiter=',')
+        np.savetxt(directory + "times.csv",
+                   np.array([self.train_time, self.pred_time]), delimiter=',')
         return interpretString
 
+
 def formatCmdArgs(args):
     """Used to format kwargs for the parsed args"""
     kwargsDict = {'n_estimators': args.AdPT_n_est,
                   'base_estimator': DecisionTreeClassifier(max_depth=1),
-                  'n_stumps':args.AdPT_trees,
-                  "max_depth":args.AdPT_max_depth}
+                  'n_stumps': args.AdPT_trees,
+                  "max_depth": args.AdPT_max_depth}
     return kwargsDict
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py
index ed2d9536c81e51aa4f7063ae08279dfedd0b1a85..89c84deed67d1e0e4cbb1efdb182133161851607 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc.py
@@ -1,22 +1,22 @@
-from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomRandint
-from ..Monoview.Additions.BoostUtils import getInterpretBase
 from ..Monoview.Additions.CGDescUtils import ColumnGenerationClassifierQar
+from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomRandint
 
 
 class CGDesc(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
 
-    def __init__(self, random_state=None, n_max_iterations=500, n_stumps=1, **kwargs):
+    def __init__(self, random_state=None, n_max_iterations=500, n_stumps=1,
+                 **kwargs):
         super(CGDesc, self).__init__(n_max_iterations=n_max_iterations,
-                                     random_state=random_state,
-                                     self_complemented=True,
-                                     twice_the_same=True,
-                                     c_bound_choice=True,
-                                     random_start=False,
-                                     n_stumps=n_stumps,
-                                     use_r=True,
-                                     c_bound_sol=True,
-                                     estimators_generator="Stumps"
-                                     )
+                                     random_state=random_state,
+                                     self_complemented=True,
+                                     twice_the_same=True,
+                                     c_bound_choice=True,
+                                     random_start=False,
+                                     n_stumps=n_stumps,
+                                     use_r=True,
+                                     c_bound_sol=True,
+                                     estimators_generator="Stumps"
+                                     )
         self.param_names = ["n_max_iterations", "n_stumps", "random_state"]
         self.distribs = [CustomRandint(low=2, high=500), [n_stumps],
                          [random_state]]
@@ -36,8 +36,8 @@ class CGDesc(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
 
 def formatCmdArgs(args):
     """Used to format kwargs for the parsed args"""
-    kwargsDict = {"n_stumps":args.CGD_stumps,
-                  "n_max_iterations":args.CGD_n_iter}
+    kwargsDict = {"n_stumps": args.CGD_stumps,
+                  "n_max_iterations": args.CGD_n_iter}
     return kwargsDict
 
 
@@ -46,4 +46,4 @@ def paramsToSet(nIter, randomState):
     paramsSet = []
     for _ in range(nIter):
         paramsSet.append({})
-    return paramsSet
\ No newline at end of file
+    return paramsSet
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc10.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc10.py
index bcda98ea354128ae63938c59e094e0ebb104c3c7..dbc628568ce589609620619e20c701e5f29375aa 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc10.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDesc10.py
@@ -1,19 +1,19 @@
-from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomRandint
-from ..Monoview.Additions.BoostUtils import getInterpretBase
-from ..Monoview.Additions.CGDescUtils import ColumnGenerationClassifierQar
 from .CGDesc import CGDesc
 
+
 class CGDesc10(CGDesc):
 
-    def __init__(self, random_state=None, n_max_iterations=500, n_stumps=1, **kwargs):
+    def __init__(self, random_state=None, n_max_iterations=500, n_stumps=1,
+                 **kwargs):
         super(CGDesc10, self).__init__(n_max_iterations=100,
-                                       random_state=random_state,
-                                       n_stumps=10,)
+                                       random_state=random_state,
+                                       n_stumps=10, )
+
 
 def formatCmdArgs(args):
     """Used to format kwargs for the parsed args"""
-    kwargsDict = {"n_stumps":args.CGD_stumps,
-                  "n_max_iterations":args.CGD_n_iter}
+    kwargsDict = {"n_stumps": args.CGD_stumps,
+                  "n_max_iterations": args.CGD_n_iter}
     return kwargsDict
 
 
@@ -22,4 +22,4 @@ def paramsToSet(nIter, randomState):
     paramsSet = []
     for _ in range(nIter):
         paramsSet.append({})
-    return paramsSet
\ No newline at end of file
+    return paramsSet
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDescTree.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDescTree.py
index 41344edece3c31d030a9104adb70d1b346f4d2b0..bef57eea4567d85b9aa42a518d3ffc59eb7a62b0 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDescTree.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGDescTree.py
@@ -1,24 +1,25 @@
-from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomRandint
-from ..Monoview.Additions.BoostUtils import getInterpretBase
 from ..Monoview.Additions.CGDescUtils import ColumnGenerationClassifierQar
+from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomRandint
 
 
 class CGDescTree(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
 
-    def __init__(self, random_state=None, n_max_iterations=500, n_stumps=1, max_depth=2, **kwargs):
+    def __init__(self, random_state=None, n_max_iterations=500, n_stumps=1,
+                 max_depth=2, **kwargs):
         super(CGDescTree, self).__init__(n_max_iterations=n_max_iterations,
-                                         random_state=random_state,
-                                         self_complemented=True,
-                                         twice_the_same=True,
-                                         c_bound_choice=True,
-                                         random_start=False,
-                                         n_stumps=n_stumps,
-                                         use_r=True,
-                                         c_bound_sol=True,
-                                         estimators_generator="Trees"
-                                         )
+                                         random_state=random_state,
+                                         self_complemented=True,
+                                         twice_the_same=True,
+                                         c_bound_choice=True,
+                                         random_start=False,
+                                         n_stumps=n_stumps,
+                                         use_r=True,
+                                         c_bound_sol=True,
+                                         estimators_generator="Trees"
+                                         )
         self.max_depth = max_depth
-        self.param_names = ["n_max_iterations", "n_stumps", "random_state", "max_depth"]
+        self.param_names = ["n_max_iterations", "n_stumps", "random_state",
+                            "max_depth"]
         self.distribs = [CustomRandint(low=2, high=1000), [n_stumps],
                          [random_state], [max_depth]]
         self.classed_params = []
@@ -37,8 +38,8 @@ class CGDescTree(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
 
 def formatCmdArgs(args):
     """Used to format kwargs for the parsed args"""
-    kwargsDict = {"n_stumps":args.CGDT_trees,
-                  "n_max_iterations":args.CGDT_n_iter,
+    kwargsDict = {"n_stumps": args.CGDT_trees,
+                  "n_max_iterations": args.CGDT_n_iter,
                   "max_depth": args.CGDT_max_depth}
     return kwargsDict
 
 
@@ -48,4 +49,4 @@ def paramsToSet(nIter, randomState):
     paramsSet = []
     for _ in range(nIter):
         paramsSet.append({})
-    return paramsSet
\ No newline at end of file
+    return paramsSet
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py
index 9e25e40bed92d1cf15914394f02d00aa49d626b9..6030dc1e44021c7975794c488b12a245f73d8499 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py
@@ -1,25 +1,26 @@
-from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomRandint
-from ..Monoview.Additions.BoostUtils import getInterpretBase
 from ..Monoview.Additions.CGDescUtils import ColumnGenerationClassifierQar
+from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomRandint
 
 
 class CGreed(ColumnGenerationClassifierQar, BaseMonoviewClassifier):
 
-    def __init__(self, random_state=None, n_max_iterations=500, n_stumps=10, **kwargs):
+    def __init__(self, random_state=None, n_max_iterations=500, n_stumps=10,
+                 **kwargs):
         super(CGreed, self).__init__(n_max_iterations=n_max_iterations,
-                                     random_state=random_state,
-                                     self_complemented=True,
-                                     twice_the_same=False,
-                                     c_bound_choice=True,
-                                     random_start=False,
-                                     n_stumps=n_stumps,
-                                     use_r=True,
-                                     c_bound_sol=True,
-                                     estimators_generator="Stumps"
-                                     )
+                                     random_state=random_state,
+                                     self_complemented=True,
+                                     twice_the_same=False,
+                                     c_bound_choice=True,
+                                     random_start=False,
+                                     n_stumps=n_stumps,
+                                     use_r=True,
+                                     c_bound_sol=True,
+                                     estimators_generator="Stumps"
+                                     )
         self.param_names = ["n_max_iterations", "n_stumps", "random_state"]
-        self.distribs = [CustomRandint(low=2, high=1000), [n_stumps], [random_state]]
+        self.distribs = [CustomRandint(low=2, high=1000), [n_stumps],
+                         [random_state]]
         self.classed_params = []
         self.weird_strings = {}
@@ -36,8 +37,8 @@ def formatCmdArgs(args):
     """Used to format kwargs for the parsed args"""
-    kwargsDict = {"n_stumps":args.CGR_stumps,
-                  "n_max_iterations":args.CGR_n_iter}
+    kwargsDict = {"n_stumps": args.CGR_stumps,
+                  "n_max_iterations": args.CGR_n_iter}
     return kwargsDict
 
 
@@ -46,4 +47,4 @@
     paramsSet = []
     for _ in range(nIter):
         paramsSet.append({})
-    return paramsSet
\ No newline at end of file
+    return paramsSet
diff --git
a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py index 14d654531b5183944f1e4913941ccb65229089e9..9ed52604f4745fb0f6f05616a654a89da19287b4 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py @@ -1,13 +1,15 @@ -from ..Monoview.MonoviewUtils import CustomUniform, CustomRandint, BaseMonoviewClassifier -from ..Monoview.Additions.CQBoostUtils import ColumnGenerationClassifier +import numpy as np + from ..Monoview.Additions.BoostUtils import getInterpretBase +from ..Monoview.Additions.CQBoostUtils import ColumnGenerationClassifier +from ..Monoview.MonoviewUtils import CustomUniform, CustomRandint, \ + BaseMonoviewClassifier -import numpy as np -import os class CQBoost(ColumnGenerationClassifier, BaseMonoviewClassifier): - def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, n_stumps=1, n_max_iterations=None, **kwargs): + def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, n_stumps=1, + n_max_iterations=None, **kwargs): super(CQBoost, self).__init__( random_state=random_state, mu=mu, @@ -15,9 +17,11 @@ class CQBoost(ColumnGenerationClassifier, BaseMonoviewClassifier): estimators_generator="Stumps", n_max_iterations=n_max_iterations ) - self.param_names = ["mu", "epsilon", "n_stumps", "random_state", "n_max_iterations"] + self.param_names = ["mu", "epsilon", "n_stumps", "random_state", + "n_max_iterations"] self.distribs = [CustomUniform(loc=0.5, state=1.0, multiplier="e-"), - CustomRandint(low=1, high=15, multiplier="e-"), [n_stumps], [random_state], [n_max_iterations]] + CustomRandint(low=1, high=15, multiplier="e-"), + [n_stumps], [random_state], [n_max_iterations]] self.classed_params = [] self.weird_strings = {} self.n_stumps = n_stumps @@ -26,13 +30,13 @@ class CQBoost(ColumnGenerationClassifier, BaseMonoviewClassifier): else: self.nbCores = kwargs["nbCores"] - def canProbas(self): """Used to know if the classifier can return label probabilities""" return True def getInterpret(self, directory, y_test): - np.savetxt(directory + "train_metrics.csv", self.train_metrics, delimiter=',') + np.savetxt(directory + "train_metrics.csv", self.train_metrics, + delimiter=',') np.savetxt(directory + "c_bounds.csv", self.c_bounds, delimiter=',') np.savetxt(directory + "y_test_step.csv", self.step_decisions, @@ -45,15 +49,16 @@ class CQBoost(ColumnGenerationClassifier, BaseMonoviewClassifier): step_metrics = np.array(step_metrics) np.savetxt(directory + "step_test_metrics.csv", step_metrics, delimiter=',') - return getInterpretBase(self, directory, "CQBoost", self.weights_, y_test) + return getInterpretBase(self, directory, "CQBoost", self.weights_, + y_test) def formatCmdArgs(args): """Used to format kwargs for the parsed args""" kwargsDict = {"mu": args.CQB_mu, "epsilon": args.CQB_epsilon, - "n_stumps":args.CQB_stumps, - "n_max_iterations":args.CQB_n_iter} + "n_stumps": args.CQB_stumps, + "n_max_iterations": args.CQB_n_iter} return kwargsDict @@ -61,6 +66,6 @@ def paramsToSet(nIter, randomState): """Used for weighted linear early fusion to generate random search sets""" paramsSet = [] for _ in range(nIter): - paramsSet.append({"mu": 10**-randomState.uniform(0.5, 1.5), - "epsilon": 10**-randomState.randint(1, 15)}) + paramsSet.append({"mu": 10 ** -randomState.uniform(0.5, 1.5), + "epsilon": 10 ** -randomState.randint(1, 15)}) return paramsSet diff --git 
a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostTree.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostTree.py index 25c2a5ea5412d1ffcd6752c6909375b0d5deda28..d66eef7675d6e906d4036001b7867a630d27bd87 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostTree.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostTree.py @@ -1,13 +1,15 @@ -from ..Monoview.MonoviewUtils import CustomUniform, CustomRandint, BaseMonoviewClassifier -from ..Monoview.Additions.CQBoostUtils import ColumnGenerationClassifier +import numpy as np + from ..Monoview.Additions.BoostUtils import getInterpretBase +from ..Monoview.Additions.CQBoostUtils import ColumnGenerationClassifier +from ..Monoview.MonoviewUtils import CustomUniform, CustomRandint, \ + BaseMonoviewClassifier -import numpy as np -import os class CQBoostTree(ColumnGenerationClassifier, BaseMonoviewClassifier): - def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, n_stumps=1, max_depth=2, n_max_iterations=100, **kwargs): + def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, n_stumps=1, + max_depth=2, n_max_iterations=100, **kwargs): super(CQBoostTree, self).__init__( random_state=random_state, mu=mu, @@ -15,9 +17,12 @@ class CQBoostTree(ColumnGenerationClassifier, BaseMonoviewClassifier): estimators_generator="Trees", n_max_iterations=n_max_iterations ) - self.param_names = ["mu", "epsilon", "n_stumps", "random_state", "max_depth", "n_max_iterations"] + self.param_names = ["mu", "epsilon", "n_stumps", "random_state", + "max_depth", "n_max_iterations"] self.distribs = [CustomUniform(loc=0.5, state=1.0, multiplier="e-"), - CustomRandint(low=1, high=15, multiplier="e-"), [n_stumps], [random_state], [max_depth], [n_max_iterations]] + CustomRandint(low=1, high=15, multiplier="e-"), + [n_stumps], [random_state], [max_depth], + [n_max_iterations]] self.classed_params = [] self.weird_strings = {} self.n_stumps = n_stumps @@ -32,7 +37,8 @@ class CQBoostTree(ColumnGenerationClassifier, BaseMonoviewClassifier): return True def getInterpret(self, directory, y_test): - np.savetxt(directory + "train_metrics.csv", self.train_metrics, delimiter=',') + np.savetxt(directory + "train_metrics.csv", self.train_metrics, + delimiter=',') np.savetxt(directory + "c_bounds.csv", self.c_bounds, delimiter=',') np.savetxt(directory + "y_test_step.csv", self.step_decisions, @@ -45,16 +51,17 @@ class CQBoostTree(ColumnGenerationClassifier, BaseMonoviewClassifier): step_metrics = np.array(step_metrics) np.savetxt(directory + "step_test_metrics.csv", step_metrics, delimiter=',') - return getInterpretBase(self, directory, "CQBoost", self.weights_, y_test) + return getInterpretBase(self, directory, "CQBoost", self.weights_, + y_test) def formatCmdArgs(args): """Used to format kwargs for the parsed args""" kwargsDict = {"mu": args.CQBT_mu, "epsilon": args.CQBT_epsilon, - "n_stumps":args.CQBT_trees, - "max_depth":args.CQBT_max_depth, - "n_max_iterations":args.CQBT_n_iter} + "n_stumps": args.CQBT_trees, + "max_depth": args.CQBT_max_depth, + "n_max_iterations": args.CQBT_n_iter} return kwargsDict @@ -62,6 +69,6 @@ def paramsToSet(nIter, randomState): """Used for weighted linear early fusion to generate random search sets""" paramsSet = [] for _ in range(nIter): - paramsSet.append({"mu": 10**-randomState.uniform(0.5, 1.5), - "epsilon": 10**-randomState.randint(1, 15)}) + paramsSet.append({"mu": 10 ** -randomState.uniform(0.5, 1.5), + "epsilon": 10 ** 
-randomState.randint(1, 15)}) return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostv2.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostv2.py index 28f121c917375a2f3aca517c3996efed56d48d84..a308b9baeea0e456ceec5cd3f7d3a8c6388adf69 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostv2.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostv2.py @@ -1,63 +1,63 @@ -import scipy -import logging -import numpy.ma as ma -from collections import defaultdict -from sklearn.utils.validation import check_is_fitted -from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.pipeline import Pipeline -from sklearn.metrics import accuracy_score import numpy as np -import time -from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, BaseMonoviewClassifier from ..Monoview.Additions.BoostUtils import getInterpretBase from ..Monoview.Additions.CQBoostUtils import ColumnGenerationClassifier +from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, \ + BaseMonoviewClassifier + class ColumnGenerationClassifierv2(ColumnGenerationClassifier): def __init__(self, mu=0.01, epsilon=1e-06, random_state=None): - super(ColumnGenerationClassifierv2, self).__init__(mu=mu, epsilon=epsilon, random_state=random_state) + super(ColumnGenerationClassifierv2, self).__init__(mu=mu, + epsilon=epsilon, + random_state=random_state) def initialize(self): self.weights_ = [] self.edge_scores = [] self.alphas = [] - def update_values(self, h_values=None, worst_h_index=None, alpha=None, w=None): + def update_values(self, h_values=None, worst_h_index=None, alpha=None, + w=None): self.edge_scores.append(h_values[worst_h_index]) self.alphas.append(alpha) self.weights_.append(w[-1]) def get_margins(self, w=None): self.weights = np.array(self.weights_) - self.final_vote_weights = np.array([np.prod(1 - self.weights[t + 1:]) * self.weights_[t] if t < - self.weights.shape[ - 0] - 1 else - self.weights[t] for t in range(self.weights.shape[0])]) - margins = np.squeeze(np.asarray(np.matmul(self.classification_matrix[:, self.chosen_columns_], - self.final_vote_weights))) + self.final_vote_weights = np.array( + [np.prod(1 - self.weights[t + 1:]) * self.weights_[t] if t < + self.weights.shape[ + 0] - 1 else + self.weights[t] for t in range(self.weights.shape[0])]) + margins = np.squeeze(np.asarray( + np.matmul(self.classification_matrix[:, self.chosen_columns_], + self.final_vote_weights))) return margins def compute_weights_(self, w=None): self.weights_ = np.array(self.weights_) - self.final_vote_weights = np.array([np.prod(1 - self.weights_[t + 1:]) * self.weights_[t] if t < - self.weights_.shape[ - 0] - 1 else - self.weights_[t] for t in range(self.weights_.shape[0])]) + self.final_vote_weights = np.array( + [np.prod(1 - self.weights_[t + 1:]) * self.weights_[t] if t < + self.weights_.shape[ + 0] - 1 else + self.weights_[t] for t in range(self.weights_.shape[0])]) self.weights_ = self.final_vote_weights def get_matrix_to_optimize(self, y_kernel_matrix, w=None): m = self.n_total_examples if w is not None: - matrix_to_optimize = np.concatenate((np.matmul(self.matrix_to_optimize, w).reshape((m, 1)), - y_kernel_matrix[:, self.chosen_columns_[-1]].reshape((m, 1))), - axis=1) + matrix_to_optimize = np.concatenate( + (np.matmul(self.matrix_to_optimize, w).reshape((m, 1)), + y_kernel_matrix[:, self.chosen_columns_[-1]].reshape((m, 1))), + axis=1) else: - matrix_to_optimize = y_kernel_matrix[:, 
self.chosen_columns_[-1]].reshape((m, 1)) + matrix_to_optimize = y_kernel_matrix[:, + self.chosen_columns_[-1]].reshape((m, 1)) return matrix_to_optimize - class CQBoostv2(ColumnGenerationClassifierv2, BaseMonoviewClassifier): def __init__(self, random_state=None, mu=0.01, epsilon=1e-06, **kwargs): @@ -77,7 +77,7 @@ class CQBoostv2(ColumnGenerationClassifierv2, BaseMonoviewClassifier): return True def getInterpret(self, directory, y_test): - return getInterpretBase(self, directory, "CQBoostv2", self.weights_,) + return getInterpretBase(self, directory, "CQBoostv2", self.weights_, ) def get_name_for_fusion(self): return "CQB2" @@ -94,11 +94,10 @@ def paramsToSet(nIter, randomState): """Used for weighted linear early fusion to generate random search sets""" paramsSet = [] for _ in range(nIter): - paramsSet.append({"mu": 10**-randomState.uniform(0.5, 1.5), - "epsilon": 10**-randomState.randint(1, 15)}) + paramsSet.append({"mu": 10 ** -randomState.uniform(0.5, 1.5), + "epsilon": 10 ** -randomState.randint(1, 15)}) return paramsSet - # class CQBoostv2(CqBoostClassifierv2): # # def __init__(self, random_state, **kwargs): @@ -232,4 +231,3 @@ def paramsToSet(nIter, randomState): # # def getInterpret(classifier, directory): # return getInterpretBase(classifier, directory, "CQBoostv2", classifier.final_vote_weights) - diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostv21.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostv21.py index cf808f01e1b662ed6241c3a4694128e632b5d3cc..9875b22a2a480a149df3560a946a5d2d51658e6e 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostv21.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoostv21.py @@ -1,19 +1,24 @@ -import scipy import logging +import time +from collections import defaultdict + import numpy as np import numpy.ma as ma -from collections import defaultdict -from sklearn.utils.validation import check_is_fitted +import scipy from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.metrics import accuracy_score -import time +from sklearn.utils.validation import check_is_fitted -from ..Monoview.MonoviewUtils import CustomUniform, CustomRandint, BaseMonoviewClassifier -from ..Monoview.Additions.BoostUtils import StumpsClassifiersGenerator, sign, getInterpretBase, BaseBoost +from ..Monoview.Additions.BoostUtils import StumpsClassifiersGenerator, sign, \ + getInterpretBase, BaseBoost +from ..Monoview.MonoviewUtils import CustomUniform, CustomRandint, \ + BaseMonoviewClassifier class ColumnGenerationClassifierv21(BaseEstimator, ClassifierMixin, BaseBoost): - def __init__(self, epsilon=1e-06, n_max_iterations=None, estimators_generator=None, dual_constraint_rhs=0, save_iteration_as_hyperparameter_each=None, random_state=42): + def __init__(self, epsilon=1e-06, n_max_iterations=None, + estimators_generator=None, dual_constraint_rhs=0, + save_iteration_as_hyperparameter_each=None, random_state=42): super(ColumnGenerationClassifierv21, self).__init__() self.epsilon = epsilon self.n_max_iterations = n_max_iterations @@ -28,27 +33,28 @@ class ColumnGenerationClassifierv21(BaseEstimator, ClassifierMixin, BaseBoost): X = np.array(X.todense()) if self.estimators_generator is None: - self.estimators_generator = StumpsClassifiersGenerator(n_stumps_per_attribute=self.n_stumps, self_complemented=True) + self.estimators_generator = StumpsClassifiersGenerator( + n_stumps_per_attribute=self.n_stumps, self_complemented=True) y[y == 0] = -1 
self.estimators_generator.fit(X, y) self.classification_matrix = self._binary_classification_matrix(X) - self.weights_ = [] self.infos_per_iteration_ = defaultdict(list) m, n = self.classification_matrix.shape - y_kernel_matrix = np.multiply(y.reshape((len(y), 1)), self.classification_matrix) + y_kernel_matrix = np.multiply(y.reshape((len(y), 1)), + self.classification_matrix) - # Initialization + # Initialization w = None self.collected_weight_vectors_ = {} self.collected_dual_constraint_violations_ = {} - example_weights = self._initialize_alphas(m).reshape((m,1)) + example_weights = self._initialize_alphas(m).reshape((m, 1)) self.chosen_columns_ = [] self.fobidden_columns = [] @@ -60,23 +66,30 @@ class ColumnGenerationClassifierv21(BaseEstimator, ClassifierMixin, BaseBoost): self.n_total_hypotheses_ = n self.n_total_examples = m # print("\n \t\t Start fit\n") - for k in range(min(n, self.n_max_iterations if self.n_max_iterations is not None else np.inf)): + for k in range(min(n, + self.n_max_iterations if self.n_max_iterations is not None else np.inf)): # Find worst weak hypothesis given alpha. - new_voter_index, criterion = self._find_new_voter(example_weights, y_kernel_matrix, "pseudo_h") + new_voter_index, criterion = self._find_new_voter(example_weights, + y_kernel_matrix, + "pseudo_h") # Append the weak hypothesis. self.chosen_columns_.append(new_voter_index) self.fobidden_columns.append(new_voter_index) - new_voter_margin = y_kernel_matrix[:, self.chosen_columns_[-1]].reshape((m, 1)) + new_voter_margin = y_kernel_matrix[:, + self.chosen_columns_[-1]].reshape((m, 1)) self.edge_scores.append(criterion) if w is None: self.previous_vote = new_voter_margin w = 1 self.weights_.append(w) - example_weights = self._update_example_weights(example_weights, y_kernel_matrix, m) + example_weights = self._update_example_weights(example_weights, + y_kernel_matrix, + m) self.example_weights_.append(example_weights) - self.train_accuracies.append(accuracy_score(y, np.sign(self.previous_vote))) + self.train_accuracies.append( + accuracy_score(y, np.sign(self.previous_vote))) continue # ---- On resoud le probleme a deux votants analytiquement. @@ -85,28 +98,33 @@ class ColumnGenerationClassifierv21(BaseEstimator, ClassifierMixin, BaseBoost): self.chosen_columns_.pop() self.break_cause = w[1] break - self.previous_vote = np.matmul(np.concatenate((self.previous_vote, new_voter_margin), axis=1), - w).reshape((m,1)) + self.previous_vote = np.matmul( + np.concatenate((self.previous_vote, new_voter_margin), axis=1), + w).reshape((m, 1)) # We collect iteration information for later evaluation. 
self.weights_.append(w[-1]) self.weights = np.array(self.weights_) - self.final_vote_weights = np.array([np.prod(1 - self.weights[t + 1:]) * self.weights[t] if t < - self.weights.shape[ - 0] - 1 else - self.weights[t] for t in range(self.weights.shape[0])]) - margins = np.squeeze(np.asarray(np.matmul(self.classification_matrix[:, self.chosen_columns_], - self.final_vote_weights))) + self.final_vote_weights = np.array( + [np.prod(1 - self.weights[t + 1:]) * self.weights[t] if t < + self.weights.shape[ + 0] - 1 else + self.weights[t] for t in range(self.weights.shape[0])]) + margins = np.squeeze(np.asarray( + np.matmul(self.classification_matrix[:, self.chosen_columns_], + self.final_vote_weights))) signs_array = np.array([int(x) for x in sign(margins)]) self.train_accuracies.append(accuracy_score(y, signs_array)) # ---- On change l'edge - example_weights = self._update_example_weights(example_weights, y_kernel_matrix, m) + example_weights = self._update_example_weights(example_weights, + y_kernel_matrix, m) self.example_weights_.append(example_weights) self.nb_opposed_voters = self.check_opposed_voters() - self.estimators_generator.estimators_ = self.estimators_generator.estimators_[self.chosen_columns_] + self.estimators_generator.estimators_ = \ + self.estimators_generator.estimators_[self.chosen_columns_] y[y == -1] = 0 @@ -121,41 +139,58 @@ class ColumnGenerationClassifierv21(BaseEstimator, ClassifierMixin, BaseBoost): X = np.array(X.todense()) classification_matrix = self._binary_classification_matrix(X) self.weights_ = np.array(self.weights_) - self.final_vote_weights = np.array([np.prod(1-self.weights_[t+1:])*self.weights_[t] if t < self.weights_.shape[0]-1 else self.weights_[t] for t in range(self.weights_.shape[0]) ]) - margins = np.squeeze(np.asarray(np.matmul(classification_matrix, self.final_vote_weights))) + self.final_vote_weights = np.array([np.prod(1 - self.weights_[t + 1:]) * + self.weights_[t] if t < + self.weights_.shape[ + 0] - 1 else + self.weights_[t] for t in + range(self.weights_.shape[0])]) + margins = np.squeeze(np.asarray( + np.matmul(classification_matrix, self.final_vote_weights))) signs_array = np.array([int(x) for x in sign(margins)]) - signs_array[signs_array == -1 ] = 0 + signs_array[signs_array == -1] = 0 end = time.time() - self.predict_time = end-start + self.predict_time = end - start return signs_array - def _find_new_voter(self, example_weights, y_kernel_matrix, type="pseudo_h"): + def _find_new_voter(self, example_weights, y_kernel_matrix, + type="pseudo_h"): if type == "pseudo_h": - pseudo_h_values = ma.array(np.squeeze(np.array(example_weights.T.dot(y_kernel_matrix).T)), fill_value=-np.inf) + pseudo_h_values = ma.array( + np.squeeze(np.array(example_weights.T.dot(y_kernel_matrix).T)), + fill_value=-np.inf) pseudo_h_values[self.fobidden_columns] = ma.masked worst_h_index = ma.argmax(pseudo_h_values) return worst_h_index, pseudo_h_values[worst_h_index] elif type == "random": - new_index = self.random_state.choice(np.arange(self.n_total_hypotheses_)) + new_index = self.random_state.choice( + np.arange(self.n_total_hypotheses_)) while new_index in self.fobidden_columns: - new_index = self.random_state.choice(np.arange(self.n_total_hypotheses_)) + new_index = self.random_state.choice( + np.arange(self.n_total_hypotheses_)) return new_index, 100 def _update_example_weights(self, example_weights, y_kernel_matrix, m): - if len(self.weights_)==1: + if len(self.weights_) == 1: example_weights[self.previous_vote == -1] *= 2 - example_weights[self.previous_vote 
== 1 ] /= 2 + example_weights[self.previous_vote == 1] /= 2 pass else: weights = np.array(self.weights_) - current_vote_weights = np.array([np.prod(1 - weights[t + 1:]) * weights[t] if t < - weights.shape[ - 0] - 1 else - weights[t] for t in range(weights.shape[0])]).reshape((weights.shape[0], 1)) - weighted_margin = np.matmul(y_kernel_matrix[:, self.chosen_columns_], current_vote_weights) + current_vote_weights = np.array( + [np.prod(1 - weights[t + 1:]) * weights[t] if t < + weights.shape[ + 0] - 1 else + weights[t] for t in range(weights.shape[0])]).reshape( + (weights.shape[0], 1)) + weighted_margin = np.matmul( + y_kernel_matrix[:, self.chosen_columns_], current_vote_weights) example_weights = np.multiply(example_weights, - np.exp((1 - np.sum(weighted_margin, axis=1) / - np.sum(weighted_margin, axis=1))).reshape((m, 1))) + np.exp((1 - np.sum(weighted_margin, + axis=1) / + np.sum(weighted_margin, + axis=1))).reshape( + (m, 1))) return example_weights def _solve_two_weights_min_c(self, next_column, example_weights): @@ -163,17 +198,21 @@ class ColumnGenerationClassifierv21(BaseEstimator, ClassifierMixin, BaseBoost): zero_diag = np.ones((m, m)) - np.identity(m) weighted_previous_vote = self.previous_vote.reshape((m, 1)) - weighted_next_column = next_column.reshape((m,1)) + weighted_next_column = next_column.reshape((m, 1)) mat_prev = np.repeat(weighted_previous_vote, m, axis=1) * zero_diag mat_next = np.repeat(weighted_next_column, m, axis=1) * zero_diag self.B2 = np.sum((weighted_previous_vote - weighted_next_column) ** 2) - self.B1 = np.sum(2 * weighted_next_column * (weighted_previous_vote - 2 * weighted_next_column * weighted_next_column)) + self.B1 = np.sum(2 * weighted_next_column * ( + weighted_previous_vote - 2 * weighted_next_column * weighted_next_column)) self.B0 = np.sum(weighted_next_column * weighted_next_column) - self.A2 = self.B2 + np.sum((mat_prev - mat_next) * np.transpose(mat_prev - mat_next)) - self.A1 = self.B1 + np.sum(mat_prev * np.transpose(mat_next) - mat_next * np.transpose(mat_prev) - 2 * mat_next * np.transpose(mat_next)) + self.A2 = self.B2 + np.sum( + (mat_prev - mat_next) * np.transpose(mat_prev - mat_next)) + self.A1 = self.B1 + np.sum( + mat_prev * np.transpose(mat_next) - mat_next * np.transpose( + mat_prev) - 2 * mat_next * np.transpose(mat_next)) self.A0 = self.B0 + np.sum(mat_next * np.transpose(mat_next)) C2 = (self.A1 * self.B2 - self.A2 * self.B1) @@ -195,13 +234,13 @@ class ColumnGenerationClassifierv21(BaseEstimator, ClassifierMixin, BaseBoost): is_acceptable, sol = self._analyze_solutions(sols) if is_acceptable: # print("cb", self._cborn(sol)) - return np.array([sol, 1-sol]) + return np.array([sol, 1 - sol]) else: return ["break", sol] def _analyze_solutions(self, sols): if sols.shape[0] == 1: - if self._cborn(sols[0]) < self._cborn(sols[0]+1): + if self._cborn(sols[0]) < self._cborn(sols[0] + 1): best_sol = sols[0] else: return False, " the only solution was a maximum." @@ -219,7 +258,8 @@ class ColumnGenerationClassifierv21(BaseEstimator, ClassifierMixin, BaseBoost): return False, " the minimum was over 1." 
def _cborn(self, sol): - return 1 - (self.A2*sol**2 + self.A1*sol + self.A0)/(self.B2*sol**2 + self.B1*sol + self.B0) + return 1 - (self.A2 * sol ** 2 + self.A1 * sol + self.A0) / ( + self.B2 * sol ** 2 + self.B1 * sol + self.B0) def _best_sol(self, sols): values = np.array([self._cborn(sol) for sol in sols]) @@ -230,9 +270,14 @@ class ColumnGenerationClassifierv21(BaseEstimator, ClassifierMixin, BaseBoost): class CqBoostClassifierv21(ColumnGenerationClassifierv21): - def __init__(self, mu=0.001, epsilon=1e-08, n_max_iterations=None, estimators_generator=None, save_iteration_as_hyperparameter_each=None, random_state=42): - super(CqBoostClassifierv21, self).__init__(epsilon, n_max_iterations, estimators_generator, dual_constraint_rhs=0, - save_iteration_as_hyperparameter_each=save_iteration_as_hyperparameter_each, random_state=random_state) + def __init__(self, mu=0.001, epsilon=1e-08, n_max_iterations=None, + estimators_generator=None, + save_iteration_as_hyperparameter_each=None, random_state=42): + super(CqBoostClassifierv21, self).__init__(epsilon, n_max_iterations, + estimators_generator, + dual_constraint_rhs=0, + save_iteration_as_hyperparameter_each=save_iteration_as_hyperparameter_each, + random_state=random_state) self.train_time = 0 self.mu = mu @@ -259,7 +304,8 @@ class CQBoostv21(CqBoostClassifierv21, BaseMonoviewClassifier): return True def getInterpret(self, directory, y_test): - return getInterpretBase(self, directory, "CQBoostv21", self.weights_, self.break_cause) + return getInterpretBase(self, directory, "CQBoostv21", self.weights_, + self.break_cause) def get_name_for_fusion(self): return "CQ21" @@ -276,6 +322,6 @@ def paramsToSet(nIter, randomState): """Used for weighted linear early fusion to generate random search sets""" paramsSet = [] for _ in range(nIter): - paramsSet.append({"mu": 10**-randomState.uniform(0.5, 1.5), - "epsilon": 10**-randomState.randint(1, 15)}) - return paramsSet \ No newline at end of file + paramsSet.append({"mu": 10 ** -randomState.uniform(0.5, 1.5), + "epsilon": 10 ** -randomState.randint(1, 15)}) + return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py index 7ff6129659de33f60edf56f42133c60206c07d0c..4aec553905287fe2b860325d10d5c59b7ca85c23 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py @@ -16,8 +16,9 @@ class DecisionTree(DecisionTreeClassifier, BaseMonoviewClassifier): criterion=criterion, splitter=splitter, random_state=random_state - ) - self.param_names = ["max_depth", "criterion", "splitter",'random_state'] + ) + self.param_names = ["max_depth", "criterion", "splitter", + 'random_state'] self.classed_params = [] self.distribs = [CustomRandint(low=1, high=300), ["gini", "entropy"], @@ -48,4 +49,4 @@ def paramsToSet(nIter, randomState): paramsSet.append({"max_depth": randomState.randint(1, 300), "criterion": randomState.choice(["gini", "entropy"]), "splitter": randomState.choice(["best", "random"])}) - return paramsSet \ No newline at end of file + return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTreePregen.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTreePregen.py index 14f554e5281b3b267188f4feed31293e16c616c6..75fa10cc1ef5001674c02c3f842e2ce7dfaa4854 100644 --- 
a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTreePregen.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTreePregen.py @@ -1,29 +1,34 @@ -from sklearn.tree import DecisionTreeClassifier import time + import numpy as np +from sklearn.tree import DecisionTreeClassifier -from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier, change_label_to_minus, change_label_to_zero from ..Monoview.Additions.PregenUtils import PregenClassifier +from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier, \ + change_label_to_zero # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -class DecisionTreePregen(DecisionTreeClassifier, BaseMonoviewClassifier, PregenClassifier): +class DecisionTreePregen(DecisionTreeClassifier, BaseMonoviewClassifier, + PregenClassifier): def __init__(self, random_state=None, max_depth=None, - criterion='gini', splitter='best', n_stumps=1, self_complemented=True, **kwargs): + criterion='gini', splitter='best', n_stumps=1, + self_complemented=True, **kwargs): super(DecisionTreePregen, self).__init__( max_depth=max_depth, criterion=criterion, splitter=splitter, random_state=random_state - ) + ) self.estimators_generator = "Stumps" self.n_stumps = n_stumps self.self_complemented = self_complemented - self.param_names = ["max_depth", "criterion", "splitter",'random_state', + self.param_names = ["max_depth", "criterion", "splitter", + 'random_state', 'n_stumps'] self.classed_params = [] self.distribs = [CustomRandint(low=1, high=300), @@ -47,7 +52,8 @@ class DecisionTreePregen(DecisionTreeClassifier, BaseMonoviewClassifier, PregenC def predict(self, X, check_input=True): begin = time.time() pregen_X, _ = self.pregen_voters(X) - pred = super(DecisionTreePregen, self).predict(pregen_X, check_input=check_input) + pred = super(DecisionTreePregen, self).predict(pregen_X, + check_input=check_input) end = time.time() self.pred_time = end - begin return change_label_to_zero(pred) @@ -69,7 +75,7 @@ def formatCmdArgs(args): kwargsDict = {"max_depth": args.DTP_depth, "criterion": args.DTP_criterion, "splitter": args.DTP_splitter, - "n_stumps":args.DTP_stumps} + "n_stumps": args.DTP_stumps} return kwargsDict @@ -80,5 +86,3 @@ def paramsToSet(nIter, randomState): "criterion": randomState.choice(["gini", "entropy"]), "splitter": randomState.choice(["best", "random"])}) return paramsSet - - diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/GradientBoosting.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/GradientBoosting.py index 493e5909995fbbb76208a1e252882c8b3402e8e8..8b6acba93ae808025f64109c303085bb1e7489bc 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/GradientBoosting.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/GradientBoosting.py @@ -1,11 +1,12 @@ -from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import GradientBoostingClassifier import time + import numpy as np +from sklearn.ensemble import GradientBoostingClassifier +from sklearn.tree import DecisionTreeClassifier -from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier from .. 
import Metrics from ..Monoview.Additions.BoostUtils import get_accuracy_graph +from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier # Author-Info __author__ = "Baptiste Bauvin" @@ -14,9 +15,11 @@ __status__ = "Prototype" # Production, Development, Prototype class CustomDecisionTree(DecisionTreeClassifier): def predict(self, X, check_input=True): - y_pred = super(CustomDecisionTree, self).predict(X, check_input=check_input) + y_pred = super(CustomDecisionTree, self).predict(X, + check_input=check_input) return y_pred.reshape((y_pred.shape[0], 1)).astype(float) + class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier): def __init__(self, random_state=None, loss="exponential", max_depth=1.0, @@ -29,10 +32,10 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier): n_estimators=n_estimators, init=init, random_state=random_state - ) - self.param_names = ["n_estimators",] + ) + self.param_names = ["n_estimators", ] self.classed_params = [] - self.distribs = [CustomRandint(low=50, high=500),] + self.distribs = [CustomRandint(low=50, high=500), ] self.weird_strings = {} self.plotted_metric = Metrics.zero_one_loss self.plotted_metric_name = "zero_one_loss" @@ -47,7 +50,8 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier): self.base_predictions = np.array( [estim[0].predict(X) for estim in self.estimators_]) self.metrics = np.array( - [self.plotted_metric.score(pred, y) for pred in self.staged_predict(X)]) + [self.plotted_metric.score(pred, y) for pred in + self.staged_predict(X)]) # self.bounds = np.array([np.prod( # np.sqrt(1 - 4 * np.square(0.5 - self.estimator_errors_[:i + 1]))) for i # in range(self.estimator_errors_.shape[0])]) @@ -59,7 +63,8 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier): end = time.time() self.pred_time = end - begin if X.shape != self.train_shape: - self.step_predictions = np.array([step_pred for step_pred in self.staged_predict(X)]) + self.step_predictions = np.array( + [step_pred for step_pred in self.staged_predict(X)]) return pred def canProbas(self): @@ -69,24 +74,30 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier): def getInterpret(self, directory, y_test): interpretString = "" interpretString += self.getFeatureImportance(directory) - step_test_metrics = np.array([self.plotted_metric.score(y_test, step_pred) for step_pred in self.step_predictions]) - get_accuracy_graph(step_test_metrics, "AdaboostClassic", directory + "test_metrics.png", + step_test_metrics = np.array( + [self.plotted_metric.score(y_test, step_pred) for step_pred in + self.step_predictions]) + get_accuracy_graph(step_test_metrics, "AdaboostClassic", + directory + "test_metrics.png", self.plotted_metric_name, set="test") - get_accuracy_graph(self.metrics, "AdaboostClassic", directory+"metrics.png", self.plotted_metric_name) - np.savetxt(directory + "test_metrics.csv", step_test_metrics, delimiter=',') + get_accuracy_graph(self.metrics, "AdaboostClassic", + directory + "metrics.png", self.plotted_metric_name) + np.savetxt(directory + "test_metrics.csv", step_test_metrics, + delimiter=',') np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',') - np.savetxt(directory + "times.csv", np.array([self.train_time, self.pred_time]), delimiter=',') + np.savetxt(directory + "times.csv", + np.array([self.train_time, self.pred_time]), delimiter=',') return interpretString def formatCmdArgs(args): """Used to format kwargs for the parsed args""" - 
kwargsDict = {"n_estimators": args.GB_n_est,} + kwargsDict = {"n_estimators": args.GB_n_est, } return kwargsDict def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): - paramsSet.append({"n_estimators": randomState.randint(50, 500),}) - return paramsSet \ No newline at end of file + paramsSet.append({"n_estimators": randomState.randint(50, 500), }) + return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/KNN.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/KNN.py index 5fbcaeb825aac67d4d7c9ddac15fe5ae4f4ffd15..9774ef7f0cf8c2553b28faf5d4f322d9390efc69 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/KNN.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/KNN.py @@ -16,13 +16,15 @@ class KNN(KNeighborsClassifier, BaseMonoviewClassifier): weights=weights, algorithm=algorithm, p=p - ) - self.param_names = ["n_neighbors", "weights", "algorithm", "p", "random_state",] + ) + self.param_names = ["n_neighbors", "weights", "algorithm", "p", + "random_state", ] self.classed_params = [] self.distribs = [CustomRandint(low=1, high=10), ["uniform", "distance"], - ["auto", "ball_tree", "kd_tree", "brute"], [1, 2], [random_state]] + ["auto", "ball_tree", "kd_tree", "brute"], [1, 2], + [random_state]] self.weird_strings = {} - self.random_state=random_state + self.random_state = random_state def canProbas(self): """Used to know if the classifier can return label probabilities""" @@ -36,9 +38,9 @@ class KNN(KNeighborsClassifier, BaseMonoviewClassifier): def formatCmdArgs(args): """Used to format kwargs for the parsed args""" kwargsDict = {"n_neighbors": args.KNN_neigh, - "weights":args.KNN_weights, - "algorithm":args.KNN_algo, - "p":args.KNN_p} + "weights": args.KNN_weights, + "algorithm": args.KNN_algo, + "p": args.KNN_p} return kwargsDict @@ -46,7 +48,9 @@ def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): paramsSet.append({"n_neighbors": randomState.randint(1, 20), - "weights": randomState.choice(["uniform", "distance"]), - "algorithm": randomState.choice(["auto", "ball_tree", "kd_tree", "brute"]), + "weights": randomState.choice( + ["uniform", "distance"]), + "algorithm": randomState.choice( + ["auto", "ball_tree", "kd_tree", "brute"]), "p": randomState.choice([1, 2])}) return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Lasso.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Lasso.py index 1e4d3abe954bc0faa7cdd29c630d585f25c50179..19def37abb8bc1e7c9141f13364cac69eb1cd612 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Lasso.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Lasso.py @@ -1,7 +1,8 @@ -from sklearn.linear_model import Lasso import numpy as np +from sklearn.linear_model import Lasso -from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, BaseMonoviewClassifier +from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, \ + BaseMonoviewClassifier # Author-Info __author__ = "Baptiste Bauvin" @@ -17,7 +18,7 @@ class Lasso(Lasso, BaseMonoviewClassifier): max_iter=max_iter, warm_start=warm_start, random_state=random_state - ) + ) self.param_names = ["max_iter", "alpha", "random_state"] self.classed_params = [] self.distribs = [CustomRandint(low=1, high=300), @@ -26,17 +27,16 @@ class Lasso(Lasso, BaseMonoviewClassifier): def fit(self, X, y, check_input=True): neg_y = np.copy(y) - neg_y[np.where(neg_y==0)] = -1 + 
neg_y[np.where(neg_y == 0)] = -1 super(Lasso, self).fit(X, neg_y) return self def predict(self, X): prediction = super(Lasso, self).predict(X) signed = np.sign(prediction) - signed[np.where(signed==-1)] = 0 + signed[np.where(signed == -1)] = 0 return signed - def canProbas(self): """Used to know if the classifier can return label probabilities""" return False @@ -57,5 +57,5 @@ def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): paramsSet.append({"max_iter": randomState.randint(1, 300), - "alpha": randomState.uniform(0,1.0),}) - return paramsSet \ No newline at end of file + "alpha": randomState.uniform(0, 1.0), }) + return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQ.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQ.py index 58f47d0e9dd5041717582c8ebe8290d5bbc4ed00..dcf6c5b642481395855186c3b2d5c260e25859d8 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQ.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQ.py @@ -1,10 +1,8 @@ -from ..Monoview.MonoviewUtils import CustomUniform, CustomRandint, BaseMonoviewClassifier -from ..Monoview.Additions.BoostUtils import getInterpretBase -from ..Monoview.Additions.CGDescUtils import ColumnGenerationClassifierQar +from ..Monoview.MonoviewUtils import CustomUniform, BaseMonoviewClassifier #### Algorithm code #### -#-*- coding:utf-8 -*- +# -*- coding:utf-8 -*- """ MinCq learning algorithm Related papers: @@ -16,11 +14,13 @@ http://graal.ift.ulaval.ca/majorityvote/ __author__ = 'Jean-Francis Roy' import logging -import numpy as np from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, polynomial_kernel +from sklearn.metrics.pairwise import rbf_kernel, linear_kernel, \ + polynomial_kernel # from qp import QP from ..Monoview.Additions.BoostUtils import ConvexProgram as QP + + # from majority_vote import MajorityVote # from voter import StumpsVotersGenerator, KernelVotersGenerator @@ -54,8 +54,10 @@ class MinCqLearner(BaseEstimator, ClassifierMixin): Kernel coefficient for 'rbf' and 'poly'. If gamma is 0.0 then 1/n_features will be used instead. 
""" - def __init__(self, mu, voters_type, n_stumps_per_attribute=10, kernel='rbf', degree=3, gamma=0.0, self_complemented=True): - assert mu > 0 and mu <= 1, "MinCqLearner: mu parameter must be in (0, 1]" + + def __init__(self, mu, voters_type, n_stumps_per_attribute=10, kernel='rbf', + degree=3, gamma=0.0, self_complemented=True): + assert 0 < mu <= 1, "MinCqLearner: mu parameter must be in (0, 1]" self.mu = mu self.voters_type = voters_type self.n_stumps_per_attribute = n_stumps_per_attribute @@ -83,17 +85,19 @@ class MinCqLearner(BaseEstimator, ClassifierMixin): A priori generated voters """ # Preparation of the majority vote, using a voter generator that depends on class attributes - if (np.unique(y)!= [-1,1]).any(): + if (np.unique(y) != [-1, 1]).any(): y_reworked = np.copy(y) - y_reworked[np.where(y_reworked==0)] = -1 + y_reworked[np.where(y_reworked == 0)] = -1 else: y_reworked = y - assert self.voters_type in ['stumps', 'kernel', 'manual'], "MinCqLearner: voters_type must be 'stumps', 'kernel' or 'manual'" + assert self.voters_type in ['stumps', 'kernel', + 'manual'], "MinCqLearner: voters_type must be 'stumps', 'kernel' or 'manual'" if self.voters_type == 'manual': if voters is None: - logging.error("Manually set voters is True, but no voters have been set.") + logging.error( + "Manually set voters is True, but no voters have been set.") return self else: @@ -101,10 +105,12 @@ class MinCqLearner(BaseEstimator, ClassifierMixin): if self.voters_type == 'stumps': assert self.n_stumps_per_attribute >= 1, 'MinCqLearner: n_stumps_per_attribute must be positive' - voters_generator = StumpsVotersGenerator(self.n_stumps_per_attribute) + voters_generator = StumpsVotersGenerator( + self.n_stumps_per_attribute) elif self.voters_type == 'kernel': - assert self.kernel in ['linear', 'poly', 'rbf'], "MinCqLearner: kernel must be 'linear', 'poly' or 'rbf'" + assert self.kernel in ['linear', 'poly', + 'rbf'], "MinCqLearner: kernel must be 'linear', 'poly' or 'rbf'" gamma = self.gamma if gamma == 0.0: @@ -113,11 +119,15 @@ class MinCqLearner(BaseEstimator, ClassifierMixin): if self.kernel == 'linear': voters_generator = KernelVotersGenerator(linear_kernel) elif self.kernel == 'poly': - voters_generator = KernelVotersGenerator(polynomial_kernel, degree=self.degree, gamma=gamma) + voters_generator = KernelVotersGenerator(polynomial_kernel, + degree=self.degree, + gamma=gamma) elif self.kernel == 'rbf': - voters_generator = KernelVotersGenerator(rbf_kernel, gamma=gamma) + voters_generator = KernelVotersGenerator(rbf_kernel, + gamma=gamma) - voters = voters_generator.generate(X, y_reworked, self_complemented=self.self_complemented) + voters = voters_generator.generate(X, y_reworked, + self_complemented=self.self_complemented) if self.log: logging.info("MinCq training started...") @@ -140,12 +150,17 @@ class MinCqLearner(BaseEstimator, ClassifierMixin): # Conversion of the weights of the n first voters to weights on the implicit 2n voters. # See Section 7.1 of [2] for an explanation. 
- self.majority_vote.weights = np.array([2 * q - 1.0 / n_base_voters for q in solver_weights]) + self.majority_vote.weights = np.array( + [2 * q - 1.0 / n_base_voters for q in solver_weights]) if self.log: - logging.info("First moment of the margin on the training set: {:.4f}".format(np.mean(y_reworked * self.majority_vote.margin(X)))) + logging.info( + "First moment of the margin on the training set: {:.4f}".format( + np.mean(y_reworked * self.majority_vote.margin(X)))) except Exception as e: - logging.error("{}: Error while solving the quadratic program: {}.".format(str(self), str(e))) + logging.error( + "{}: Error while solving the quadratic program: {}.".format( + str(self), str(e))) self.majority_vote = None self.cbound_train = self.majority_vote.cbound_value(X, y_reworked) @@ -167,13 +182,15 @@ class MinCqLearner(BaseEstimator, ClassifierMixin): if self.log: logging.info("Predicting...") if self.majority_vote is None: - logging.error("{}: Error while predicting: MinCq has not been fit or fitting has failed. Will output invalid labels".format(str(self))) + logging.error( + "{}: Error while predicting: MinCq has not been fit or fitting has failed. Will output invalid labels".format( + str(self))) return np.zeros((len(X),)) if save_data: self.x_test = X vote = self.majority_vote.vote(X) - vote[np.where(vote==-1)] = 0 + vote[np.where(vote == -1)] = 0 return vote def predict_proba(self, X): @@ -221,12 +238,15 @@ class MinCqLearner(BaseEstimator, ClassifierMixin): classification_matrix = self.majority_vote.classification_matrix(X) # Objective function. - self.qp.quadratic_func = 2.0 / n_examples * classification_matrix.T.dot(classification_matrix) - self.qp.linear_func = np.matrix(np.matrix(-1.0 * np.mean(self.qp.quadratic_func / 2.0, axis=1))).T + self.qp.quadratic_func = 2.0 / n_examples * classification_matrix.T.dot( + classification_matrix) + self.qp.linear_func = np.matrix( + np.matrix(-1.0 * np.mean(self.qp.quadratic_func / 2.0, axis=1))).T # First moment of the margin fixed to mu. a_matrix = 2.0 / n_examples * y.T.dot(classification_matrix) - self.qp.add_equality_constraints(a_matrix, self.mu + 1.0/2 * np.mean(a_matrix)) + self.qp.add_equality_constraints(a_matrix, + self.mu + 1.0 / 2 * np.mean(a_matrix)) # Lower and upper bounds on the variables self.qp.add_lower_bound(0.0) @@ -244,11 +264,12 @@ class MajorityVote(object): weights : ndarray, optional (default: uniform distribution) The weights associated to each voter. """ + def __init__(self, voters, weights=None): self._voters = np.array(voters) if weights is not None: - assert(len(voters) == len(weights)) + assert (len(voters) == len(weights)) self._weights = np.array(weights) else: self._weights = np.array([1.0 / len(voters)] * len(voters)) @@ -283,7 +304,8 @@ class MajorityVote(object): The margin of the majority vote for each sample. """ classification_matrix = self.classification_matrix(X) - return np.squeeze(np.asarray(np.dot(classification_matrix, self.weights))) + return np.squeeze( + np.asarray(np.dot(classification_matrix, self.weights))) def classification_matrix(self, X): """ Returns the classification matrix of the majority vote. @@ -327,15 +349,20 @@ class MajorityVote(object): y : ndarray, shape=(n_samples, ) Input labels, where each label is either -1 or 1. 
""" - assert np.all(np.in1d(y, [-1, 1])), 'cbound_value: labels should be either -1 or 1' + assert np.all(np.in1d(y, [-1, + 1])), 'cbound_value: labels should be either -1 or 1' classification_matrix = self.classification_matrix(X) - first_moment = float(1.0/len(y) * classification_matrix.dot(self.weights).dot(y)) - second_moment = float(1.0/len(y) *self.weights.T.dot(classification_matrix.T.dot(classification_matrix)).dot(self.weights)) + first_moment = float( + 1.0 / len(y) * classification_matrix.dot(self.weights).dot(y)) + second_moment = float(1.0 / len(y) * self.weights.T.dot( + classification_matrix.T.dot(classification_matrix)).dot( + self.weights)) return 1 - (first_moment ** 2 / second_moment) -#-*- coding:utf-8 -*- + +# -*- coding:utf-8 -*- __author__ = "Jean-Francis Roy" import numpy as np @@ -344,6 +371,7 @@ import numpy as np class Voter(object): """ Base class for a voter (function X -> [-1, 1]), where X is an array of samples """ + def __init__(self): pass @@ -384,7 +412,7 @@ class BinaryKernelVoter(Voter): """ def __init__(self, x, y, kernel_function, **kwargs): - assert(y in {-1, 1}) + assert (y in {-1, 1}) super(BinaryKernelVoter, self).__init__() self._x = x self._y = y @@ -393,7 +421,8 @@ class BinaryKernelVoter(Voter): def vote(self, X): base_point_array = np.array([self._x]) - votes = self._y * self._kernel_function(base_point_array, X, **self._kernel_kwargs) + votes = self._y * self._kernel_function(base_point_array, X, + **self._kernel_kwargs) votes = np.squeeze(np.asarray(votes)) return votes @@ -414,6 +443,7 @@ class DecisionStumpVoter(Voter): direction : int (-1 or 1) Used to reverse classification decision """ + def __init__(self, attribute_index, threshold, direction=1): super(DecisionStumpVoter, self).__init__() self.attribute_index = attribute_index @@ -421,8 +451,9 @@ class DecisionStumpVoter(Voter): self.direction = direction def vote(self, points): - return [((point[self.attribute_index] > self.threshold) * 2 - 1) * self.direction for point in points] - + return [((point[ + self.attribute_index] > self.threshold) * 2 - 1) * self.direction + for point in points] class VotersGenerator(object): @@ -459,6 +490,7 @@ class StumpsVotersGenerator(VotersGenerator): n_stumps_per_attribute : int, (default=10) Determines how many decision stumps will be created for each attribute. 
""" + def __init__(self, n_stumps_per_attribute=10): self._n_stumps_per_attribute = n_stumps_per_attribute @@ -472,7 +504,8 @@ class StumpsVotersGenerator(VotersGenerator): maxi = x[i] return mini, maxi - def generate(self, X, y=None, self_complemented=False, only_complements=False): + def generate(self, X, y=None, self_complemented=False, + only_complements=False): voters = [] if len(X) != 0: for i in range(len(X[0])): @@ -485,10 +518,14 @@ class StumpsVotersGenerator(VotersGenerator): for x in range(self._n_stumps_per_attribute): if not only_complements: - voters.append(DecisionStumpVoter(i, t[0] + inter * (x + 1), 1)) + voters.append( + DecisionStumpVoter(i, t[0] + inter * (x + 1), + 1)) if self_complemented or only_complements: - voters.append(DecisionStumpVoter(i, t[0] + inter * (x + 1), -1)) + voters.append( + DecisionStumpVoter(i, t[0] + inter * (x + 1), + -1)) return np.array(voters) @@ -510,7 +547,8 @@ class KernelVotersGenerator(VotersGenerator): self._kernel_function = kernel_function self._kernel_kwargs = kwargs - def generate(self, X, y=None, self_complemented=False, only_complements=False): + def generate(self, X, y=None, self_complemented=False, + only_complements=False): if y is None: y = np.array([1] * len(X)) @@ -518,25 +556,31 @@ class KernelVotersGenerator(VotersGenerator): for point, label in zip(X, y): if not only_complements: - voters.append(BinaryKernelVoter(point, label, self._kernel_function, **self._kernel_kwargs)) + voters.append( + BinaryKernelVoter(point, label, self._kernel_function, + **self._kernel_kwargs)) if self_complemented or only_complements: - voters.append(BinaryKernelVoter(point, -1 * label, self._kernel_function, **self._kernel_kwargs)) + voters.append( + BinaryKernelVoter(point, -1 * label, self._kernel_function, + **self._kernel_kwargs)) return np.array(voters) + class MinCQ(MinCqLearner, BaseMonoviewClassifier): - def __init__(self, random_state=None, mu=0.01, self_complemented=True , n_stumps_per_attribute=10, **kwargs): + def __init__(self, random_state=None, mu=0.01, self_complemented=True, + n_stumps_per_attribute=10, **kwargs): super(MinCQ, self).__init__(mu=mu, - voters_type='stumps', - n_stumps_per_attribute =n_stumps_per_attribute, - self_complemented=self_complemented - ) + voters_type='stumps', + n_stumps_per_attribute=n_stumps_per_attribute, + self_complemented=self_complemented + ) self.param_names = ["mu", "n_stumps_per_attribute", "random_state"] self.distribs = [CustomUniform(loc=0.5, state=2.0, multiplier="e-"), [n_stumps_per_attribute], [random_state]] - self.random_state=random_state + self.random_state = random_state self.classed_params = [] self.weird_strings = {} if "nbCores" not in kwargs: @@ -559,10 +603,11 @@ class MinCQ(MinCqLearner, BaseMonoviewClassifier): "n_stumps_per_attribute": self.n_stumps_per_attribute} def getInterpret(self, directory, y_test): - interpret_string = "Train C_bound value : "+str(self.cbound_train) + interpret_string = "Train C_bound value : " + str(self.cbound_train) y_rework = np.copy(y_test) - y_rework[np.where(y_rework==0)] = -1 - interpret_string += "\n Test c_bound value : "+str(self.majority_vote.cbound_value(self.x_test, y_rework)) + y_rework[np.where(y_rework == 0)] = -1 + interpret_string += "\n Test c_bound value : " + str( + self.majority_vote.cbound_value(self.x_test, y_rework)) return interpret_string def get_name_for_fusion(self): @@ -571,8 +616,8 @@ class MinCQ(MinCqLearner, BaseMonoviewClassifier): def formatCmdArgs(args): """Used to format kwargs for the parsed args""" - 
kwargsDict = {"mu":args.MCQ_mu, - "n_stumps_per_attribute":args.MCQ_stumps} + kwargsDict = {"mu": args.MCQ_mu, + "n_stumps_per_attribute": args.MCQ_stumps} return kwargsDict @@ -581,4 +626,4 @@ def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): paramsSet.append({}) - return paramsSet \ No newline at end of file + return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQGraalpy.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQGraalpy.py index f7f7fbfb601662a9cb64f30815c6e56805b9e059..7923daec1798bd4cb1dbd21f92b13e6f6bd4e3c1 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQGraalpy.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQGraalpy.py @@ -1,17 +1,19 @@ import numpy as np -from ..Monoview.Additions.MinCQUtils import RegularizedBinaryMinCqClassifier from ..Monoview.Additions.BoostUtils import StumpsClassifiersGenerator +from ..Monoview.Additions.MinCQUtils import RegularizedBinaryMinCqClassifier from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomUniform - class MinCQGraalpy(RegularizedBinaryMinCqClassifier, BaseMonoviewClassifier): - def __init__(self, random_state=None, mu=0.01, self_complemented=True, n_stumps_per_attribute=1, **kwargs): + def __init__(self, random_state=None, mu=0.01, self_complemented=True, + n_stumps_per_attribute=1, **kwargs): super(MinCQGraalpy, self).__init__(mu=mu, - estimators_generator=StumpsClassifiersGenerator(n_stumps_per_attribute=n_stumps_per_attribute, self_complemented=self_complemented), - ) + estimators_generator=StumpsClassifiersGenerator( + n_stumps_per_attribute=n_stumps_per_attribute, + self_complemented=self_complemented), + ) self.param_names = ["mu", "n_stumps_per_attribute", "random_state"] self.distribs = [CustomUniform(loc=0.05, state=2.0, multiplier="e-"), [n_stumps_per_attribute], [random_state]] @@ -35,11 +37,12 @@ class MinCQGraalpy(RegularizedBinaryMinCqClassifier, BaseMonoviewClassifier): return self def get_params(self, deep=True): - return {"random_state":self.random_state, "mu":self.mu, "n_stumps_per_attribute":self.n_stumps_per_attribute} + return {"random_state": self.random_state, "mu": self.mu, + "n_stumps_per_attribute": self.n_stumps_per_attribute} def getInterpret(self, directory, y_test): - interpret_string = "Cbound on train :"+str(self.train_cbound) - np.savetxt(directory+"times.csv", np.array([self.train_time, 0])) + interpret_string = "Cbound on train :" + str(self.train_cbound) + np.savetxt(directory + "times.csv", np.array([self.train_time, 0])) # interpret_string += "Train C_bound value : "+str(self.cbound_train) # y_rework = np.copy(y_test) # y_rework[np.where(y_rework==0)] = -1 @@ -52,8 +55,8 @@ class MinCQGraalpy(RegularizedBinaryMinCqClassifier, BaseMonoviewClassifier): def formatCmdArgs(args): """Used to format kwargs for the parsed args""" - kwargsDict = {"mu":args.MCG_mu, - "n_stumps_per_attribute":args.MCG_stumps} + kwargsDict = {"mu": args.MCG_mu, + "n_stumps_per_attribute": args.MCG_stumps} return kwargsDict @@ -62,4 +65,4 @@ def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): paramsSet.append({}) - return paramsSet \ No newline at end of file + return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQGraalpyTree.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQGraalpyTree.py index 
d5907d69ec3e6ecb9ec2b2be5dbf7d938d8fb887..b174cbfd87480e80d1f6987277064b1942c58979 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQGraalpyTree.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/MinCQGraalpyTree.py @@ -1,20 +1,23 @@ import numpy as np -from ..Monoview.Additions.MinCQUtils import RegularizedBinaryMinCqClassifier from ..Monoview.Additions.BoostUtils import TreeClassifiersGenerator +from ..Monoview.Additions.MinCQUtils import RegularizedBinaryMinCqClassifier from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomUniform +class MinCQGraalpyTree(RegularizedBinaryMinCqClassifier, + BaseMonoviewClassifier): -class MinCQGraalpyTree(RegularizedBinaryMinCqClassifier, BaseMonoviewClassifier): - - def __init__(self, random_state=None, mu=0.01, self_complemented=True, n_stumps_per_attribute=1, max_depth=2, **kwargs): + def __init__(self, random_state=None, mu=0.01, self_complemented=True, + n_stumps_per_attribute=1, max_depth=2, **kwargs): super(MinCQGraalpyTree, self).__init__(mu=mu, - estimators_generator=TreeClassifiersGenerator(n_trees=n_stumps_per_attribute, - max_depth=max_depth, - self_complemented=self_complemented), - ) - self.param_names = ["mu", "n_stumps_per_attribute", "random_state", "max_depth"] + estimators_generator=TreeClassifiersGenerator( + n_trees=n_stumps_per_attribute, + max_depth=max_depth, + self_complemented=self_complemented), + ) + self.param_names = ["mu", "n_stumps_per_attribute", "random_state", + "max_depth"] self.distribs = [CustomUniform(loc=0.05, state=2.0, multiplier="e-"), [n_stumps_per_attribute], [random_state], [max_depth]] self.n_stumps_per_attribute = n_stumps_per_attribute @@ -39,11 +42,13 @@ class MinCQGraalpyTree(RegularizedBinaryMinCqClassifier, BaseMonoviewClassifier) return self def get_params(self, deep=True): - return {"random_state":self.random_state, "mu":self.mu, "n_stumps_per_attribute":self.n_stumps_per_attribute, "max_depth":self.max_depth} + return {"random_state": self.random_state, "mu": self.mu, + "n_stumps_per_attribute": self.n_stumps_per_attribute, + "max_depth": self.max_depth} def getInterpret(self, directory, y_test): - interpret_string = "Cbound on train :"+str(self.train_cbound) - np.savetxt(directory+"times.csv", np.array([self.train_time, 0])) + interpret_string = "Cbound on train :" + str(self.train_cbound) + np.savetxt(directory + "times.csv", np.array([self.train_time, 0])) # interpret_string += "Train C_bound value : "+str(self.cbound_train) # y_rework = np.copy(y_test) # y_rework[np.where(y_rework==0)] = -1 @@ -56,9 +61,9 @@ class MinCQGraalpyTree(RegularizedBinaryMinCqClassifier, BaseMonoviewClassifier) def formatCmdArgs(args): """Used to format kwargs for the parsed args""" - kwargsDict = {"mu":args.MCGT_mu, - "n_stumps_per_attribute":args.MCGT_trees, - "max_depth":args.MCGT_max_depth} + kwargsDict = {"mu": args.MCGT_mu, + "n_stumps_per_attribute": args.MCGT_trees, + "max_depth": args.MCGT_max_depth} return kwargsDict @@ -67,4 +72,4 @@ def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): paramsSet.append({}) - return paramsSet \ No newline at end of file + return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoost.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoost.py index 039d237fbbb0eb64f48abe3026d1d7019a031f23..7e2ca120918af6689b010c67ced8c2a044f9ca69 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoost.py +++ 
b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoost.py @@ -1,21 +1,20 @@ -from ..Monoview.MonoviewUtils import BaseMonoviewClassifier -from ..Monoview.Additions.BoostUtils import getInterpretBase from ..Monoview.Additions.CGDescUtils import ColumnGenerationClassifierQar +from ..Monoview.MonoviewUtils import BaseMonoviewClassifier class QarBoost(ColumnGenerationClassifierQar, BaseMonoviewClassifier): def __init__(self, random_state=None, **kwargs): super(QarBoost, self).__init__(n_max_iterations=500, - random_state=random_state, - self_complemented=True, - twice_the_same=True, - c_bound_choice=True, - random_start=False, - n_stumps_per_attribute=10, - use_r=True, - c_bound_sol=False - ) + random_state=random_state, + self_complemented=True, + twice_the_same=True, + c_bound_choice=True, + random_start=False, + n_stumps_per_attribute=10, + use_r=True, + c_bound_sol=False + ) self.param_names = [] self.distribs = [] self.classed_params = [] diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py index 63de1129d1e77ff84cfb1f26e62b31737bfcf03b..8e48e1e0791b8486e768b528d3e4019ff6951aa3 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostNC3.py @@ -1,21 +1,20 @@ -from ..Monoview.MonoviewUtils import BaseMonoviewClassifier -from ..Monoview.Additions.BoostUtils import getInterpretBase from ..Monoview.Additions.CGDescUtils import ColumnGenerationClassifierQar +from ..Monoview.MonoviewUtils import BaseMonoviewClassifier class QarBoostNC3(ColumnGenerationClassifierQar, BaseMonoviewClassifier): def __init__(self, random_state=None, **kwargs): super(QarBoostNC3, self).__init__(n_max_iterations=300, - random_state=random_state, - self_complemented=True, - twice_the_same=False, - c_bound_choice=True, - random_start=False, - n_stumps_per_attribute=1, - use_r=True, - c_bound_sol=True - ) + random_state=random_state, + self_complemented=True, + twice_the_same=False, + c_bound_choice=True, + random_start=False, + n_stumps_per_attribute=1, + use_r=True, + c_bound_sol=True + ) self.param_names = [] self.distribs = [] self.classed_params = [] @@ -43,4 +42,4 @@ def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): paramsSet.append({}) - return paramsSet \ No newline at end of file + return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py index 01cd59109cbfbf92ce1b9a0819e4a26d42959d46..dfa611767f01bab810fa51510392d2ef14cd3974 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv2.py @@ -1,21 +1,20 @@ -from ..Monoview.MonoviewUtils import BaseMonoviewClassifier -from ..Monoview.Additions.BoostUtils import getInterpretBase from ..Monoview.Additions.CGDescUtils import ColumnGenerationClassifierQar +from ..Monoview.MonoviewUtils import BaseMonoviewClassifier class QarBoostv2(ColumnGenerationClassifierQar, BaseMonoviewClassifier): def __init__(self, random_state=None, **kwargs): super(QarBoostv2, self).__init__(n_max_iterations=300, - random_state=random_state, - self_complemented=True, - twice_the_same=False, - c_bound_choice=True, - random_start=False, - n_stumps_per_attribute=1, - use_r=True, - 
c_bound_sol=False - ) + random_state=random_state, + self_complemented=True, + twice_the_same=False, + c_bound_choice=True, + random_start=False, + n_stumps_per_attribute=1, + use_r=True, + c_bound_sol=False + ) self.param_names = [] self.distribs = [] self.classed_params = [] @@ -43,4 +42,4 @@ def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): paramsSet.append({}) - return paramsSet \ No newline at end of file + return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv3.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv3.py index 0b5b418238a3d4a2da5065b302492b26707f609a..28b064ca2c16f200dea2794198c811eaef7395b0 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv3.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/QarBoostv3.py @@ -1,6 +1,5 @@ -from ..Monoview.MonoviewUtils import BaseMonoviewClassifier -from ..Monoview.Additions.BoostUtils import getInterpretBase from ..Monoview.Additions.CGDescUtils import ColumnGenerationClassifierQar +from ..Monoview.MonoviewUtils import BaseMonoviewClassifier class QarBoostv3(ColumnGenerationClassifierQar, BaseMonoviewClassifier): @@ -34,11 +33,13 @@ class QarBoostv3(ColumnGenerationClassifierQar, BaseMonoviewClassifier): def get_name_for_fusion(self): return "QBv3" + def formatCmdArgs(args): """Used to format kwargs for the parsed args""" kwargsDict = {} return kwargsDict + def paramsToSet(nIter, randomState): """Used for weighted linear early fusion to generate random search sets""" paramsSet = [] diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py index 307136f912a2a223a048f5678375bb920a4df87d..2bef1b5f41ae0274238c0ed120c50ddafafbe793 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py @@ -16,12 +16,13 @@ class RandomForest(RandomForestClassifier, BaseMonoviewClassifier): max_depth=max_depth, criterion=criterion, random_state=random_state - ) - self.param_names = ["n_estimators", "max_depth", "criterion", "random_state"] + ) + self.param_names = ["n_estimators", "max_depth", "criterion", + "random_state"] self.classed_params = [] self.distribs = [CustomRandint(low=1, high=300), CustomRandint(low=1, high=300), - ["gini", "entropy"], [random_state] ] + ["gini", "entropy"], [random_state]] self.weird_strings = {} def canProbas(self): diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py index adda1111dbcbfdf1649262dc65c803045774cd7e..dc789830cc859d5a6dc2329290a6eeb2b6bb75b0 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py @@ -1,13 +1,13 @@ -from sklearn.externals.six import iteritems from pyscm.scm import SetCoveringMachineClassifier as scm -from sklearn.base import BaseEstimator, ClassifierMixin -from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, BaseMonoviewClassifier +from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, \ + BaseMonoviewClassifier # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype + # class DecisionStumpSCMNew(scm, BaseEstimator, 
ClassifierMixin): # """docstring for SCM # A hands on class of SCM using decision stump, built with sklearn format in order to use sklearn function on SCM like @@ -50,7 +50,7 @@ class SCM(scm, BaseMonoviewClassifier): model_type=model_type, max_rules=max_rules, p=p - ) + ) self.param_names = ["model_type", "max_rules", "p", "random_state"] self.distribs = [["conjunction", "disjunction"], CustomRandint(low=1, high=15), @@ -78,7 +78,8 @@ def formatCmdArgs(args): def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): - paramsSet.append({"model_type": randomState.choice(["conjunction", "disjunction"]), - "max_rules": randomState.randint(1, 15), - "p": randomState.random_sample()}) + paramsSet.append( + {"model_type": randomState.choice(["conjunction", "disjunction"]), + "max_rules": randomState.randint(1, 15), + "p": randomState.random_sample()}) return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregen.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregen.py index f4b6df25724fabb6686f7f04fa5b3f94b6d160cf..8f566ba8bd9a4d40c73ee81509af35c70b45b5dc 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregen.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregen.py @@ -1,30 +1,34 @@ -from sklearn.externals.six import iteritems -from pyscm.scm import SetCoveringMachineClassifier as scm -from sklearn.base import BaseEstimator, ClassifierMixin -import numpy as np import os -from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, BaseMonoviewClassifier, change_label_to_minus, change_label_to_zero -from ..Monoview.Additions.BoostUtils import StumpsClassifiersGenerator, BaseBoost +import numpy as np +from pyscm.scm import SetCoveringMachineClassifier as scm + from ..Monoview.Additions.PregenUtils import PregenClassifier +from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, \ + BaseMonoviewClassifier + # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype + class SCMPregen(scm, BaseMonoviewClassifier, PregenClassifier): def __init__(self, random_state=None, model_type="conjunction", - max_rules=10, p=0.1, n_stumps=10,self_complemented=True, **kwargs): + max_rules=10, p=0.1, n_stumps=10, self_complemented=True, + **kwargs): super(SCMPregen, self).__init__( random_state=random_state, model_type=model_type, max_rules=max_rules, p=p - ) - self.param_names = ["model_type", "max_rules", "p", "n_stumps", "random_state"] + ) + self.param_names = ["model_type", "max_rules", "p", "n_stumps", + "random_state"] self.distribs = [["conjunction", "disjunction"], CustomRandint(low=1, high=15), - CustomUniform(loc=0, state=1), [n_stumps], [random_state]] + CustomUniform(loc=0, state=1), [n_stumps], + [random_state]] self.classed_params = [] self.weird_strings = {} self.self_complemented = self_complemented @@ -35,32 +39,34 @@ class SCMPregen(scm, BaseMonoviewClassifier, PregenClassifier): pregen_X, _ = self.pregen_voters(X, y) list_files = os.listdir(".") a = int(self.random_state.randint(0, 10000)) - if "pregen_x"+str(a)+".csv" in list_files: + if "pregen_x" + str(a) + ".csv" in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" while file_name in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" else: - file_name = "pregen_x"+str(a)+".csv" + file_name = "pregen_x" + str(a) + ".csv" np.savetxt(file_name, pregen_X, 
delimiter=',') place_holder = np.genfromtxt(file_name, delimiter=',') os.remove(file_name) - super(SCMPregen, self).fit(place_holder, y, tiebreaker=tiebreaker, iteration_callback=iteration_callback, **fit_params) + super(SCMPregen, self).fit(place_holder, y, tiebreaker=tiebreaker, + iteration_callback=iteration_callback, + **fit_params) return self def predict(self, X): pregen_X, _ = self.pregen_voters(X) list_files = os.listdir(".") a = int(self.random_state.randint(0, 10000)) - if "pregen_x"+str(a)+".csv" in list_files: + if "pregen_x" + str(a) + ".csv" in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" while file_name in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" else: - file_name = "pregen_x"+str(a)+".csv" + file_name = "pregen_x" + str(a) + ".csv" np.savetxt(file_name, pregen_X, delimiter=',') place_holder = np.genfromtxt(file_name, delimiter=',') os.remove(file_name) @@ -68,8 +74,8 @@ class SCMPregen(scm, BaseMonoviewClassifier, PregenClassifier): def get_params(self, deep=True): return {"p": self.p, "model_type": self.model_type, - "max_rules": self.max_rules, - "random_state": self.random_state, "n_stumps":self.n_stumps} + "max_rules": self.max_rules, + "random_state": self.random_state, "n_stumps": self.n_stumps} def canProbas(self): """Used to know if the classifier can return label probabilities""" @@ -92,9 +98,8 @@ def formatCmdArgs(args): def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): - paramsSet.append({"model_type": randomState.choice(["conjunction", "disjunction"]), - "max_rules": randomState.randint(1, 15), - "p": randomState.random_sample()}) + paramsSet.append( + {"model_type": randomState.choice(["conjunction", "disjunction"]), + "max_rules": randomState.randint(1, 15), + "p": randomState.random_sample()}) return paramsSet - - diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregenTree.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregenTree.py index 5ead0cf96d21560ae3eb177f59db4d54574bc0e0..efb418665735c1a7d1db2a2253959c0666de4f18 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregenTree.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregenTree.py @@ -1,33 +1,37 @@ -from sklearn.externals.six import iteritems -from pyscm.scm import SetCoveringMachineClassifier as scm -from sklearn.base import BaseEstimator, ClassifierMixin -import numpy as np import os -from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, BaseMonoviewClassifier, change_label_to_minus, change_label_to_zero -from ..Monoview.Additions.BoostUtils import StumpsClassifiersGenerator, BaseBoost +import numpy as np +from pyscm.scm import SetCoveringMachineClassifier as scm + from ..Monoview.Additions.PregenUtils import PregenClassifier +from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, \ + BaseMonoviewClassifier + # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype + class SCMPregenTree(scm, BaseMonoviewClassifier, PregenClassifier): def __init__(self, random_state=None, model_type="conjunction", - max_rules=10, p=0.1, n_stumps=10,self_complemented=True, max_depth=2, **kwargs): + max_rules=10, p=0.1, n_stumps=10, self_complemented=True, + max_depth=2, **kwargs): super(SCMPregenTree, self).__init__( random_state=random_state, model_type=model_type, max_rules=max_rules, p=p - ) - self.param_names 
= ["model_type", "max_rules", "p", "n_stumps", "random_state", "max_depth"] + ) + self.param_names = ["model_type", "max_rules", "p", "n_stumps", + "random_state", "max_depth"] self.distribs = [["conjunction", "disjunction"], CustomRandint(low=1, high=15), - CustomUniform(loc=0, state=1), [n_stumps], [random_state], [max_depth]] + CustomUniform(loc=0, state=1), [n_stumps], + [random_state], [max_depth]] self.classed_params = [] self.weird_strings = {} - self.max_depth=max_depth + self.max_depth = max_depth self.self_complemented = self_complemented self.random_state = random_state self.n_stumps = n_stumps @@ -37,32 +41,34 @@ class SCMPregenTree(scm, BaseMonoviewClassifier, PregenClassifier): pregen_X, _ = self.pregen_voters(X, y, generator="Trees") list_files = os.listdir(".") a = int(self.random_state.randint(0, 10000)) - if "pregen_x"+str(a)+".csv" in list_files: + if "pregen_x" + str(a) + ".csv" in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" while file_name in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" else: - file_name = "pregen_x"+str(a)+".csv" + file_name = "pregen_x" + str(a) + ".csv" np.savetxt(file_name, pregen_X, delimiter=',') place_holder = np.genfromtxt(file_name, delimiter=',') os.remove(file_name) - super(SCMPregenTree, self).fit(place_holder, y, tiebreaker=tiebreaker, iteration_callback=iteration_callback, **fit_params) + super(SCMPregenTree, self).fit(place_holder, y, tiebreaker=tiebreaker, + iteration_callback=iteration_callback, + **fit_params) return self def predict(self, X): - pregen_X, _ = self.pregen_voters(X,) + pregen_X, _ = self.pregen_voters(X, ) list_files = os.listdir(".") a = int(self.random_state.randint(0, 10000)) - if "pregen_x"+str(a)+".csv" in list_files: + if "pregen_x" + str(a) + ".csv" in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" while file_name in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" else: - file_name = "pregen_x"+str(a)+".csv" + file_name = "pregen_x" + str(a) + ".csv" np.savetxt(file_name, pregen_X, delimiter=',') place_holder = np.genfromtxt(file_name, delimiter=',') os.remove(file_name) @@ -70,8 +76,9 @@ class SCMPregenTree(scm, BaseMonoviewClassifier, PregenClassifier): def get_params(self, deep=True): return {"p": self.p, "model_type": self.model_type, - "max_rules": self.max_rules, - "random_state": self.random_state, "n_stumps":self.n_stumps, "max_depth":self.max_depth} + "max_rules": self.max_rules, + "random_state": self.random_state, "n_stumps": self.n_stumps, + "max_depth": self.max_depth} def canProbas(self): """Used to know if the classifier can return label probabilities""" @@ -88,16 +95,15 @@ def formatCmdArgs(args): "p": args.SCPT_p, "max_rules": args.SCPT_max_rules, "n_stumps": args.SCPT_trees, - "max_depth":args.SCPT_max_depth} + "max_depth": args.SCPT_max_depth} return kwargsDict def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): - paramsSet.append({"model_type": randomState.choice(["conjunction", "disjunction"]), - "max_rules": randomState.randint(1, 15), - "p": randomState.random_sample()}) + paramsSet.append( + {"model_type": randomState.choice(["conjunction", "disjunction"]), + "max_rules": randomState.randint(1, 15), + "p": randomState.random_sample()}) return paramsSet - - diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsity.py 
b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsity.py index 83c757f7ec83317605a1caaf0a13f438913cb583..c2bdb32b31bdcd41fd69dd75da93900a122b763a 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsity.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsity.py @@ -1,13 +1,13 @@ -from sklearn.externals.six import iteritems -from pyscm.scm import SetCoveringMachineClassifier as scm -from sklearn.base import BaseEstimator, ClassifierMixin -import numpy as np -import time import os +import time + +import numpy as np +from pyscm.scm import SetCoveringMachineClassifier as scm -from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, BaseMonoviewClassifier -from ..Monoview.Additions.PregenUtils import PregenClassifier from ..Metrics import zero_one_loss +from ..Monoview.Additions.PregenUtils import PregenClassifier +from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, \ + BaseMonoviewClassifier # Author-Info __author__ = "Baptiste Bauvin" @@ -17,68 +17,77 @@ __status__ = "Prototype" # Production, Development, Prototype class SCMSparsity(BaseMonoviewClassifier, PregenClassifier): def __init__(self, random_state=None, model_type="disjunction", - max_rules=10, p=0.1, n_stumps=1, self_complemented=True, **kwargs): + max_rules=10, p=0.1, n_stumps=1, self_complemented=True, + **kwargs): self.scm_estimators = [scm( random_state=random_state, model_type=model_type, - max_rules=max_rule+1, + max_rules=max_rule + 1, p=p - ) for max_rule in range(max_rules)] + ) for max_rule in range(max_rules)] self.model_type = model_type self.self_complemented = self_complemented self.n_stumps = n_stumps self.p = p self.random_state = random_state self.max_rules = max_rules - self.param_names = ["model_type", "max_rules", "p", "random_state", "n_stumps"] + self.param_names = ["model_type", "max_rules", "p", "random_state", + "n_stumps"] self.distribs = [["conjunction", "disjunction"], CustomRandint(low=1, high=15), - CustomUniform(loc=0, state=1), [random_state], [n_stumps]] + CustomUniform(loc=0, state=1), [random_state], + [n_stumps]] self.classed_params = [] self.weird_strings = {} def get_params(self): - return {"model_type":self.model_type, "p":self.p, "max_rules":self.max_rules, "random_state":self.random_state, "n_stumps":self.n_stumps} + return {"model_type": self.model_type, "p": self.p, + "max_rules": self.max_rules, "random_state": self.random_state, + "n_stumps": self.n_stumps} def fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params): pregen_X, _ = self.pregen_voters(X, y) list_files = os.listdir(".") a = int(self.random_state.randint(0, 10000)) - if "pregen_x"+str(a)+".csv" in list_files: + if "pregen_x" + str(a) + ".csv" in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" while file_name in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" else: - file_name = "pregen_x"+str(a)+".csv" + file_name = "pregen_x" + str(a) + ".csv" np.savetxt(file_name, pregen_X, delimiter=',') place_holder = np.genfromtxt(file_name, delimiter=',') os.remove(file_name) for scm_estimator in self.scm_estimators: beg = time.time() - scm_estimator.fit(place_holder, y, tiebreaker=None, iteration_callback=None, **fit_params) + scm_estimator.fit(place_holder, y, tiebreaker=None, + iteration_callback=None, **fit_params) end = time.time() - self.times = np.array([end-beg, 0]) - self.train_metrics = [zero_one_loss.score(y, 
scm_estimator.predict(place_holder)) for scm_estimator in self.scm_estimators] + self.times = np.array([end - beg, 0]) + self.train_metrics = [ + zero_one_loss.score(y, scm_estimator.predict(place_holder)) for + scm_estimator in self.scm_estimators] return self.scm_estimators[-1] def predict(self, X): - pregen_X, _ = self.pregen_voters(X,) + pregen_X, _ = self.pregen_voters(X, ) list_files = os.listdir(".") a = int(self.random_state.randint(0, 10000)) - if "pregen_x"+str(a)+".csv" in list_files: + if "pregen_x" + str(a) + ".csv" in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" while file_name in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" else: - file_name = "pregen_x"+str(a)+".csv" + file_name = "pregen_x" + str(a) + ".csv" np.savetxt(file_name, pregen_X, delimiter=',') place_holder = np.genfromtxt(file_name, delimiter=',') os.remove(file_name) - self.preds = [scm_estimator.predict(place_holder) for scm_estimator in self.scm_estimators] + self.preds = [scm_estimator.predict(place_holder) for scm_estimator in + self.scm_estimators] return self.preds[-1] def canProbas(self): @@ -87,7 +96,8 @@ class SCMSparsity(BaseMonoviewClassifier, PregenClassifier): def getInterpret(self, directory, y_test): interpretString = "" - np.savetxt(directory+"test_metrics.csv", np.array([zero_one_loss.score(y_test, pred) for pred in self.preds])) + np.savetxt(directory + "test_metrics.csv", np.array( + [zero_one_loss.score(y_test, pred) for pred in self.preds])) np.savetxt(directory + "times.csv", self.times) np.savetxt(directory + "train_metrics.csv", self.train_metrics) return interpretString @@ -105,7 +115,8 @@ def formatCmdArgs(args): def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): - paramsSet.append({"model_type": randomState.choice(["conjunction", "disjunction"]), - "max_rules": randomState.randint(1, 15), - "p": randomState.random_sample()}) + paramsSet.append( + {"model_type": randomState.choice(["conjunction", "disjunction"]), + "max_rules": randomState.randint(1, 15), + "p": randomState.random_sample()}) return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsityTree.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsityTree.py index bb2717336c04d9822cb883b1a185bd29b93c4a3e..3071c5ca55f8f3b090120cc44041e1c3fc28d89c 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsityTree.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsityTree.py @@ -1,13 +1,13 @@ -from sklearn.externals.six import iteritems -from pyscm.scm import SetCoveringMachineClassifier as scm -from sklearn.base import BaseEstimator, ClassifierMixin -import numpy as np -import time import os +import time + +import numpy as np +from pyscm.scm import SetCoveringMachineClassifier as scm -from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, BaseMonoviewClassifier -from ..Monoview.Additions.PregenUtils import PregenClassifier from ..Metrics import zero_one_loss +from ..Monoview.Additions.PregenUtils import PregenClassifier +from ..Monoview.MonoviewUtils import CustomRandint, CustomUniform, \ + BaseMonoviewClassifier # Author-Info __author__ = "Baptiste Bauvin" @@ -21,64 +21,72 @@ class SCMSparsityTree(BaseMonoviewClassifier, PregenClassifier): self.scm_estimators = [scm( random_state=random_state, model_type=model_type, - max_rules=max_rule+1, + max_rules=max_rule + 1, p=p - ) for 
max_rule in range(max_rules)] + ) for max_rule in range(max_rules)] self.model_type = model_type - self.max_depth=max_depth + self.max_depth = max_depth self.p = p self.n_stumps = n_stumps self.random_state = random_state self.max_rules = max_rules - self.param_names = ["model_type", "max_rules", "p", "random_state", "max_depth"] + self.param_names = ["model_type", "max_rules", "p", "random_state", + "max_depth"] self.distribs = [["conjunction", "disjunction"], CustomRandint(low=1, high=15), - CustomUniform(loc=0, state=1), [random_state], [max_depth]] + CustomUniform(loc=0, state=1), [random_state], + [max_depth]] self.classed_params = [] self.weird_strings = {} def get_params(self): - return {"model_type":self.model_type, "p":self.p, "max_rules":self.max_rules, "random_state":self.random_state, "max_depth":self.max_depth, "n_stumps":self.n_stumps} + return {"model_type": self.model_type, "p": self.p, + "max_rules": self.max_rules, "random_state": self.random_state, + "max_depth": self.max_depth, "n_stumps": self.n_stumps} def fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params): pregen_X, _ = self.pregen_voters(X, y, generator="Trees") list_files = os.listdir(".") a = int(self.random_state.randint(0, 10000)) - if "pregen_x"+str(a)+".csv" in list_files: + if "pregen_x" + str(a) + ".csv" in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" while file_name in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" else: - file_name = "pregen_x"+str(a)+".csv" + file_name = "pregen_x" + str(a) + ".csv" np.savetxt(file_name, pregen_X, delimiter=',') place_holder = np.genfromtxt(file_name, delimiter=',') os.remove(file_name) for scm_estimator in self.scm_estimators: beg = time.time() - scm_estimator.fit(place_holder, y, tiebreaker=None, iteration_callback=None, **fit_params) + scm_estimator.fit(place_holder, y, tiebreaker=None, + iteration_callback=None, **fit_params) end = time.time() - self.times = np.array([end-beg, 0]) - self.train_metrics = [zero_one_loss.score(y, scm_estimator.predict(place_holder)) for scm_estimator in self.scm_estimators] + self.times = np.array([end - beg, 0]) + self.train_metrics = [ + zero_one_loss.score(y, scm_estimator.predict(place_holder)) for + scm_estimator in self.scm_estimators] return self.scm_estimators[-1] def predict(self, X): pregen_X, _ = self.pregen_voters(X, generator="Trees") list_files = os.listdir(".") a = int(self.random_state.randint(0, 10000)) - if "pregen_x"+str(a)+".csv" in list_files: + if "pregen_x" + str(a) + ".csv" in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" while file_name in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" else: - file_name = "pregen_x"+str(a)+".csv" + file_name = "pregen_x" + str(a) + ".csv" np.savetxt(file_name, pregen_X, delimiter=',') place_holder = np.genfromtxt(file_name, delimiter=',') os.remove(file_name) - self.preds = [scm_estimator.predict(place_holder) for scm_estimator in self.scm_estimators] + self.preds = [scm_estimator.predict(place_holder) for scm_estimator in + self.scm_estimators] return self.preds[-1] def canProbas(self): @@ -87,7 +95,8 @@ class SCMSparsityTree(BaseMonoviewClassifier, PregenClassifier): def getInterpret(self, directory, y_test): interpretString = "" - np.savetxt(directory+"test_metrics.csv", np.array([zero_one_loss.score(y_test, pred) for pred in self.preds])) + np.savetxt(directory + "test_metrics.csv", 
np.array( + [zero_one_loss.score(y_test, pred) for pred in self.preds])) np.savetxt(directory + "times.csv", self.times) np.savetxt(directory + "train_metrics.csv", self.train_metrics) return interpretString @@ -106,7 +115,8 @@ def formatCmdArgs(args): def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): - paramsSet.append({"model_type": randomState.choice(["conjunction", "disjunction"]), - "max_rules": randomState.randint(1, 15), - "p": randomState.random_sample()}) + paramsSet.append( + {"model_type": randomState.choice(["conjunction", "disjunction"]), + "max_rules": randomState.randint(1, 15), + "p": randomState.random_sample()}) return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SGD.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SGD.py index 6ce00d837dc08c240927f6eda88f25cc1a13fe4d..a1d3d50266a43797ddc324b6fa6a3674c5976057 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SGD.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SGD.py @@ -16,7 +16,7 @@ class SGD(SGDClassifier, BaseMonoviewClassifier): penalty=penalty, alpha=alpha, random_state=random_state - ) + ) self.param_names = ["loss", "penalty", "alpha", "random_state"] self.classed_params = [] self.distribs = [['log', 'modified_huber'], @@ -45,6 +45,7 @@ def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): paramsSet.append({"loss": randomState.choice(['log', 'modified_huber']), - "penalty": randomState.choice(["l1", "l2", "elasticnet"]), + "penalty": randomState.choice( + ["l1", "l2", "elasticnet"]), "alpha": randomState.random_sample()}) - return paramsSet \ No newline at end of file + return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SVMLinear.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SVMLinear.py index e545fce58c181ce78f6cce99619f71972a717c01..4eb427b682e62a9dcc7498cadcc8aceac18af2b0 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SVMLinear.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SVMLinear.py @@ -1,4 +1,3 @@ - from ..Monoview.Additions.SVCClassifier import SVCClassifier from ..Monoview.MonoviewUtils import CustomUniform, BaseMonoviewClassifier @@ -14,12 +13,11 @@ class SVMLinear(SVCClassifier, BaseMonoviewClassifier): C=C, kernel='linear', random_state=random_state - ) + ) self.param_names = ["C", "random_state"] self.distribs = [CustomUniform(loc=0, state=1), [random_state]] - def formatCmdArgs(args): """Used to format kwargs for the parsed args""" kwargsDict = {"C": args.SVML_C, } diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SVMPoly.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SVMPoly.py index 454c1847d714dd3178c11f3ca65e45fa58791340..a5154599990dd1f9230144f793a1ad88cc453bca 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SVMPoly.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SVMPoly.py @@ -1,5 +1,6 @@ from ..Monoview.Additions.SVCClassifier import SVCClassifier -from ..Monoview.MonoviewUtils import CustomUniform, CustomRandint, BaseMonoviewClassifier +from ..Monoview.MonoviewUtils import CustomUniform, CustomRandint, \ + BaseMonoviewClassifier # Author-Info __author__ = "Baptiste Bauvin" @@ -16,7 +17,8 @@ class SVMPoly(SVCClassifier, BaseMonoviewClassifier): random_state=random_state ) self.param_names = ["C", "degree", 
"random_state"] - self.distribs = [CustomUniform(loc=0, state=1), CustomRandint(low=2, high=30), [random_state]] + self.distribs = [CustomUniform(loc=0, state=1), + CustomRandint(low=2, high=30), [random_state]] def formatCmdArgs(args): @@ -28,6 +30,6 @@ def formatCmdArgs(args): def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): - paramsSet.append({"C": randomState.randint(1, 10000), "degree": randomState.randint(1, 30)}) + paramsSet.append({"C": randomState.randint(1, 10000), + "degree": randomState.randint(1, 30)}) return paramsSet - diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SVMRBF.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SVMRBF.py index 63e7dea97c7b7fc5a19551ba594fe33762f810f5..8e8e8d03b6e42dc19b5939b097caa0d75b146675 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SVMRBF.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SVMRBF.py @@ -1,5 +1,5 @@ from ..Monoview.Additions.SVCClassifier import SVCClassifier -from ..Monoview.MonoviewUtils import CustomUniform, CustomRandint, BaseMonoviewClassifier +from ..Monoview.MonoviewUtils import CustomUniform, BaseMonoviewClassifier # Author-Info __author__ = "Baptiste Bauvin" @@ -28,4 +28,4 @@ def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): paramsSet.append({"C": randomState.randint(1, 10000), }) - return paramsSet \ No newline at end of file + return paramsSet diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/__init__.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/__init__.py index 03d92693e7f06109d9830c1855ee2b66ae807d9a..be5528d2219a00afdbe16ac9ba088a91306f75be 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/__init__.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/__init__.py @@ -1,8 +1,9 @@ import os + for module in os.listdir(os.path.dirname(os.path.realpath(__file__))): if module == '__init__.py' or module[-3:] != '.py': continue - __import__(module[:-3], locals(), globals(), [],1) + __import__(module[:-3], locals(), globals(), [], 1) del module del os @@ -27,4 +28,4 @@ Define a getKWARGS function KWARGSDict : a dictionnary of arguments matching the kwargs needed in train Define a getConfig function that returns a string explaining the algorithm's config using a config dict or list Add the arguments to configure the classifier in the parser in ExecClassif.py -""" \ No newline at end of file +""" diff --git a/multiview_platform/MonoMultiViewClassifiers/Multiview/Additions/__init__.py b/multiview_platform/MonoMultiViewClassifiers/Multiview/Additions/__init__.py index 6faeb7e91dba45d3ff7f7c7fac4bb1b2d0de1a00..ded01232c360476be91c1eeba56bcb76af045be6 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Multiview/Additions/__init__.py +++ b/multiview_platform/MonoMultiViewClassifiers/Multiview/Additions/__init__.py @@ -1 +1 @@ -from . import diversity_utils \ No newline at end of file +from . 
import diversity_utils diff --git a/multiview_platform/MonoMultiViewClassifiers/Multiview/Additions/diversity_utils.py b/multiview_platform/MonoMultiViewClassifiers/Multiview/Additions/diversity_utils.py index 59a35690cc108c1ff39ece36232247c1683e7857..fce99c308b91c42fffc622b636a953dd447f7071 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Multiview/Additions/diversity_utils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Multiview/Additions/diversity_utils.py @@ -1,8 +1,9 @@ -import numpy as np -import math import itertools +import math import os +import numpy as np + from ...utils.Multiclass import isBiclass, genMulticlassMonoviewDecision @@ -29,12 +30,18 @@ def getClassifiersDecisions(allClassifersNames, viewsIndices, resultsMonoview): classifiersDecisions = np.zeros((nbViews, nbClassifiers, nbFolds, foldsLen)) for resultMonoview in resultsMonoview: - if resultMonoview.classifier_name in classifiersNames[viewsIndices.index(resultMonoview.view_index)]: + if resultMonoview.classifier_name in classifiersNames[ + viewsIndices.index(resultMonoview.view_index)]: pass else: - classifiersNames[viewsIndices.index(resultMonoview.view_index)].append(resultMonoview.classifier_name) - classifierIndex = classifiersNames[viewsIndices.index(resultMonoview.view_index)].index(resultMonoview.classifier_name) - classifiersDecisions[viewsIndices.index(resultMonoview.view_index), classifierIndex] = resultMonoview.test_folds_preds + classifiersNames[ + viewsIndices.index(resultMonoview.view_index)].append( + resultMonoview.classifier_name) + classifierIndex = classifiersNames[ + viewsIndices.index(resultMonoview.view_index)].index( + resultMonoview.classifier_name) + classifiersDecisions[viewsIndices.index( + resultMonoview.view_index), classifierIndex] = resultMonoview.test_folds_preds # else: # train_len = resultsMonoview[0].test_folds_preds.shape[0] # classifiersDecisions = np.zeros((nbViews, nbClassifiers, 1, train_len)) @@ -48,7 +55,8 @@ def getClassifiersDecisions(allClassifersNames, viewsIndices, resultsMonoview): return classifiersDecisions, classifiersNames -def couple_div_measure(classifiersNames, classifiersDecisions, measurement, foldsGroudTruth): +def couple_div_measure(classifiersNames, classifiersDecisions, measurement, + foldsGroudTruth): """ This function is used to get the max of a couple diversity measurement,passed as an argument It generates all possible combinations and all the couples to estimate the diversity on a combination @@ -56,85 +64,112 @@ def couple_div_measure(classifiersNames, classifiersDecisions, measurement, fold """ nbViews, nbClassifiers, nbFolds, foldsLen = classifiersDecisions.shape - combinations = itertools.combinations_with_replacement(range(nbClassifiers), nbViews) - nbCombinations = int(math.factorial(nbClassifiers+nbViews-1) / math.factorial(nbViews) / math.factorial(nbClassifiers-1)) + combinations = itertools.combinations_with_replacement(range(nbClassifiers), + nbViews) + nbCombinations = int( + math.factorial(nbClassifiers + nbViews - 1) / math.factorial( + nbViews) / math.factorial(nbClassifiers - 1)) div_measure = np.zeros(nbCombinations) combis = np.zeros((nbCombinations, nbViews), dtype=int) for combinationsIndex, combination in enumerate(combinations): combis[combinationsIndex] = combination - combiWithView = [(viewIndex,combiIndex) for viewIndex, combiIndex in enumerate(combination)] + combiWithView = [(viewIndex, combiIndex) for viewIndex, combiIndex in + enumerate(combination)] binomes = itertools.combinations(combiWithView, 2) - 
nbBinomes = int(math.factorial(nbViews) / 2 / math.factorial(nbViews-2)) + nbBinomes = int( + math.factorial(nbViews) / 2 / math.factorial(nbViews - 2)) couple_diversities = np.zeros(nbBinomes) for binomeIndex, binome in enumerate(binomes): - (viewIndex1, classifierIndex1), (viewIndex2, classifierIndex2) = binome - folds_couple_diversity = np.mean(measurement(classifiersDecisions[viewIndex1, classifierIndex1], - classifiersDecisions[viewIndex2, classifierIndex2], foldsGroudTruth) - , axis=1) + (viewIndex1, classifierIndex1), ( + viewIndex2, classifierIndex2) = binome + folds_couple_diversity = np.mean( + measurement(classifiersDecisions[viewIndex1, classifierIndex1], + classifiersDecisions[viewIndex2, classifierIndex2], + foldsGroudTruth) + , axis=1) couple_diversities[binomeIndex] = np.mean(folds_couple_diversity) div_measure[combinationsIndex] = np.mean(couple_diversities) bestCombiIndex = np.argmax(div_measure) bestCombination = combis[bestCombiIndex] - return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)], div_measure[bestCombiIndex] + return [classifiersNames[viewIndex][index] for viewIndex, index in + enumerate(bestCombination)], div_measure[bestCombiIndex] -def global_div_measure(classifiersNames, classifiersDecisions, measurement, foldsGroudTruth): +def global_div_measure(classifiersNames, classifiersDecisions, measurement, + foldsGroudTruth): """ This function is used to get the max of a diversity measurement,passed as an argument It generates all possible combinations to estimate the diversity on a combination The best combination is the one that maximize the measurement. """ - nbViews, nbClassifiers, nbFolds, foldsLen = classifiersDecisions.shape - combinations = itertools.combinations_with_replacement(range(nbClassifiers), nbViews) - nbCombinations = int(math.factorial(nbClassifiers + nbViews - 1) / math.factorial(nbViews) / math.factorial( - nbClassifiers - 1)) + combinations = itertools.combinations_with_replacement(range(nbClassifiers), + nbViews) + nbCombinations = int( + math.factorial(nbClassifiers + nbViews - 1) / math.factorial( + nbViews) / math.factorial( + nbClassifiers - 1)) div_measure = np.zeros(nbCombinations) combis = np.zeros((nbCombinations, nbViews), dtype=int) for combinationsIndex, combination in enumerate(combinations): combis[combinationsIndex] = combination - div_measure[combinationsIndex] = measurement(classifiersDecisions, combination, foldsGroudTruth, foldsLen) + div_measure[combinationsIndex] = measurement(classifiersDecisions, + combination, + foldsGroudTruth, foldsLen) bestCombiIndex = np.argmax(div_measure) bestCombination = combis[bestCombiIndex] - return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)], div_measure[ - bestCombiIndex] + return [classifiersNames[viewIndex][index] for viewIndex, index in + enumerate(bestCombination)], div_measure[ + bestCombiIndex] -def CQ_div_measure(classifiersNames, classifiersDecisions, measurement, foldsGroudTruth): +def CQ_div_measure(classifiersNames, classifiersDecisions, measurement, + foldsGroudTruth): """ This function is used to measure a pseudo-CQ measurement based on the minCq algorithm. It's a mix between couple_div_measure and global_div_measure that uses multiple measurements. 
""" nbViews, nbClassifiers, nbFolds, foldsLen = classifiersDecisions.shape - combinations = itertools.combinations_with_replacement(range(nbClassifiers), nbViews) + combinations = itertools.combinations_with_replacement(range(nbClassifiers), + nbViews) nbCombinations = int( - math.factorial(nbClassifiers + nbViews - 1) / math.factorial(nbViews) / math.factorial(nbClassifiers - 1)) + math.factorial(nbClassifiers + nbViews - 1) / math.factorial( + nbViews) / math.factorial(nbClassifiers - 1)) div_measure = np.zeros(nbCombinations) combis = np.zeros((nbCombinations, nbViews), dtype=int) for combinationsIndex, combination in enumerate(combinations): combis[combinationsIndex] = combination - combiWithView = [(viewIndex, combiIndex) for viewIndex, combiIndex in enumerate(combination)] + combiWithView = [(viewIndex, combiIndex) for viewIndex, combiIndex in + enumerate(combination)] binomes = itertools.combinations(combiWithView, 2) - nbBinomes = int(math.factorial(nbViews) / 2 / math.factorial(nbViews - 2)) + nbBinomes = int( + math.factorial(nbViews) / 2 / math.factorial(nbViews - 2)) disagreement = np.zeros(nbBinomes) - div_measure[combinationsIndex] = measurement[1](classifiersDecisions, combination, foldsGroudTruth, foldsLen) + div_measure[combinationsIndex] = measurement[1](classifiersDecisions, + combination, + foldsGroudTruth, + foldsLen) for binomeIndex, binome in enumerate(binomes): - (viewIndex1, classifierIndex1), (viewIndex2, classifierIndex2) = binome - nbDisagree = np.sum(measurement[0](classifiersDecisions[viewIndex1, classifierIndex1], - classifiersDecisions[viewIndex2, classifierIndex2], foldsGroudTruth) + (viewIndex1, classifierIndex1), ( + viewIndex2, classifierIndex2) = binome + nbDisagree = np.sum(measurement[0]( + classifiersDecisions[viewIndex1, classifierIndex1], + classifiersDecisions[viewIndex2, classifierIndex2], + foldsGroudTruth) , axis=1) / float(foldsLen) disagreement[binomeIndex] = np.mean(nbDisagree) div_measure[combinationsIndex] /= float(np.mean(disagreement)) bestCombiIndex = np.argmin(div_measure) bestCombination = combis[bestCombiIndex] - return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)], div_measure[ - bestCombiIndex] + return [classifiersNames[viewIndex][index] for viewIndex, index in + enumerate(bestCombination)], div_measure[ + bestCombiIndex] def getFoldsGroundTruth(directory, folds=True): @@ -142,50 +177,67 @@ def getFoldsGroundTruth(directory, folds=True): foldsGroundTruth is formatted as foldsGroundTruth[foldIndex, exampleIndex]""" if folds: - foldsFilesNames = os.listdir(directory+"folds/") - foldLen = len(np.genfromtxt(directory+"folds/"+foldsFilesNames[0], delimiter=',')) + foldsFilesNames = os.listdir(directory + "folds/") + foldLen = len(np.genfromtxt(directory + "folds/" + foldsFilesNames[0], + delimiter=',')) foldsGroudTruth = np.zeros((len(foldsFilesNames), foldLen), dtype=int) for fileName in foldsFilesNames: foldIndex = int(fileName[-5]) - foldsGroudTruth[foldIndex] = np.genfromtxt(directory+"folds/"+fileName, delimiter=',')[:foldLen] + foldsGroudTruth[foldIndex] = np.genfromtxt( + directory + "folds/" + fileName, delimiter=',')[:foldLen] return foldsGroudTruth else: - train_labels = np.genfromtxt(directory+"train_labels.csv", delimiter=',') + train_labels = np.genfromtxt(directory + "train_labels.csv", + delimiter=',') foldsGroudTruth = np.zeros((1, train_labels.shape[0])) foldsGroudTruth[0] = train_labels return foldsGroudTruth - def getArgs(args, benchmark, views, viewsIndices, randomState, - 
directory, resultsMonoview, classificationIndices, measurement, name): + directory, resultsMonoview, classificationIndices, measurement, + name): """This function is a general function to get the args for all the measurements used""" if len(resultsMonoview[0].test_folds_preds.shape) is not 1: foldsGroundTruth = getFoldsGroundTruth(directory, folds=True) else: foldsGroundTruth = getFoldsGroundTruth(directory, folds=False) monoviewClassifierModulesNames = benchmark["Monoview"] - classifiersDecisions, classifiersNames = getClassifiersDecisions(monoviewClassifierModulesNames, - viewsIndices, - resultsMonoview) + classifiersDecisions, classifiersNames = getClassifiersDecisions( + monoviewClassifierModulesNames, + viewsIndices, + resultsMonoview) if name in ['DisagreeFusion', 'DoubleFaultFusion']: - classifiersNames, div_measure = couple_div_measure(classifiersNames, classifiersDecisions, - measurement, foldsGroundTruth) + classifiersNames, div_measure = couple_div_measure(classifiersNames, + classifiersDecisions, + measurement, + foldsGroundTruth) elif name == "PseudoCQFusion": - classifiersNames, div_measure = CQ_div_measure(classifiersNames, classifiersDecisions, - measurement, foldsGroundTruth) + classifiersNames, div_measure = CQ_div_measure(classifiersNames, + classifiersDecisions, + measurement, + foldsGroundTruth) else: - classifiersNames, div_measure = global_div_measure(classifiersNames, classifiersDecisions, - measurement, foldsGroundTruth) - multiclass_preds = [monoviewResult.y_test_multiclass_pred for monoviewResult in resultsMonoview] + classifiersNames, div_measure = global_div_measure(classifiersNames, + classifiersDecisions, + measurement, + foldsGroundTruth) + multiclass_preds = [monoviewResult.y_test_multiclass_pred for monoviewResult + in resultsMonoview] if isBiclass(multiclass_preds): - monoviewDecisions = np.array([monoviewResult.full_labels_pred for monoviewResult in resultsMonoview - if classifiersNames[viewsIndices.index(monoviewResult.view_index)] == - monoviewResult.classifier_name]) + monoviewDecisions = np.array( + [monoviewResult.full_labels_pred for monoviewResult in + resultsMonoview + if + classifiersNames[viewsIndices.index(monoviewResult.view_index)] == + monoviewResult.classifier_name]) else: monoviewDecisions = np.array( - [genMulticlassMonoviewDecision(monoviewResult, classificationIndices) for monoviewResult in - resultsMonoview if classifiersNames[viewsIndices.index(monoviewResult.view_index)] == monoviewResult.classifier_name]) + [genMulticlassMonoviewDecision(monoviewResult, + classificationIndices) for + monoviewResult in + resultsMonoview if classifiersNames[viewsIndices.index( + monoviewResult.view_index)] == monoviewResult.classifier_name]) argumentsList = [] arguments = {"CL_type": name, "views": views, @@ -193,7 +245,7 @@ def getArgs(args, benchmark, views, viewsIndices, randomState, "viewsIndices": viewsIndices, "NB_CLASS": len(args.CL_classes), "LABELS_NAMES": args.CL_classes, - name+"KWARGS": { + name + "KWARGS": { "weights": args.DGF_weights, "classifiersNames": classifiersNames, "monoviewDecisions": monoviewDecisions, @@ -204,23 +256,28 @@ def getArgs(args, benchmark, views, viewsIndices, randomState, argumentsList.append(arguments) return argumentsList + def genParamsSets(classificationKWARGS, randomState, nIter=1): """Used to generate parameters sets for the random hyper parameters optimization function""" - weights = [randomState.random_sample(len(classificationKWARGS["classifiersNames"])) for _ in range(nIter)] - nomralizedWeights = 
[[weightVector/np.sum(weightVector)] for weightVector in weights] + weights = [ + randomState.random_sample(len(classificationKWARGS["classifiersNames"])) + for _ in range(nIter)] + nomralizedWeights = [[weightVector / np.sum(weightVector)] for weightVector + in weights] return nomralizedWeights class DiversityFusionClass: - """This is a parent class for all the diversity fusion based classifiers.""" def __init__(self, randomState, NB_CORES=1, **kwargs): """Used to init the instances""" if kwargs["weights"] == []: - self.weights = [1.0/len(kwargs["classifiersNames"]) for _ in range(len(kwargs["classifiersNames"]))] + self.weights = [1.0 / len(kwargs["classifiersNames"]) for _ in + range(len(kwargs["classifiersNames"]))] else: - self.weights = np.array(kwargs["weights"])/np.sum(np.array(kwargs["weights"])) + self.weights = np.array(kwargs["weights"]) / np.sum( + np.array(kwargs["weights"])) self.monoviewDecisions = kwargs["monoviewDecisions"] self.classifiersNames = kwargs["classifiersNames"] self.nbClass = kwargs["nbCLass"] @@ -230,7 +287,8 @@ class DiversityFusionClass: """ Used to set the weights""" self.weights = paramsSet[0] - def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None, metric=["f1_score", None]): + def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None, + metric=["f1_score", None]): """No need to fit as the monoview classifiers are already fitted""" pass @@ -240,8 +298,10 @@ class DiversityFusionClass: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) votes = np.zeros((len(usedIndices), self.nbClass), dtype=float) for usedIndex, exampleIndex in enumerate(usedIndices): - for monoviewDecisionIndex, monoviewDecision in enumerate(self.monoviewDecisions): - votes[usedIndex, monoviewDecision[exampleIndex]] += 1#self.weights[monoviewDecisionIndex] + for monoviewDecisionIndex, monoviewDecision in enumerate( + self.monoviewDecisions): + votes[usedIndex, monoviewDecision[ + exampleIndex]] += 1 # self.weights[monoviewDecisionIndex] predictedLabels = np.argmax(votes, axis=1) return predictedLabels @@ -249,8 +309,9 @@ class DiversityFusionClass: pass def getConfigString(self, classificationKWARGS): - return "weights : "+", ".join(map(str, list(self.weights))) + return "weights : " + ", ".join(map(str, list(self.weights))) def getSpecificAnalysis(self, classificationKWARGS): - stringAnalysis = "Classifiers used for each view : " + ', '.join(self.classifiersNames) + stringAnalysis = "Classifiers used for each view : " + ', '.join( + self.classifiersNames) return stringAnalysis diff --git a/multiview_platform/MonoMultiViewClassifiers/Multiview/ExecMultiview.py b/multiview_platform/MonoMultiViewClassifiers/Multiview/ExecMultiview.py index f711aca712edbe74cefda02959d87eef7214dd76..a8b973396d3c6e1a9b09a626710b87681d72bcc8 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Multiview/ExecMultiview.py +++ b/multiview_platform/MonoMultiViewClassifiers/Multiview/ExecMultiview.py @@ -3,21 +3,22 @@ import logging import os import os.path import time + import h5py import numpy as np +from .MultiviewUtils import MultiviewResult +from .. import MultiviewClassifiers from ..utils import HyperParameterSearch from ..utils.Dataset import getShape -from .. 
import MultiviewClassifiers -from .MultiviewUtils import MultiviewResult - # Author-Info __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def initConstants(kwargs, classificationIndices, metrics, name, nbCores, KFolds, DATASET): +def initConstants(kwargs, classificationIndices, metrics, name, nbCores, KFolds, + DATASET): """Used to init the constants""" views = kwargs["views"] viewsIndices = kwargs["viewsIndices"] @@ -28,8 +29,10 @@ def initConstants(kwargs, classificationIndices, metrics, name, nbCores, KFolds, learningRate = len(classificationIndices[0]) / float( (len(classificationIndices[0]) + len(classificationIndices[1]))) t_start = time.time() - logging.info("Info\t: Classification - Database : " + str(name) + " ; Views : " + ", ".join(views) + - " ; Algorithm : " + CL_type + " ; Cores : " + str(nbCores) + ", Train ratio : " + str(learningRate) + + logging.info("Info\t: Classification - Database : " + str( + name) + " ; Views : " + ", ".join(views) + + " ; Algorithm : " + CL_type + " ; Cores : " + str( + nbCores) + ", Train ratio : " + str(learningRate) + ", CV on " + str(KFolds.n_splits) + " folds") for viewIndex, viewName in zip(viewsIndices, views): @@ -38,7 +41,9 @@ def initConstants(kwargs, classificationIndices, metrics, name, nbCores, KFolds, return CL_type, t_start, viewsIndices, classificationKWARGS, views, learningRate -def saveResults(LABELS_DICTIONARY, stringAnalysis, views, classifierModule, classificationKWARGS, directory, learningRate, name, imagesAnalysis): +def saveResults(LABELS_DICTIONARY, stringAnalysis, views, classifierModule, + classificationKWARGS, directory, learningRate, name, + imagesAnalysis): labelsSet = set(LABELS_DICTIONARY.values()) logging.info(stringAnalysis) viewsString = "-".join(views) @@ -61,26 +66,33 @@ def saveResults(LABELS_DICTIONARY, stringAnalysis, views, classifierModule, clas for imageName in imagesAnalysis.keys(): if os.path.isfile(outputFileName + imageName + ".png"): for i in range(1, 20): - testFileName = outputFileName + imageName + "-" + str(i) + ".png" + testFileName = outputFileName + imageName + "-" + str( + i) + ".png" if not os.path.isfile(testFileName): imagesAnalysis[imageName].savefig(testFileName) break - imagesAnalysis[imageName].savefig(outputFileName + imageName + '.png') + imagesAnalysis[imageName].savefig( + outputFileName + imageName + '.png') -def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, databaseType, path, LABELS_DICTIONARY, +def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, + databaseType, path, LABELS_DICTIONARY, randomState, labels, - hyperParamSearch=False, nbCores=1, metrics=None, nIter=30, **arguments): + hyperParamSearch=False, nbCores=1, metrics=None, + nIter=30, **arguments): """Used to load an HDF5 dataset for each parallel job and execute multiview classification""" DATASET = h5py.File(path + name + str(coreIndex) + ".hdf5", "r") - return ExecMultiview(directory, DATASET, name, learningRate, nbFolds, 1, databaseType, path, LABELS_DICTIONARY, + return ExecMultiview(directory, DATASET, name, learningRate, nbFolds, 1, + databaseType, path, LABELS_DICTIONARY, randomState, labels, - hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=nIter, **arguments) + hyperParamSearch=hyperParamSearch, metrics=metrics, + nIter=nIter, **arguments) -def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCores, databaseType, path, - LABELS_DICTIONARY, randomState, labels, +def 
ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, + nbCores, databaseType, path, + LABELS_DICTIONARY, randomState, labels, hyperParamSearch=False, metrics=None, nIter=30, **kwargs): """Used to execute multiview classification and result analysis""" logging.debug("Start:\t Initialize constants") @@ -89,49 +101,61 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCor viewsIndices, \ classificationKWARGS, \ views, \ - learningRate = initConstants(kwargs, classificationIndices, metrics,name, nbCores,KFolds, DATASET) + learningRate = initConstants(kwargs, classificationIndices, metrics, name, + nbCores, KFolds, DATASET) logging.debug("Done:\t Initialize constants") extractionTime = time.time() - t_start - logging.info("Info:\t Extraction duration "+str(extractionTime)+"s") + logging.info("Info:\t Extraction duration " + str(extractionTime) + "s") logging.debug("Start:\t Getting train/test split") learningIndices, validationIndices, testIndicesMulticlass = classificationIndices logging.debug("Done:\t Getting train/test split") logging.debug("Start:\t Getting classifiers modules") - classifierPackage = getattr(MultiviewClassifiers, CL_type) # Permet d'appeler un module avec une string - classifierModule = getattr(classifierPackage, CL_type+"Module") - classifierClass = getattr(classifierModule, CL_type+"Class") + classifierPackage = getattr(MultiviewClassifiers, + CL_type) # Permet d'appeler un module avec une string + classifierModule = getattr(classifierPackage, CL_type + "Module") + classifierClass = getattr(classifierModule, CL_type + "Class") analysisModule = getattr(classifierPackage, "analyzeResults") logging.debug("Done:\t Getting classifiers modules") logging.debug("Start:\t Optimizing hyperparameters") if hyperParamSearch != "None": - classifier = HyperParameterSearch.searchBestSettings(DATASET, labels, classifierPackage, - CL_type, metrics, learningIndices, - KFolds, randomState, + classifier = HyperParameterSearch.searchBestSettings(DATASET, labels, + classifierPackage, + CL_type, metrics, + learningIndices, + KFolds, + randomState, viewsIndices=viewsIndices, - searchingTool=hyperParamSearch, nIter=nIter, + searchingTool=hyperParamSearch, + nIter=nIter, **classificationKWARGS) else: - classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS) + classifier = classifierClass(randomState, NB_CORES=nbCores, + **classificationKWARGS) logging.debug("Done:\t Optimizing hyperparameters") logging.debug("Start:\t Fitting classifier") - classifier.fit_hdf5(DATASET, labels, trainIndices=learningIndices, viewsIndices=viewsIndices, metric=metrics[0]) + classifier.fit_hdf5(DATASET, labels, trainIndices=learningIndices, + viewsIndices=viewsIndices, metric=metrics[0]) logging.debug("Done:\t Fitting classifier") logging.debug("Start:\t Predicting") - trainLabels = classifier.predict_hdf5(DATASET, usedIndices=learningIndices, viewsIndices=viewsIndices) - testLabels = classifier.predict_hdf5(DATASET, usedIndices=validationIndices, viewsIndices=viewsIndices) - fullLabels = np.zeros(labels.shape, dtype=int)-100 + trainLabels = classifier.predict_hdf5(DATASET, usedIndices=learningIndices, + viewsIndices=viewsIndices) + testLabels = classifier.predict_hdf5(DATASET, usedIndices=validationIndices, + viewsIndices=viewsIndices) + fullLabels = np.zeros(labels.shape, dtype=int) - 100 for trainIndex, index in enumerate(learningIndices): fullLabels[index] = trainLabels[trainIndex] for testIndex, index in enumerate(validationIndices): 
fullLabels[index] = testLabels[testIndex] if testIndicesMulticlass != []: - testLabelsMulticlass = classifier.predict_hdf5(DATASET, usedIndices=testIndicesMulticlass, viewsIndices=viewsIndices) + testLabelsMulticlass = classifier.predict_hdf5(DATASET, + usedIndices=testIndicesMulticlass, + viewsIndices=viewsIndices) else: testLabelsMulticlass = [] logging.info("Done:\t Pertidcting") @@ -139,28 +163,31 @@ def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCor classificationTime = time.time() - t_start logging.info("Info:\t Classification duration " + str(extractionTime) + "s") - - #TODO: get better cltype + # TODO: get better cltype logging.info("Start:\t Result Analysis for " + CL_type) times = (extractionTime, classificationTime) - stringAnalysis, imagesAnalysis, metricsScores = analysisModule.execute(classifier, trainLabels, - testLabels, DATASET, - classificationKWARGS, classificationIndices, - LABELS_DICTIONARY, views, nbCores, times, - name, KFolds, - hyperParamSearch, nIter, metrics, - viewsIndices, randomState, labels, classifierModule) + stringAnalysis, imagesAnalysis, metricsScores = analysisModule.execute( + classifier, trainLabels, + testLabels, DATASET, + classificationKWARGS, classificationIndices, + LABELS_DICTIONARY, views, nbCores, times, + name, KFolds, + hyperParamSearch, nIter, metrics, + viewsIndices, randomState, labels, classifierModule) logging.info("Done:\t Result Analysis for " + CL_type) logging.debug("Start:\t Saving preds") - saveResults(LABELS_DICTIONARY, stringAnalysis, views, classifierModule, classificationKWARGS, directory, + saveResults(LABELS_DICTIONARY, stringAnalysis, views, classifierModule, + classificationKWARGS, directory, learningRate, name, imagesAnalysis) logging.debug("Start:\t Saving preds") - return MultiviewResult(CL_type, classificationKWARGS, metricsScores, fullLabels, testLabelsMulticlass) + return MultiviewResult(CL_type, classificationKWARGS, metricsScores, + fullLabels, testLabelsMulticlass) # return CL_type, classificationKWARGS, metricsScores, fullLabels, testLabelsMulticlass + if __name__ == "__main__": import argparse @@ -170,33 +197,47 @@ if __name__ == "__main__": formatter_class=argparse.ArgumentDefaultsHelpFormatter) groupStandard = parser.add_argument_group('Standard arguments') - groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console') - groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of Dataset', default=".hdf5") + groupStandard.add_argument('-log', action='store_true', + help='Use option to activate Logging to Console') + groupStandard.add_argument('--type', metavar='STRING', action='store', + help='Type of Dataset', default=".hdf5") groupStandard.add_argument('--name', metavar='STRING', action='store', - help='Name of Database (default: %(default)s)', default='DB') + help='Name of Database (default: %(default)s)', + default='DB') groupStandard.add_argument('--view', metavar='STRING', action='store', - help='Name of Feature for Classification (default: %(default)s)', default='View0') + help='Name of Feature for Classification (default: %(default)s)', + default='View0') groupStandard.add_argument('--pathF', metavar='STRING', action='store', - help='Path to the views (default: %(default)s)', default='Results-FeatExtr/') + help='Path to the views (default: %(default)s)', + default='Results-FeatExtr/') groupStandard.add_argument('--directory', metavar='STRING', action='store', - help='Path to the views (default: 
%(default)s)', default='Results-FeatExtr/') - groupStandard.add_argument('--LABELS_DICTIONARY', metavar='STRING', action='store', nargs='+', - help='Name of classLabels CSV-file (default: %(default)s)', default='classLabels.csv') - groupStandard.add_argument('--classificationIndices', metavar='STRING', action='store', + help='Path to the views (default: %(default)s)', + default='Results-FeatExtr/') + groupStandard.add_argument('--LABELS_DICTIONARY', metavar='STRING', + action='store', nargs='+', + help='Name of classLabels CSV-file (default: %(default)s)', + default='classLabels.csv') + groupStandard.add_argument('--classificationIndices', metavar='STRING', + action='store', help='Name of classLabels-Description CSV-file (default: %(default)s)', default='classLabels-Description.csv') - groupStandard.add_argument('--nbCores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int, + groupStandard.add_argument('--nbCores', metavar='INT', action='store', + help='Number of cores, -1 for all', type=int, default=1) groupStandard.add_argument('--randomState', metavar='INT', action='store', - help='Seed for the random state or pickable randomstate file', default=42) - groupStandard.add_argument('--hyperParamSearch', metavar='STRING', action='store', + help='Seed for the random state or pickable randomstate file', + default=42) + groupStandard.add_argument('--hyperParamSearch', metavar='STRING', + action='store', help='The type of method used tosearch the best set of hyper parameters', default='randomizedSearch') - groupStandard.add_argument('--metrics', metavar='STRING', action='store', nargs="+", + groupStandard.add_argument('--metrics', metavar='STRING', action='store', + nargs="+", help='Metrics used in the experimentation, the first will be the one used in CV', default=['']) groupStandard.add_argument('--nIter', metavar='INT', action='store', - help='Number of itetarion in hyper parameter search', type=int, + help='Number of itetarion in hyper parameter search', + type=int, default=10) args = parser.parse_args() @@ -217,7 +258,7 @@ if __name__ == "__main__": # Extract the data using MPI ? DATASET = None - labels = None #(get from CSV ?) + labels = None # (get from CSV ?) 
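# Minimal sketch of how the argument parser declared above can be exercised.
# Only a few of the flags are reproduced here, and the sample values handed to
# parse_args() are assumptions for illustration only.
import argparse

sketch_parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
sketch_parser.add_argument('--name', metavar='STRING', action='store',
                           help='Name of Database (default: %(default)s)',
                           default='DB')
sketch_parser.add_argument('--nbCores', metavar='INT', action='store',
                           help='Number of cores, -1 for all', type=int, default=1)
sketch_parser.add_argument('--nIter', metavar='INT', action='store',
                           help='Number of iterations in hyper parameter search',
                           type=int, default=10)
sketch_args = sketch_parser.parse_args(['--name', 'Plausible', '--nbCores', '2'])
print(sketch_args.name, sketch_args.nbCores, sketch_args.nIter)  # Plausible 2 10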
logfilename = "gen a good logfilename" @@ -231,15 +272,18 @@ if __name__ == "__main__": else: logfile += ".log" - logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logfile, level=logging.DEBUG, + logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', + filename=logfile, level=logging.DEBUG, filemode='w') if args.log: logging.getLogger().addHandler(logging.StreamHandler()) - res = ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCores, databaseType, path, - LABELS_DICTIONARY, randomState, labels, - hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=nIter, **kwargs) + res = ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, + nbCores, databaseType, path, + LABELS_DICTIONARY, randomState, labels, + hyperParamSearch=hyperParamSearch, metrics=metrics, + nIter=nIter, **kwargs) # Pickle the res # Go put your token diff --git a/multiview_platform/MonoMultiViewClassifiers/Multiview/MultiviewUtils.py b/multiview_platform/MonoMultiViewClassifiers/Multiview/MultiviewUtils.py index d06972b7a7ee06c3c5c5c248d5c2a04e841d39da..1bc415bf0ac4a9fc1b57116944bbc0c7476ae5b3 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Multiview/MultiviewUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Multiview/MultiviewUtils.py @@ -1,5 +1,6 @@ from .. import MultiviewClassifiers + class MultiviewResult(object): def __init__(self, classifier_name, classifier_config, metrics_scores, full_labels, test_labels_multiclass): @@ -10,6 +11,8 @@ class MultiviewResult(object): self.y_test_multiclass_pred = test_labels_multiclass def get_classifier_name(self): - multiviewClassifierPackage = getattr(MultiviewClassifiers, self.classifier_name) - multiviewClassifierModule = getattr(multiviewClassifierPackage, self.classifier_name + "Module") - return multiviewClassifierModule.genName(self.classifier_config) \ No newline at end of file + multiviewClassifierPackage = getattr(MultiviewClassifiers, + self.classifier_name) + multiviewClassifierModule = getattr(multiviewClassifierPackage, + self.classifier_name + "Module") + return multiviewClassifierModule.genName(self.classifier_config) diff --git a/multiview_platform/MonoMultiViewClassifiers/Multiview/analyzeResults.py b/multiview_platform/MonoMultiViewClassifiers/Multiview/analyzeResults.py index 887e6da3dc8f3d9bde2561e88ccc88ce91b5761a..bd06d7f27ef3792ac2595db77ff6a4632db6ab02 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Multiview/analyzeResults.py +++ b/multiview_platform/MonoMultiViewClassifiers/Multiview/analyzeResults.py @@ -4,34 +4,44 @@ from .. 
import Metrics __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype + def printMetricScore(metricScores, metrics): metricScoreString = "\n\n" for metric in metrics: metricModule = getattr(Metrics, metric[0]) if metric[1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) + metricKWARGS = dict((index, metricConfig) for index, metricConfig in + enumerate(metric[1])) else: metricKWARGS = {} - metricScoreString += "\tFor " + metricModule.getConfig(**metricKWARGS) + " : " - metricScoreString += "\n\t\t- Score on train : " + str(metricScores[metric[0]][0]) - metricScoreString += "\n\t\t- Score on test : " + str(metricScores[metric[0]][1]) + metricScoreString += "\tFor " + metricModule.getConfig( + **metricKWARGS) + " : " + metricScoreString += "\n\t\t- Score on train : " + str( + metricScores[metric[0]][0]) + metricScoreString += "\n\t\t- Score on test : " + str( + metricScores[metric[0]][1]) metricScoreString += "\n\n" return metricScoreString -def getTotalMetricScores(metric, trainLabels, testLabels, validationIndices, learningIndices, labels): +def getTotalMetricScores(metric, trainLabels, testLabels, validationIndices, + learningIndices, labels): metricModule = getattr(Metrics, metric[0]) if metric[1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) + metricKWARGS = dict((index, metricConfig) for index, metricConfig in + enumerate(metric[1])) else: metricKWARGS = {} try: - trainScore = metricModule.score(labels[learningIndices], trainLabels, **metricKWARGS) + trainScore = metricModule.score(labels[learningIndices], trainLabels, + **metricKWARGS) except: print(labels[learningIndices]) print(trainLabels) - import pdb;pdb.set_trace() - testScore = metricModule.score(labels[validationIndices], testLabels, **metricKWARGS) + import pdb; + pdb.set_trace() + testScore = metricModule.score(labels[validationIndices], testLabels, + **metricKWARGS) return [trainScore, testScore] @@ -39,8 +49,10 @@ def getMetricsScores(metrics, trainLabels, testLabels, validationIndices, learningIndices, labels): metricsScores = {} for metric in metrics: - metricsScores[metric[0]] = getTotalMetricScores(metric, trainLabels, testLabels, - validationIndices, learningIndices, labels) + metricsScores[metric[0]] = getTotalMetricScores(metric, trainLabels, + testLabels, + validationIndices, + learningIndices, labels) return metricsScores @@ -51,32 +63,39 @@ def execute(classifier, trainLabels, name, KFolds, hyperParamSearch, nIter, metrics, viewsIndices, randomState, labels, classifierModule): - classifierNameString = classifierModule.genName(classificationKWARGS) CLASS_LABELS = labels learningIndices, validationIndices, testIndicesMulticlass = classificationIndices metricModule = getattr(Metrics, metrics[0][0]) if metrics[0][1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metrics[0][1])) + metricKWARGS = dict((index, metricConfig) for index, metricConfig in + enumerate(metrics[0][1])) else: metricKWARGS = {} - scoreOnTrain = metricModule.score(CLASS_LABELS[learningIndices], CLASS_LABELS[learningIndices], **metricKWARGS) - scoreOnTest = metricModule.score(CLASS_LABELS[validationIndices], testLabels, **metricKWARGS) + scoreOnTrain = metricModule.score(CLASS_LABELS[learningIndices], + CLASS_LABELS[learningIndices], + **metricKWARGS) + scoreOnTest = metricModule.score(CLASS_LABELS[validationIndices], + testLabels, **metricKWARGS) 
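# Minimal sketch of the metric lookup-and-score pattern used in execute() and
# getTotalMetricScores() above. In the platform, getattr(Metrics, metric[0])
# returns a metric module exposing score() and getConfig(); the namespace below
# only mimics that interface with scikit-learn's f1_score and is an assumption,
# not the platform's actual Metrics package.
import numpy as np
from types import SimpleNamespace
from sklearn.metrics import f1_score

metricModule = SimpleNamespace(
    score=lambda y_true, y_pred, **kwargs: f1_score(y_true, y_pred, **kwargs),
    getConfig=lambda **kwargs: "F1 score, config: " + str(kwargs))

metric = ["f1_score", None]  # same [name, config] shape as the metrics[0] entries
metricKWARGS = {}            # metric[1], when not None, becomes score() keyword arguments

labels = np.array([0, 1, 1, 0, 1, 0])          # toy ground truth
predictions = np.array([0, 1, 0, 0, 1, 1])     # toy predictions
learningIndices = np.array([0, 1, 2])
validationIndices = np.array([3, 4, 5])

trainScore = metricModule.score(labels[learningIndices],
                                predictions[learningIndices], **metricKWARGS)
testScore = metricModule.score(labels[validationIndices],
                               predictions[validationIndices], **metricKWARGS)
print(metricModule.getConfig(**metricKWARGS), trainScore, testScore)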
classifierConfiguration = classifier.getConfigString(classificationKWARGS) stringAnalysis = "\t\tResult for Multiview classification with " + classifierNameString + \ - "\n\n" + metrics[0][0] + " :\n\t-On Train : " + str(scoreOnTrain) + "\n\t-On Test : " + str( + "\n\n" + metrics[0][0] + " :\n\t-On Train : " + str( + scoreOnTrain) + "\n\t-On Test : " + str( scoreOnTest) + \ "\n\nDataset info :\n\t-Database name : " + name + "\n\t-Labels : " + \ - ', '.join(LABELS_DICTIONARY.values()) + "\n\t-Views : " + ', '.join(views) + "\n\t-" + str( + ', '.join( + LABELS_DICTIONARY.values()) + "\n\t-Views : " + ', '.join( + views) + "\n\t-" + str( KFolds.n_splits) + \ " folds\n\nClassification configuration : \n\t-Algorithm used : " + classifierNameString + " with : " + classifierConfiguration metricsScores = getMetricsScores(metrics, trainLabels, testLabels, validationIndices, learningIndices, labels) stringAnalysis += printMetricScore(metricsScores, metrics) - stringAnalysis += "\n\n Interpretation : \n\n" + classifier.getSpecificAnalysis(classificationKWARGS) + stringAnalysis += "\n\n Interpretation : \n\n" + classifier.getSpecificAnalysis( + classificationKWARGS) imagesAnalysis = {} - return stringAnalysis, imagesAnalysis, metricsScores \ No newline at end of file + return stringAnalysis, imagesAnalysis, metricsScores diff --git a/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/__init__.py b/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/__init__.py index 2f08d4c626a5f86381ca7217f6320107a4f9d53c..ab051c97118c8fb7de2bd5ec2aaa75ea007c0dd4 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/__init__.py +++ b/multiview_platform/MonoMultiViewClassifiers/MultiviewClassifiers/__init__.py @@ -1,7 +1,8 @@ import os for module in os.listdir(os.path.dirname(os.path.realpath(__file__))): - if module == '__init__.py' or module[-3:] == '.py' or module[-4:] == '.pyc' or module == '__pycache__' : + if module == '__init__.py' or module[-3:] == '.py' or module[ + -4:] == '.pyc' or module == '__pycache__': continue __import__(module, locals(), globals(), [], 1) del module diff --git a/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py b/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py index 4f97eb04c7ac7f506ba616eca756c6d2eed66945..34fb2ad53ef8eadceef98f57dac0490c360c3b6c 100644 --- a/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py +++ b/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py @@ -1,13 +1,13 @@ # Import built-in modules -import time -import os import errno import logging +import os +import time +import matplotlib as mpl # Import third party modules import matplotlib.pyplot as plt import numpy as np -import matplotlib as mpl # Import own Modules from . 
import Metrics @@ -44,7 +44,8 @@ def autolabel(rects, ax, set=1, std=None): height = rect.get_height() if std is not None: ax.text(rect.get_x() + rect.get_width() / 2., text_height, - "%.2f" % height + u'\u00B1' + "%.2f" % std[rectIndex], weight=weight, + "%.2f" % height + u'\u00B1' + "%.2f" % std[rectIndex], + weight=weight, ha='center', va='bottom', size="x-small") else: ax.text(rect.get_x() + rect.get_width() / 2., text_height, @@ -109,10 +110,12 @@ def getExampleErrorsBiclass(groud_truth, results): exampleErrors = {} for classifierResult in results: - errorOnExamples = np.equal(classifierResult.full_labels_pred, groud_truth).astype(int) - unseenExamples = np.where(groud_truth==-100)[0] - errorOnExamples[unseenExamples]=-100 - exampleErrors[classifierResult.get_classifier_name()] = {"errorOnExamples": errorOnExamples} + errorOnExamples = np.equal(classifierResult.full_labels_pred, + groud_truth).astype(int) + unseenExamples = np.where(groud_truth == -100)[0] + errorOnExamples[unseenExamples] = -100 + exampleErrors[classifierResult.get_classifier_name()] = { + "errorOnExamples": errorOnExamples} return exampleErrors @@ -139,14 +142,15 @@ def get_fig_size(nb_results, min_size=15, multiplier=1.0, bar_width=0.35): bar_width : float The width of the bars in the figure. Mainly here to centralize bar_width. """ - size = nb_results*multiplier + size = nb_results * multiplier if size < min_size: size = min_size fig_kwargs = {"figsize": (size, size / 3)} return fig_kwargs, bar_width -def sort_by_test_score(train_scores,test_scores, names, train_STDs=None, test_STDs=None): +def sort_by_test_score(train_scores, test_scores, names, train_STDs=None, + test_STDs=None): r"""Used to sort the results (names and both scores) in descending test score order. Parameters @@ -190,7 +194,8 @@ def sort_by_test_score(train_scores,test_scores, names, train_STDs=None, test_ST return sorted_names, sorted_train_scores, sorted_test_scores, sorted_train_STDs, sorted_test_STDs -def plotMetricScores(trainScores, testScores, names, nbResults, metricName, fileName, +def plotMetricScores(trainScores, testScores, names, nbResults, metricName, + fileName, tag="", train_STDs=None, test_STDs=None): r"""Used to plot and save the score barplot for a specific metric. 
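# Minimal sketch of the per-example error encoding computed above in
# getExampleErrorsBiclass(): 1 = example classified correctly, 0 = misclassified,
# -100 = example unseen by the classifier (its ground truth is already -100).
# The toy arrays are assumptions for illustration.
import numpy as np

ground_truth = np.array([0, 1, 1, -100, 0])
full_labels_pred = np.array([0, 0, 1, 1, 0])

errorOnExamples = np.equal(full_labels_pred, ground_truth).astype(int)
errorOnExamples[np.where(ground_truth == -100)[0]] = -100
print(errorOnExamples)  # -> 1, 0, 1, -100, 1
# gen_error_data(), further down, sums these flags over classifiers to count how
# many classifiers failed on each example.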
@@ -221,14 +226,17 @@ def plotMetricScores(trainScores, testScores, names, nbResults, metricName, file figKW, barWidth = get_fig_size(nbResults) - names, trainScores, testScores, train_STDs, test_STDs = sort_by_test_score(trainScores, testScores, names, - train_STDs, test_STDs) + names, trainScores, testScores, train_STDs, test_STDs = sort_by_test_score( + trainScores, testScores, names, + train_STDs, test_STDs) f, ax = plt.subplots(nrows=1, ncols=1, **figKW) - ax.set_title(metricName + "\n"+ tag +" scores for each classifier") + ax.set_title(metricName + "\n" + tag + " scores for each classifier") - rects = ax.bar(range(nbResults), testScores, barWidth, color="0.1", yerr=test_STDs) - rect2 = ax.bar(np.arange(nbResults) + barWidth, trainScores, barWidth, color="0.8", yerr=train_STDs) + rects = ax.bar(range(nbResults), testScores, barWidth, color="0.1", + yerr=test_STDs) + rect2 = ax.bar(np.arange(nbResults) + barWidth, trainScores, barWidth, + color="0.8", yerr=train_STDs) autolabel(rects, ax, set=1, std=test_STDs) autolabel(rect2, ax, set=2, std=train_STDs) @@ -241,10 +249,10 @@ def plotMetricScores(trainScores, testScores, names, nbResults, metricName, file plt.tight_layout() except: pass - f.savefig(fileName+'.png') + f.savefig(fileName + '.png') plt.close() import pandas as pd - if train_STDs is None : + if train_STDs is None: dataframe = pd.DataFrame(np.transpose(np.concatenate(( trainScores.reshape((trainScores.shape[0], 1)), testScores.reshape((trainScores.shape[0], 1))), axis=1)), @@ -254,8 +262,9 @@ def plotMetricScores(trainScores, testScores, names, nbResults, metricName, file trainScores.reshape((trainScores.shape[0], 1)), train_STDs.reshape((trainScores.shape[0], 1)), testScores.reshape((trainScores.shape[0], 1)), - test_STDs.reshape((trainScores.shape[0], 1))), axis=1)), columns=names) - dataframe.to_csv(fileName+".csv") + test_STDs.reshape((trainScores.shape[0], 1))), axis=1)), + columns=names) + dataframe.to_csv(fileName + ".csv") def publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames): @@ -277,16 +286,23 @@ def publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames): ------- """ for metricName, metricScores in metricsScores.items(): - logging.debug("Start:\t Biclass score graph generation for "+metricName) + logging.debug( + "Start:\t Biclass score graph generation for " + metricName) nbResults = len(metricScores["testScores"]) - fileName = directory + time.strftime("%Y_%m_%d-%H_%M_%S") + "-" + databaseName +"-"+"_vs_".join(labelsNames)+ "-" + metricName + fileName = directory + time.strftime( + "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + "_vs_".join( + labelsNames) + "-" + metricName - plotMetricScores(np.array(metricScores["trainScores"]), np.array(metricScores["testScores"]), - np.array(metricScores["classifiersNames"]), nbResults, metricName, fileName, tag=" "+" vs ".join(labelsNames)) + plotMetricScores(np.array(metricScores["trainScores"]), + np.array(metricScores["testScores"]), + np.array(metricScores["classifiersNames"]), nbResults, + metricName, fileName, + tag=" " + " vs ".join(labelsNames)) - logging.debug("Done:\t Biclass score graph generation for " + metricName) + logging.debug( + "Done:\t Biclass score graph generation for " + metricName) def iterCmap(statsIter): @@ -304,17 +320,19 @@ def iterCmap(statsIter): norm : matplotlib.colors.BoundaryNorm object The bounds for the colormap. 
""" - cmapList = ["red", "0.0"]+[str(float((i+1))/statsIter) for i in range(statsIter)] + cmapList = ["red", "0.0"] + [str(float((i + 1)) / statsIter) for i in + range(statsIter)] cmap = mpl.colors.ListedColormap(cmapList) - bounds = [-100*statsIter-0.5, -0.5] + bounds = [-100 * statsIter - 0.5, -0.5] for i in range(statsIter): - bounds.append(i+0.5) - bounds.append(statsIter+0.5) + bounds.append(i + 0.5) + bounds.append(statsIter + 0.5) norm = mpl.colors.BoundaryNorm(bounds, cmap.N) return cmap, norm -def publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, nbCopies, fileName, minSize=10, +def publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, nbCopies, + fileName, minSize=10, width_denominator=2.0, height_denominator=20.0, statsIter=1): r"""Used to generate a 2D plot of the errors. @@ -350,7 +368,8 @@ def publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, nbCopies, f figKW = {"figsize": (figWidth, figHeight)} fig, ax = plt.subplots(nrows=1, ncols=1, **figKW) cmap, norm = iterCmap(statsIter) - cax = plt.imshow(data, interpolation='none', cmap=cmap, norm=norm, aspect='auto') + cax = plt.imshow(data, interpolation='none', cmap=cmap, norm=norm, + aspect='auto') plt.title('Errors depending on the classifier') ticks = np.arange(nbCopies / 2 - 0.5, nbClassifiers * nbCopies, nbCopies) labels = classifiersNames @@ -358,7 +377,7 @@ def publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, nbCopies, f cbar = fig.colorbar(cax, ticks=[-100 * statsIter / 2, 0, statsIter]) cbar.ax.set_yticklabels(['Unseen', 'Always Wrong', 'Always Right']) fig.tight_layout() - fig.savefig(fileName+"error_analysis_2D.png", bbox_inches="tight") + fig.savefig(fileName + "error_analysis_2D.png", bbox_inches="tight") plt.close() @@ -386,7 +405,7 @@ def publishErrorsBarPlot(errorOnExamples, nbClassifiers, nbExamples, fileName): plt.bar(x, errorOnExamples) plt.ylim([0, nbClassifiers]) plt.title("Number of classifiers that failed to classify each example") - fig.savefig(fileName+"error_analysis_bar.png") + fig.savefig(fileName + "error_analysis_bar.png") plt.close() @@ -430,9 +449,11 @@ def gen_error_data(example_errors, base_file_name, nbCopies=2): data = np.zeros((nbExamples, nbClassifiers * nbCopies)) temp_data = np.zeros((nbExamples, nbClassifiers)) - for classifierIndex, (classifierName, errorOnExamples) in enumerate(example_errors.items()): + for classifierIndex, (classifierName, errorOnExamples) in enumerate( + example_errors.items()): for iterIndex in range(nbCopies): - data[:, classifierIndex * nbCopies + iterIndex] = errorOnExamples["errorOnExamples"] + data[:, classifierIndex * nbCopies + iterIndex] = errorOnExamples[ + "errorOnExamples"] temp_data[:, classifierIndex] = errorOnExamples["errorOnExamples"] errorOnExamples = -1 * np.sum(data, axis=1) / nbCopies + nbClassifiers @@ -443,19 +464,21 @@ def gen_error_data(example_errors, base_file_name, nbCopies=2): def publishExampleErrors(exampleErrors, directory, databaseName, labelsNames): - logging.debug("Start:\t Biclass Label analysis figure generation") - base_file_name = directory + time.strftime("%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + "_vs_".join( + base_file_name = directory + time.strftime( + "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + "_vs_".join( labelsNames) + "-" - nbClassifiers, nbExamples, nCopies, classifiersNames, data, errorOnExamples = gen_error_data(exampleErrors, - base_file_name) + nbClassifiers, nbExamples, nCopies, classifiersNames, data, errorOnExamples = gen_error_data( + 
exampleErrors, + base_file_name) + publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, nCopies, + base_file_name) - publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, nCopies, base_file_name) - - publishErrorsBarPlot(errorOnExamples, nbClassifiers, nbExamples, base_file_name) + publishErrorsBarPlot(errorOnExamples, nbClassifiers, nbExamples, + base_file_name) logging.debug("Done:\t Biclass Label analysis figures generation") @@ -524,17 +547,22 @@ def analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics): labelsNames = [arguments["LABELS_DICTIONARY"][0], arguments["LABELS_DICTIONARY"][1]] - publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames) - publishExampleErrors(exampleErrors, directory, databaseName, labelsNames) + publishMetricsGraphs(metricsScores, directory, databaseName, + labelsNames) + publishExampleErrors(exampleErrors, directory, databaseName, + labelsNames) - biclassResults[iteridex][str(classifierPositive) + str(classifierNegative)] = {"metricsScores": metricsScores, - "exampleErrors": exampleErrors} + biclassResults[iteridex][ + str(classifierPositive) + str(classifierNegative)] = { + "metricsScores": metricsScores, + "exampleErrors": exampleErrors} logging.debug("Done:\t Analzing all biclass resuls") return biclassResults -def genMetricsScoresMulticlass(results, trueLabels, metrics, argumentsDictionaries): +def genMetricsScoresMulticlass(results, trueLabels, metrics, + argumentsDictionaries): """Used to add all the metrics scores to the multiclass result structure for each clf and each iteration""" logging.debug("Start:\t Getting multiclass scores for each metric") @@ -544,20 +572,24 @@ def genMetricsScoresMulticlass(results, trueLabels, metrics, argumentsDictionari for iterIndex, iterResults in enumerate(results): for argumentsDictionary in argumentsDictionaries: - if argumentsDictionary["flag"][0]==iterIndex: - classificationIndices = argumentsDictionary["classificationIndices"] + if argumentsDictionary["flag"][0] == iterIndex: + classificationIndices = argumentsDictionary[ + "classificationIndices"] trainIndices, testIndices, multiclassTestIndices = classificationIndices for classifierName, resultDictionary in iterResults.items(): if not "metricsScores" in resultDictionary: - results[iterIndex][classifierName]["metricsScores"]={} + results[iterIndex][classifierName]["metricsScores"] = {} trainScore = metricModule.score(trueLabels[trainIndices], - resultDictionary["labels"][trainIndices], + resultDictionary["labels"][ + trainIndices], multiclass=True) - testScore = metricModule.score(trueLabels[multiclassTestIndices], - resultDictionary["labels"][multiclassTestIndices], - multiclass=True) - results[iterIndex][classifierName]["metricsScores"][metric[0]] = [trainScore, testScore] + testScore = metricModule.score( + trueLabels[multiclassTestIndices], + resultDictionary["labels"][multiclassTestIndices], + multiclass=True) + results[iterIndex][classifierName]["metricsScores"][ + metric[0]] = [trainScore, testScore] logging.debug("Done:\t Getting multiclass scores for each metric") return results @@ -570,51 +602,69 @@ def getErrorOnLabelsMulticlass(multiclassResults, multiclassLabels): for iterIndex, iterResults in enumerate(multiclassResults): for classifierName, classifierResults in iterResults.items(): errorOnExamples = classifierResults["labels"] == multiclassLabels - multiclassResults[iterIndex][classifierName]["errorOnExamples"] = errorOnExamples.astype(int) + multiclassResults[iterIndex][classifierName][ 
+ "errorOnExamples"] = errorOnExamples.astype(int) logging.debug("Done:\t Getting errors on each example for each classifier") return multiclassResults -def publishMulticlassScores(multiclassResults, metrics, statsIter, direcories, databaseName): +def publishMulticlassScores(multiclassResults, metrics, statsIter, direcories, + databaseName): for iterIndex in range(statsIter): directory = direcories[iterIndex] for metric in metrics: - logging.debug("Start:\t Multiclass score graph generation for "+metric[0]) - classifiersNames = np.array([classifierName for classifierName in multiclassResults[iterIndex].keys()]) - trainScores = np.array([multiclassResults[iterIndex][classifierName]["metricsScores"][metric[0]][0] - for classifierName in classifiersNames]) - validationScores = np.array([multiclassResults[iterIndex][classifierName]["metricsScores"][metric[0]][1] - for classifierName in classifiersNames]) - + logging.debug( + "Start:\t Multiclass score graph generation for " + metric[0]) + classifiersNames = np.array([classifierName for classifierName in + multiclassResults[iterIndex].keys()]) + trainScores = np.array([multiclassResults[iterIndex][ + classifierName]["metricsScores"][ + metric[0]][0] + for classifierName in classifiersNames]) + validationScores = np.array([multiclassResults[iterIndex][ + classifierName]["metricsScores"][ + metric[0]][1] + for classifierName in + classifiersNames]) nbResults = classifiersNames.shape[0] - fileName = directory + time.strftime("%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + metric[0] + ".png" + fileName = directory + time.strftime( + "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" + metric[ + 0] + ".png" - plotMetricScores(trainScores, validationScores, classifiersNames, nbResults, metric[0], fileName, tag=" multiclass") + plotMetricScores(trainScores, validationScores, classifiersNames, + nbResults, metric[0], fileName, tag=" multiclass") - logging.debug("Done:\t Multiclass score graph generation for " + metric[0]) + logging.debug( + "Done:\t Multiclass score graph generation for " + metric[0]) -def publishMulticlassExmapleErrors(multiclassResults, directories, databaseName): +def publishMulticlassExmapleErrors(multiclassResults, directories, + databaseName): for iterIndex, multiclassResult in enumerate(multiclassResults): directory = directories[iterIndex] logging.debug("Start:\t Multiclass Label analysis figure generation") - base_file_name = directory + time.strftime("%Y_%m_%d-%H_%M_%S") + "-" + databaseName +"-" + base_file_name = directory + time.strftime( + "%Y_%m_%d-%H_%M_%S") + "-" + databaseName + "-" - nbClassifiers, nbExamples, nCopies, classifiersNames, data, errorOnExamples = gen_error_data(multiclassResult, - base_file_name) + nbClassifiers, nbExamples, nCopies, classifiersNames, data, errorOnExamples = gen_error_data( + multiclassResult, + base_file_name) - publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, nCopies, base_file_name) + publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, + nCopies, base_file_name) - publishErrorsBarPlot(errorOnExamples, nbClassifiers, nbExamples, base_file_name) + publishErrorsBarPlot(errorOnExamples, nbClassifiers, nbExamples, + base_file_name) logging.debug("Done:\t Multiclass Label analysis figure generation") -def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamples, nbLabels, multiclassLabels, +def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, + nbExamples, nbLabels, multiclassLabels, metrics, classificationIndices, 
directories): """Used to transform one versus one results in multiclass results and to publish it""" multiclassResults = [{} for _ in range(statsIter)] @@ -626,34 +676,48 @@ def analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamp for benchmarkArgumentDictionary in benchmarkArgumentDictionaries: if benchmarkArgumentDictionary["flag"] == flag: - trainIndices, testIndices, testMulticlassIndices = benchmarkArgumentDictionary["classificationIndices"] + trainIndices, testIndices, testMulticlassIndices = \ + benchmarkArgumentDictionary["classificationIndices"] for classifierResult in result: classifierName = classifierResult.get_classifier_name() if classifierName not in multiclassResults[iterIndex]: - multiclassResults[iterIndex][classifierName] = np.zeros((nbExamples, nbLabels),dtype=int) + multiclassResults[iterIndex][classifierName] = np.zeros( + (nbExamples, nbLabels), dtype=int) for exampleIndex in trainIndices: label = classifierResult.full_labels_pred[exampleIndex] if label == 1: - multiclassResults[iterIndex][classifierName][exampleIndex, classifierPositive] += 1 + multiclassResults[iterIndex][classifierName][ + exampleIndex, classifierPositive] += 1 else: - multiclassResults[iterIndex][classifierName][exampleIndex, classifierNegative] += 1 - for multiclassIndex, exampleIndex in enumerate(testMulticlassIndices): + multiclassResults[iterIndex][classifierName][ + exampleIndex, classifierNegative] += 1 + for multiclassIndex, exampleIndex in enumerate( + testMulticlassIndices): label = classifierResult.y_test_multiclass_pred[multiclassIndex] if label == 1: - multiclassResults[iterIndex][classifierName][exampleIndex, classifierPositive] += 1 + multiclassResults[iterIndex][classifierName][ + exampleIndex, classifierPositive] += 1 else: - multiclassResults[iterIndex][classifierName][exampleIndex, classifierNegative] += 1 + multiclassResults[iterIndex][classifierName][ + exampleIndex, classifierNegative] += 1 for iterIndex, multiclassiterResult in enumerate(multiclassResults): for key, value in multiclassiterResult.items(): - multiclassResults[iterIndex][key] = {"labels": np.argmax(value, axis=1)} - - multiclassResults = genMetricsScoresMulticlass(multiclassResults, multiclassLabels, metrics, benchmarkArgumentDictionaries) - multiclassResults = getErrorOnLabelsMulticlass(multiclassResults, multiclassLabels) - - publishMulticlassScores(multiclassResults, metrics, statsIter, directories, benchmarkArgumentDictionaries[0]["args"].name) - publishMulticlassExmapleErrors(multiclassResults, directories, benchmarkArgumentDictionaries[0]["args"].name) + multiclassResults[iterIndex][key] = { + "labels": np.argmax(value, axis=1)} + + multiclassResults = genMetricsScoresMulticlass(multiclassResults, + multiclassLabels, metrics, + benchmarkArgumentDictionaries) + multiclassResults = getErrorOnLabelsMulticlass(multiclassResults, + multiclassLabels) + + publishMulticlassScores(multiclassResults, metrics, statsIter, directories, + benchmarkArgumentDictionaries[0]["args"].name) + publishMulticlassExmapleErrors(multiclassResults, directories, + benchmarkArgumentDictionaries[0][ + "args"].name) return multiclassResults @@ -661,12 +725,16 @@ def numpy_mean_and_std(scores_array): return np.mean(scores_array, axis=1), np.std(scores_array, axis=1) -def publishIterBiclassMetricsScores(iterResults, directory, labelsDictionary, classifiersDict, dataBaseName, statsIter, minSize=10): +def publishIterBiclassMetricsScores(iterResults, directory, labelsDictionary, + classifiersDict, dataBaseName, 
statsIter, + minSize=10): for labelsCombination, iterResult in iterResults.items(): - currentDirectory = directory+ labelsDictionary[int(labelsCombination[0])]+"-vs-"+labelsDictionary[int(labelsCombination[1])]+"/" - if not os.path.exists(os.path.dirname(currentDirectory+"a")): + currentDirectory = directory + labelsDictionary[ + int(labelsCombination[0])] + "-vs-" + labelsDictionary[ + int(labelsCombination[1])] + "/" + if not os.path.exists(os.path.dirname(currentDirectory + "a")): try: - os.makedirs(os.path.dirname(currentDirectory+"a")) + os.makedirs(os.path.dirname(currentDirectory + "a")) except OSError as exc: if exc.errno != errno.EEXIST: raise @@ -676,11 +744,15 @@ def publishIterBiclassMetricsScores(iterResults, directory, labelsDictionary, cl testMeans, testSTDs = numpy_mean_and_std(scores["testScores"]) names = np.array([name for name in classifiersDict.keys()]) - fileName = currentDirectory + time.strftime("%Y_%m_%d-%H_%M_%S") + "-" + dataBaseName + "-Mean_on_" + str(statsIter) + "_iter-" + metricName + ".png" + fileName = currentDirectory + time.strftime( + "%Y_%m_%d-%H_%M_%S") + "-" + dataBaseName + "-Mean_on_" + str( + statsIter) + "_iter-" + metricName + ".png" nbResults = names.shape[0] - plotMetricScores(trainScores=trainMeans, testScores=testMeans, names=names, nbResults=nbResults, - metricName=metricName, fileName=fileName, tag=" averaged", + plotMetricScores(trainScores=trainMeans, testScores=testMeans, + names=names, nbResults=nbResults, + metricName=metricName, fileName=fileName, + tag=" averaged", train_STDs=trainSTDs, test_STDs=testSTDs) @@ -690,51 +762,71 @@ def gen_error_dat_glob(combiResults, statsIter, base_file_name): data = np.transpose(combiResults["errorOnExamples"]) errorOnExamples = -1 * np.sum(data, axis=1) + (nbClassifiers * statsIter) np.savetxt(base_file_name + "clf_errors.csv", data, delimiter=",") - np.savetxt(base_file_name + "example_errors.csv", errorOnExamples, delimiter=",") + np.savetxt(base_file_name + "example_errors.csv", errorOnExamples, + delimiter=",") return nbExamples, nbClassifiers, data, errorOnExamples -def publishIterBiclassExampleErrors(iterResults, directory, labelsDictionary, classifiersDict, statsIter, minSize=10): +def publishIterBiclassExampleErrors(iterResults, directory, labelsDictionary, + classifiersDict, statsIter, minSize=10): for labelsCombination, combiResults in iterResults.items(): - base_file_name = directory + labelsDictionary[int(labelsCombination[0])]+"-vs-"+\ - labelsDictionary[int(labelsCombination[1])]+"/" + time.strftime("%Y_%m_%d-%H_%M_%S") + "-" - classifiersNames = [classifierName for classifierName in classifiersDict.values()] - logging.debug("Start:\t Global biclass label analysis figure generation") + base_file_name = directory + labelsDictionary[ + int(labelsCombination[0])] + "-vs-" + \ + labelsDictionary[ + int(labelsCombination[1])] + "/" + time.strftime( + "%Y_%m_%d-%H_%M_%S") + "-" + classifiersNames = [classifierName for classifierName in + classifiersDict.values()] + logging.debug( + "Start:\t Global biclass label analysis figure generation") - nbExamples, nbClassifiers, data, errorOnExamples = gen_error_dat_glob(combiResults, statsIter, base_file_name) + nbExamples, nbClassifiers, data, errorOnExamples = gen_error_dat_glob( + combiResults, statsIter, base_file_name) - publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, 1, base_file_name, statsIter=statsIter) + publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, 1, + base_file_name, statsIter=statsIter) - 
publishErrorsBarPlot(errorOnExamples, nbClassifiers*statsIter, nbExamples, base_file_name) + publishErrorsBarPlot(errorOnExamples, nbClassifiers * statsIter, + nbExamples, base_file_name) - logging.debug("Done:\t Global biclass label analysis figures generation") + logging.debug( + "Done:\t Global biclass label analysis figures generation") -def publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames, dataBaseName, directory, statsIter, minSize=10): +def publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames, + dataBaseName, directory, statsIter, + minSize=10): for metricName, scores in iterMulticlassResults["metricsScores"].items(): - trainMeans, trainSTDs = numpy_mean_and_std(scores["trainScores"]) testMeans, testSTDs = numpy_mean_and_std(scores["testScores"]) nbResults = classifiersNames.shape[0] - fileName = directory + time.strftime("%Y_%m_%d-%H_%M_%S") + "-" + dataBaseName + "-Mean_on_" + str(statsIter) + "_iter-" + metricName + ".png" + fileName = directory + time.strftime( + "%Y_%m_%d-%H_%M_%S") + "-" + dataBaseName + "-Mean_on_" + str( + statsIter) + "_iter-" + metricName + ".png" - plotMetricScores(trainScores=trainMeans, testScores=testMeans, names=classifiersNames, nbResults=nbResults, - metricName=metricName, fileName=fileName, tag=" averaged multiclass", + plotMetricScores(trainScores=trainMeans, testScores=testMeans, + names=classifiersNames, nbResults=nbResults, + metricName=metricName, fileName=fileName, + tag=" averaged multiclass", train_STDs=trainSTDs, test_STDs=testSTDs) -def publishIterMulticlassExampleErrors(iterMulticlassResults, directory, classifiersNames, statsIter, minSize=10): - - logging.debug("Start:\t Global multiclass label analysis figures generation") +def publishIterMulticlassExampleErrors(iterMulticlassResults, directory, + classifiersNames, statsIter, minSize=10): + logging.debug( + "Start:\t Global multiclass label analysis figures generation") base_file_name = directory + time.strftime("%Y_%m_%d-%H_%M_%S") + "-" - nbExamples, nbClassifiers, data, errorOnExamples = gen_error_dat_glob(iterMulticlassResults, statsIter, base_file_name) + nbExamples, nbClassifiers, data, errorOnExamples = gen_error_dat_glob( + iterMulticlassResults, statsIter, base_file_name) - publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, 1, base_file_name, statsIter=statsIter) + publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, 1, + base_file_name, statsIter=statsIter) - publishErrorsBarPlot(errorOnExamples, nbClassifiers * statsIter, nbExamples, base_file_name) + publishErrorsBarPlot(errorOnExamples, nbClassifiers * statsIter, nbExamples, + base_file_name) logging.debug("Done:\t Global multiclass label analysis figures generation") @@ -742,64 +834,89 @@ def publishIterMulticlassExampleErrors(iterMulticlassResults, directory, classif def gen_classifiers_dict(results, metrics): classifiersDict = dict((classifierName, classifierIndex) for classifierIndex, classifierName - in enumerate(results[0][list(results[0].keys())[0]]["metricsScores"][metrics[0][0]]["classifiersNames"])) + in enumerate( + results[0][list(results[0].keys())[0]]["metricsScores"][metrics[0][0]][ + "classifiersNames"])) return classifiersDict, len(classifiersDict) -def add_new_labels_combination(iterBiclassResults, labelsComination, nbClassifiers, nbExamples): +def add_new_labels_combination(iterBiclassResults, labelsComination, + nbClassifiers, nbExamples): if labelsComination not in iterBiclassResults: iterBiclassResults[labelsComination] = {} 
iterBiclassResults[labelsComination]["metricsScores"] = {} - iterBiclassResults[labelsComination]["errorOnExamples"] = np.zeros((nbClassifiers, - nbExamples), - dtype=int) + iterBiclassResults[labelsComination]["errorOnExamples"] = np.zeros( + (nbClassifiers, + nbExamples), + dtype=int) return iterBiclassResults -def add_new_metric(iterBiclassResults, metric, labelsComination, nbClassifiers, statsIter): +def add_new_metric(iterBiclassResults, metric, labelsComination, nbClassifiers, + statsIter): if metric[0] not in iterBiclassResults[labelsComination]["metricsScores"]: - iterBiclassResults[labelsComination]["metricsScores"][metric[0]] = {"trainScores": - np.zeros((nbClassifiers, statsIter)), - "testScores": - np.zeros((nbClassifiers, statsIter))} + iterBiclassResults[labelsComination]["metricsScores"][metric[0]] = { + "trainScores": + np.zeros((nbClassifiers, statsIter)), + "testScores": + np.zeros((nbClassifiers, statsIter))} return iterBiclassResults -def analyzebiclassIter(biclassResults, metrics, statsIter, directory, labelsDictionary, dataBaseName, nbExamples): +def analyzebiclassIter(biclassResults, metrics, statsIter, directory, + labelsDictionary, dataBaseName, nbExamples): """Used to format the results in order to plot the mean results on the iterations""" iterBiclassResults = {} - classifiersDict, nbClassifiers = gen_classifiers_dict(biclassResults, metrics) + classifiersDict, nbClassifiers = gen_classifiers_dict(biclassResults, + metrics) for iterIndex, biclassResult in enumerate(biclassResults): for labelsComination, results in biclassResult.items(): for metric in metrics: - iterBiclassResults = add_new_labels_combination(iterBiclassResults, labelsComination, nbClassifiers, nbExamples) - iterBiclassResults = add_new_metric(iterBiclassResults, metric, labelsComination, nbClassifiers, statsIter) + iterBiclassResults = add_new_labels_combination( + iterBiclassResults, labelsComination, nbClassifiers, + nbExamples) + iterBiclassResults = add_new_metric(iterBiclassResults, metric, + labelsComination, + nbClassifiers, statsIter) metric_results = results["metricsScores"][metric[0]] - for classifierName, trainScore, testScore in zip(metric_results["classifiersNames"], - metric_results["trainScores"], - metric_results["testScores"],): - - iterBiclassResults[labelsComination]["metricsScores"][metric[0]]["trainScores"][classifiersDict[classifierName], iterIndex] = trainScore - iterBiclassResults[labelsComination]["metricsScores"][metric[0]]["testScores"][classifiersDict[classifierName], iterIndex] = testScore - - for classifierName, errorOnExample in results["exampleErrors"].items(): - iterBiclassResults[labelsComination]["errorOnExamples"][classifiersDict[classifierName], :] += errorOnExample["errorOnExamples"] - - publishIterBiclassMetricsScores(iterBiclassResults, directory, labelsDictionary, classifiersDict, dataBaseName, statsIter) - publishIterBiclassExampleErrors(iterBiclassResults, directory, labelsDictionary, classifiersDict, statsIter) - - -def analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics, dataBaseName, nbExamples): + for classifierName, trainScore, testScore in zip( + metric_results["classifiersNames"], + metric_results["trainScores"], + metric_results["testScores"], ): + iterBiclassResults[labelsComination]["metricsScores"][ + metric[0]]["trainScores"][ + classifiersDict[classifierName], iterIndex] = trainScore + iterBiclassResults[labelsComination]["metricsScores"][ + metric[0]]["testScores"][ + classifiersDict[classifierName], iterIndex] = 
testScore + + for classifierName, errorOnExample in results[ + "exampleErrors"].items(): + iterBiclassResults[labelsComination]["errorOnExamples"][ + classifiersDict[classifierName], :] += errorOnExample[ + "errorOnExamples"] + + publishIterBiclassMetricsScores(iterBiclassResults, directory, + labelsDictionary, classifiersDict, + dataBaseName, statsIter) + publishIterBiclassExampleErrors(iterBiclassResults, directory, + labelsDictionary, classifiersDict, + statsIter) + + +def analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics, + dataBaseName, nbExamples): """Used to mean the multiclass results on the iterations executed with different random states""" logging.debug("Start:\t Getting mean results for multiclass classification") iterMulticlassResults = {} nbClassifiers = len(multiclassResults[0]) - iterMulticlassResults["errorOnExamples"] = np.zeros((nbClassifiers,nbExamples),dtype=int) + iterMulticlassResults["errorOnExamples"] = np.zeros( + (nbClassifiers, nbExamples), dtype=int) iterMulticlassResults["metricsScores"] = {} classifiersNames = [] for iterIndex, multiclassResult in enumerate(multiclassResults): @@ -809,30 +926,46 @@ def analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics, data classifierIndex = classifiersNames.index(classifierName) for metric in metrics: if metric[0] not in iterMulticlassResults["metricsScores"]: - iterMulticlassResults["metricsScores"][metric[0]] = {"trainScores": - np.zeros((nbClassifiers, statsIter)), - "testScores": - np.zeros((nbClassifiers, statsIter))} - iterMulticlassResults["metricsScores"][metric[0]]["trainScores"][classifierIndex, iterIndex] = classifierResults["metricsScores"][metric[0]][0] - iterMulticlassResults["metricsScores"][metric[0]]["testScores"][classifierIndex, iterIndex] = classifierResults["metricsScores"][metric[0]][1] - iterMulticlassResults["errorOnExamples"][classifierIndex, :] += classifierResults["errorOnExamples"] + iterMulticlassResults["metricsScores"][metric[0]] = { + "trainScores": + np.zeros((nbClassifiers, statsIter)), + "testScores": + np.zeros((nbClassifiers, statsIter))} + iterMulticlassResults["metricsScores"][metric[0]][ + "trainScores"][classifierIndex, iterIndex] = \ + classifierResults["metricsScores"][metric[0]][0] + iterMulticlassResults["metricsScores"][metric[0]]["testScores"][ + classifierIndex, iterIndex] = \ + classifierResults["metricsScores"][metric[0]][1] + iterMulticlassResults["errorOnExamples"][classifierIndex, :] += \ + classifierResults["errorOnExamples"] logging.debug("Start:\t Getting mean results for multiclass classification") classifiersNames = np.array(classifiersNames) - publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames, dataBaseName, directory, statsIter) - publishIterMulticlassExampleErrors(iterMulticlassResults, directory, classifiersNames, statsIter) + publishIterMulticlassMetricsScores(iterMulticlassResults, classifiersNames, + dataBaseName, directory, statsIter) + publishIterMulticlassExampleErrors(iterMulticlassResults, directory, + classifiersNames, statsIter) -def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, multiclassLabels, metrics, - classificationIndices, directories, directory, labelsDictionary, nbExamples, nbLabels): +def getResults(results, statsIter, nbMulticlass, benchmarkArgumentDictionaries, + multiclassLabels, metrics, + classificationIndices, directories, directory, labelsDictionary, + nbExamples, nbLabels): """Used to analyze the results of the previous benchmarks""" 
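# Minimal sketch of the one-versus-one vote aggregation performed by
# analyzeMulticlass(), which is called below when nbMulticlass > 1: each biclass
# classifier votes for its positive class when it predicts 1 and for its negative
# class otherwise, and the multiclass prediction is the argmax of the votes.
# The three classifiers and their predictions are assumed toy values.
import numpy as np

nbExamples, nbLabels = 4, 3
votes = np.zeros((nbExamples, nbLabels), dtype=int)

# (classifierPositive, classifierNegative, biclass predictions per example)
one_vs_one_predictions = [(0, 1, [1, 0, 1, 0]),
                          (0, 2, [1, 1, 0, 0]),
                          (1, 2, [1, 0, 0, 1])]

for classifierPositive, classifierNegative, preds in one_vs_one_predictions:
    for exampleIndex, label in enumerate(preds):
        if label == 1:
            votes[exampleIndex, classifierPositive] += 1
        else:
            votes[exampleIndex, classifierNegative] += 1

print(np.argmax(votes, axis=1))  # one multiclass label per example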
dataBaseName = benchmarkArgumentDictionaries[0]["args"].name - biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, statsIter, metrics) + biclassResults = analyzeBiclass(results, benchmarkArgumentDictionaries, + statsIter, metrics) if nbMulticlass > 1: - multiclassResults = analyzeMulticlass(results, statsIter, benchmarkArgumentDictionaries, nbExamples, nbLabels, - multiclassLabels, metrics, classificationIndices, directories) + multiclassResults = analyzeMulticlass(results, statsIter, + benchmarkArgumentDictionaries, + nbExamples, nbLabels, + multiclassLabels, metrics, + classificationIndices, + directories) if statsIter > 1: - analyzebiclassIter(biclassResults, metrics, statsIter, directory, labelsDictionary, dataBaseName, nbExamples) + analyzebiclassIter(biclassResults, metrics, statsIter, directory, + labelsDictionary, dataBaseName, nbExamples) if nbMulticlass > 1: - analyzeIterMulticlass(multiclassResults, directory, statsIter, metrics, dataBaseName, nbExamples) - + analyzeIterMulticlass(multiclassResults, directory, statsIter, + metrics, dataBaseName, nbExamples) diff --git a/multiview_platform/MonoMultiViewClassifiers/__init__.py b/multiview_platform/MonoMultiViewClassifiers/__init__.py index 64731a57e2464eac6fdfd6eb71c3ad8b4bef943a..cf3772451d0cd072bd85c63925d798f94df00e1b 100644 --- a/multiview_platform/MonoMultiViewClassifiers/__init__.py +++ b/multiview_platform/MonoMultiViewClassifiers/__init__.py @@ -1,3 +1,4 @@ -from . import ExecClassif, ResultAnalysis, Metrics, MonoviewClassifiers, Monoview, Multiview, utils, MultiviewClassifiers +from . import ExecClassif, ResultAnalysis, Metrics, MonoviewClassifiers, \ + Monoview, Multiview, utils, MultiviewClassifiers __all__ = ['Metrics', 'Monoview', 'MonoviewClassifiers', 'Multiview', 'utils'] diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/Dataset.py b/multiview_platform/MonoMultiViewClassifiers/utils/Dataset.py index f11b060f303bf357ce5e18a467466fd509796f84..71ff205b3cd176ef6256da9910db007344cc13d9 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/Dataset.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/Dataset.py @@ -2,6 +2,7 @@ import logging import os import select import sys + import h5py import numpy as np from scipy import sparse @@ -21,12 +22,15 @@ def getV(DATASET, viewIndex, usedIndices=None): usedIndices = usedIndices[sortedIndices] if not DATASET.get("View" + str(viewIndex)).attrs["sparse"]: - return DATASET.get("View" + str(viewIndex))[usedIndices, :][np.argsort(sortedIndices), :] + return DATASET.get("View" + str(viewIndex))[usedIndices, :][ + np.argsort(sortedIndices), :] else: - sparse_mat = sparse.csr_matrix((DATASET.get("View" + str(viewIndex)).get("data").value, - DATASET.get("View" + str(viewIndex)).get("indices").value, - DATASET.get("View" + str(viewIndex)).get("indptr").value), - shape=DATASET.get("View" + str(viewIndex)).attrs["shape"])[usedIndices, :][ + sparse_mat = sparse.csr_matrix( + (DATASET.get("View" + str(viewIndex)).get("data").value, + DATASET.get("View" + str(viewIndex)).get("indices").value, + DATASET.get("View" + str(viewIndex)).get("indptr").value), + shape=DATASET.get("View" + str(viewIndex)).attrs["shape"])[ + usedIndices, :][ np.argsort(sortedIndices), :] return sparse_mat @@ -40,7 +44,6 @@ def getShape(DATASET, viewIndex): return DATASET.get("View" + str(viewIndex)).attrs["shape"] - def getValue(DATASET): """Used to get the value of a view in the HDF5 dataset even if it sparse""" if not DATASET.attrs["sparse"]: @@ -60,15 +63,17 @@ def 
extractSubset(matrix, usedIndices): oldindptr = matrix.indptr for exampleIndexIndex, exampleIndex in enumerate(usedIndices): newIndptr[exampleIndexIndex + 1] = newIndptr[exampleIndexIndex] + ( - oldindptr[exampleIndex + 1] - oldindptr[exampleIndex]) + oldindptr[exampleIndex + 1] - oldindptr[exampleIndex]) newData = np.ones(newIndptr[-1], dtype=bool) newIndices = np.zeros(newIndptr[-1], dtype=int) oldIndices = matrix.indices for exampleIndexIndex, exampleIndex in enumerate(usedIndices): - newIndices[newIndptr[exampleIndexIndex]:newIndptr[exampleIndexIndex + 1]] = oldIndices[ - oldindptr[exampleIndex]: - oldindptr[exampleIndex + 1]] - return sparse.csr_matrix((newData, newIndices, newIndptr), shape=(len(usedIndices), matrix.shape[1])) + newIndices[newIndptr[exampleIndexIndex]:newIndptr[ + exampleIndexIndex + 1]] = oldIndices[ + oldindptr[exampleIndex]: + oldindptr[exampleIndex + 1]] + return sparse.csr_matrix((newData, newIndices, newIndptr), + shape=(len(usedIndices), matrix.shape[1])) else: return matrix[usedIndices] @@ -94,13 +99,16 @@ def initMultipleDatasets(pathF, name, nbCores): """ if nbCores > 1: if DB.datasetsAlreadyExist(pathF, name, nbCores): - logging.debug("Info:\t Enough copies of the dataset are already available") + logging.debug( + "Info:\t Enough copies of the dataset are already available") pass else: - logging.debug("Start:\t Creating " + str(nbCores) + " temporary datasets for multiprocessing") - logging.warning(" WARNING : /!\ This may use a lot of HDD storage space : " + - str(os.path.getsize(pathF + name + ".hdf5") * nbCores / float( - 1024) / 1000 / 1000) + " Gbytes /!\ ") + logging.debug("Start:\t Creating " + str( + nbCores) + " temporary datasets for multiprocessing") + logging.warning( + " WARNING : /!\ This may use a lot of HDD storage space : " + + str(os.path.getsize(pathF + name + ".hdf5") * nbCores / float( + 1024) / 1000 / 1000) + " Gbytes /!\ ") confirmation = confirm() if not confirmation: sys.exit(0) @@ -125,7 +133,8 @@ def confirm(resp=True, timeout=15): def input_(timeout=15): """used as a UI to stop if too much HDD space will be used""" - logging.warning("You have " + str(timeout) + " seconds to stop the dataset copy by typing n") + logging.warning("You have " + str( + timeout) + " seconds to stop the dataset copy by typing n") i, o, e = select.select([sys.stdin], [], [], timeout) if i: return sys.stdin.readline().strip() @@ -138,4 +147,4 @@ def getMonoviewShared(path, name, viewName, labelsNames, classificationIndices): HDF5_dataset_file = h5py.File(path + name + ".hdf5", "w") X = HDF5_dataset_file.get(viewName).value Y = HDF5_dataset_file.get("Labels").value - return X, Y \ No newline at end of file + return X, Y diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py b/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py index 6484589ae10d26e78e64668c6e0a86127eaefc95..21267f61e4d1b580408b8301e33cb00fffdc5494 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py @@ -1,12 +1,8 @@ -import numpy as np -import math -from scipy import sparse +import errno import os -import logging + import h5py -import operator -import errno -import csv +import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils.validation import check_array @@ -24,11 +20,13 @@ def copyHDF5(pathF, name, nbCores): datasetFile.copy("/" + dataset, newDataSet["/"]) newDataSet.close() + class 
TanhNormalizer(BaseEstimator, TransformerMixin): """Normalize data using a tanh function. This is the normalizer used in the so-called "Never-ending paper". It remains here for reproduceability purposes, but you should use Scikit-Learn normalizers instead! """ + def __init__(self): self.mean = None self.std = None @@ -50,14 +48,13 @@ class TanhNormalizer(BaseEstimator, TransformerMixin): return self.transform(X) - - def datasetsAlreadyExist(pathF, name, nbCores): """Used to check if it's necessary to copy datasets""" allDatasetExist = True for coreIndex in range(nbCores): import os.path - allDatasetExist *= os.path.isfile(pathF + name + str(coreIndex) + ".hdf5") + allDatasetExist *= os.path.isfile( + pathF + name + str(coreIndex) + ".hdf5") return allDatasetExist @@ -70,7 +67,8 @@ def deleteHDF5(pathF, name, nbCores): def makeMeNoisy(viewData, randomState, percentage=15): """used to introduce some noise in the generated data""" viewData = viewData.astype(bool) - nbNoisyCoord = int(percentage / 100.0 * viewData.shape[0] * viewData.shape[1]) + nbNoisyCoord = int( + percentage / 100.0 * viewData.shape[0] * viewData.shape[1]) rows = range(viewData.shape[0]) cols = range(viewData.shape[1]) for _ in range(nbNoisyCoord): @@ -81,7 +79,9 @@ def makeMeNoisy(viewData, randomState, percentage=15): return noisyViewData -def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", randomState=None, full=True, add_noise=False, noise_std=0.15, nbView=3, +def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", + randomState=None, full=True, add_noise=False, + noise_std=0.15, nbView=3, nbClass=2, datasetLength=34, randomStateInt=None): """Used to generate a plausible dataset to test the algorithms""" randomStateInt = 42 @@ -95,17 +95,29 @@ def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", random raise datasetFile = h5py.File(pathF + "/Plausible.hdf5", "w") if NB_CLASS == 2: - CLASS_LABELS = np.array([0 for _ in range(int(datasetLength/2))] + [1 for _ in range(datasetLength-int(datasetLength/2))]) + CLASS_LABELS = np.array( + [0 for _ in range(int(datasetLength / 2))] + [1 for _ in range( + datasetLength - int(datasetLength / 2))]) for viewIndex in range(nbView): - viewData = np.array([np.zeros(nbFeatures) for _ in range(int(datasetLength/2))] + - [np.ones(nbFeatures)for _ in range(datasetLength-int(datasetLength/2))]) - fakeOneIndices = randomState.randint(0, int(datasetLength/2), int(datasetLength / 12)) - fakeZeroIndices = randomState.randint(int(datasetLength/2), datasetLength, int(datasetLength / 12)) - - viewData[fakeOneIndices] = np.ones((len(fakeOneIndices), nbFeatures)) - viewData[fakeZeroIndices] = np.zeros((len(fakeZeroIndices), nbFeatures)) + viewData = np.array( + [np.zeros(nbFeatures) for _ in range(int(datasetLength / 2))] + + [np.ones(nbFeatures) for _ in + range(datasetLength - int(datasetLength / 2))]) + fakeOneIndices = randomState.randint(0, int(datasetLength / 2), + int(datasetLength / 12)) + fakeZeroIndices = randomState.randint(int(datasetLength / 2), + datasetLength, + int(datasetLength / 12)) + + viewData[fakeOneIndices] = np.ones( + (len(fakeOneIndices), nbFeatures)) + viewData[fakeZeroIndices] = np.zeros( + (len(fakeZeroIndices), nbFeatures)) viewData = makeMeNoisy(viewData, randomState) - viewDset = datasetFile.create_dataset("View" + str(viewIndex), viewData.shape, data=viewData.astype(np.uint8)) + viewDset = datasetFile.create_dataset("View" + str(viewIndex), + viewData.shape, + data=viewData.astype( + np.uint8)) 
viewDset.attrs["name"] = "ViewNumber" + str(viewIndex) viewDset.attrs["sparse"] = False labelsDset = datasetFile.create_dataset("Labels", CLASS_LABELS.shape) @@ -122,36 +134,53 @@ def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", random return datasetFile, LABELS_DICTIONARY elif NB_CLASS >= 3: firstBound = int(datasetLength / 3) - rest = datasetLength - 2*int(datasetLength / 3) - scndBound = 2*int(datasetLength / 3) + rest = datasetLength - 2 * int(datasetLength / 3) + scndBound = 2 * int(datasetLength / 3) thrdBound = datasetLength - CLASS_LABELS = np.array([0 for _ in range(firstBound)] + [1 for _ in range(firstBound)] + [2 for _ in range(rest)]) + CLASS_LABELS = np.array( + [0 for _ in range(firstBound)] + [1 for _ in range(firstBound)] + [2 + for + _ + in + range( + rest)]) for viewIndex in range(nbView): - viewData = np.array([np.zeros(nbFeatures) for _ in range(firstBound)] + - [np.ones(nbFeatures)for _ in range(firstBound)] + - [np.ones(nbFeatures)+1 for _ in range(rest)]) - fakeOneIndices = randomState.randint(0, firstBound, int(datasetLength / 12)) - fakeTwoIndices = randomState.randint(firstBound, scndBound, int(datasetLength / 12)) - fakeZeroIndices = randomState.randint(scndBound, thrdBound, int(datasetLength / 12)) - - viewData[fakeOneIndices] = np.ones((len(fakeOneIndices), nbFeatures)) - viewData[fakeZeroIndices] = np.zeros((len(fakeZeroIndices), nbFeatures)) - viewData[fakeTwoIndices] = np.ones((len(fakeTwoIndices), nbFeatures))+1 + viewData = np.array( + [np.zeros(nbFeatures) for _ in range(firstBound)] + + [np.ones(nbFeatures) for _ in range(firstBound)] + + [np.ones(nbFeatures) + 1 for _ in range(rest)]) + fakeOneIndices = randomState.randint(0, firstBound, + int(datasetLength / 12)) + fakeTwoIndices = randomState.randint(firstBound, scndBound, + int(datasetLength / 12)) + fakeZeroIndices = randomState.randint(scndBound, thrdBound, + int(datasetLength / 12)) + + viewData[fakeOneIndices] = np.ones( + (len(fakeOneIndices), nbFeatures)) + viewData[fakeZeroIndices] = np.zeros( + (len(fakeZeroIndices), nbFeatures)) + viewData[fakeTwoIndices] = np.ones( + (len(fakeTwoIndices), nbFeatures)) + 1 viewData = makeMeNoisy(viewData, randomState) - viewDset = datasetFile.create_dataset("View" + str(viewIndex), viewData.shape, data=viewData.astype(np.uint8)) + viewDset = datasetFile.create_dataset("View" + str(viewIndex), + viewData.shape, + data=viewData.astype( + np.uint8)) viewDset.attrs["name"] = "ViewNumber" + str(viewIndex) viewDset.attrs["sparse"] = False labelsDset = datasetFile.create_dataset("Labels", CLASS_LABELS.shape) labelsDset[...] 
= CLASS_LABELS labelsDset.attrs["name"] = "Labels" - labelsDset.attrs["names"] = ["No".encode(), "Yes".encode(), "Maybe".encode()] + labelsDset.attrs["names"] = ["No".encode(), "Yes".encode(), + "Maybe".encode()] metaDataGrp = datasetFile.create_group("Metadata") metaDataGrp.attrs["nbView"] = nbView metaDataGrp.attrs["nbClass"] = 3 metaDataGrp.attrs["datasetLength"] = len(CLASS_LABELS) datasetFile.close() datasetFile = h5py.File(pathF + "Plausible.hdf5", "r") - LABELS_DICTIONARY = {0: "No", 1: "Yes", 2:"Maybe"} + LABELS_DICTIONARY = {0: "No", 1: "Yes", 2: "Maybe"} return datasetFile, LABELS_DICTIONARY, "Plausible" @@ -207,7 +236,6 @@ def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", random # return datasetFile, LABELS_DICTIONARY - class DatasetError(Exception): def __init__(self, *args, **kwargs): Exception.__init__(self, *args, **kwargs) @@ -231,17 +259,21 @@ def allAskedLabelsAreAvailable(askedLabelsNamesSet, availableLabelsNames): return True -def fillLabelNames(NB_CLASS, askedLabelsNames, randomState, availableLabelsNames): +def fillLabelNames(NB_CLASS, askedLabelsNames, randomState, + availableLabelsNames): if len(askedLabelsNames) < NB_CLASS: - nbLabelsToAdd = NB_CLASS-len(askedLabelsNames) - labelsNamesToChoose = [availableLabelName for availableLabelName in availableLabelsNames + nbLabelsToAdd = NB_CLASS - len(askedLabelsNames) + labelsNamesToChoose = [availableLabelName for availableLabelName in + availableLabelsNames if availableLabelName not in askedLabelsNames] - addedLabelsNames = randomState.choice(labelsNamesToChoose, nbLabelsToAdd, replace=False) + addedLabelsNames = randomState.choice(labelsNamesToChoose, + nbLabelsToAdd, replace=False) askedLabelsNames = list(askedLabelsNames) + list(addedLabelsNames) askedLabelsNamesSet = set(askedLabelsNames) elif len(askedLabelsNames) > NB_CLASS: - askedLabelsNames = list(randomState.choice(askedLabelsNames, NB_CLASS, replace=False)) + askedLabelsNames = list( + randomState.choice(askedLabelsNames, NB_CLASS, replace=False)) askedLabelsNamesSet = set(askedLabelsNames) else: @@ -257,30 +289,41 @@ def getAllLabels(fullLabels, availableLabelsNames): return newLabels, newLabelsNames, usedIndices -def selectAskedLabels(askedLabelsNamesSet, availableLabelsNames, askedLabelsNames, fullLabels): +def selectAskedLabels(askedLabelsNamesSet, availableLabelsNames, + askedLabelsNames, fullLabels): if allAskedLabelsAreAvailable(askedLabelsNamesSet, availableLabelsNames): - usedLabels = [availableLabelsNames.index(askedLabelName) for askedLabelName in askedLabelsNames] - usedIndices = np.array([labelIndex for labelIndex, label in enumerate(fullLabels) if label in usedLabels]) - newLabels = np.array([usedLabels.index(label) for label in fullLabels if label in usedLabels]) - newLabelsNames = [availableLabelsNames[usedLabel] for usedLabel in usedLabels] + usedLabels = [availableLabelsNames.index(askedLabelName) for + askedLabelName in askedLabelsNames] + usedIndices = np.array( + [labelIndex for labelIndex, label in enumerate(fullLabels) if + label in usedLabels]) + newLabels = np.array([usedLabels.index(label) for label in fullLabels if + label in usedLabels]) + newLabelsNames = [availableLabelsNames[usedLabel] for usedLabel in + usedLabels] return newLabels, newLabelsNames, usedIndices else: raise DatasetError("Asked labels are not all available in the dataset") -def filterLabels(labelsSet, askedLabelsNamesSet, fullLabels, availableLabelsNames, askedLabelsNames): +def filterLabels(labelsSet, askedLabelsNamesSet, fullLabels, + 
availableLabelsNames, askedLabelsNames): if len(labelsSet) > 2: if askedLabelsNames == availableLabelsNames: - newLabels, newLabelsNames, usedIndices = getAllLabels(fullLabels, availableLabelsNames) + newLabels, newLabelsNames, usedIndices = getAllLabels(fullLabels, + availableLabelsNames) elif len(askedLabelsNamesSet) <= len(labelsSet): - newLabels, newLabelsNames, usedIndices = selectAskedLabels(askedLabelsNamesSet, availableLabelsNames, - askedLabelsNames, fullLabels) + newLabels, newLabelsNames, usedIndices = selectAskedLabels( + askedLabelsNamesSet, availableLabelsNames, + askedLabelsNames, fullLabels) else: - raise DatasetError("Asked more labels than available in the dataset. Available labels are : "+ - ", ".join(availableLabelsNames)) + raise DatasetError( + "Asked more labels than available in the dataset. Available labels are : " + + ", ".join(availableLabelsNames)) else: - newLabels, newLabelsNames, usedIndices = getAllLabels(fullLabels, availableLabelsNames) + newLabels, newLabelsNames, usedIndices = getAllLabels(fullLabels, + availableLabelsNames) return newLabels, newLabelsNames, usedIndices @@ -288,17 +331,22 @@ def filterViews(datasetFile, temp_dataset, views, usedIndices): newViewIndex = 0 if views == [""]: for viewIndex in range(datasetFile.get("Metadata").attrs["nbView"]): - copyhdf5Dataset(datasetFile, temp_dataset, "View" + str(viewIndex), "View" + str(viewIndex), usedIndices) + copyhdf5Dataset(datasetFile, temp_dataset, "View" + str(viewIndex), + "View" + str(viewIndex), usedIndices) for askedViewName in views: for viewIndex in range(datasetFile.get("Metadata").attrs["nbView"]): viewName = datasetFile.get("View" + str(viewIndex)).attrs["name"] if type(viewName) == bytes: viewName = viewName.decode("utf-8") if viewName == askedViewName: - copyhdf5Dataset(datasetFile, temp_dataset, "View" + str(viewIndex), "View" + str(newViewIndex), usedIndices) - newViewName = temp_dataset.get("View"+str(newViewIndex)).attrs["name"] + copyhdf5Dataset(datasetFile, temp_dataset, + "View" + str(viewIndex), + "View" + str(newViewIndex), usedIndices) + newViewName = \ + temp_dataset.get("View" + str(newViewIndex)).attrs["name"] if type(newViewName) == bytes: - temp_dataset.get("View"+str(newViewIndex)).attrs["name"] = newViewName.decode("utf-8") + temp_dataset.get("View" + str(newViewIndex)).attrs[ + "name"] = newViewName.decode("utf-8") newViewIndex += 1 else: @@ -306,11 +354,15 @@ def filterViews(datasetFile, temp_dataset, views, usedIndices): temp_dataset.get("Metadata").attrs["nbView"] = len(views) -def copyhdf5Dataset(sourceDataFile, destinationDataFile, sourceDatasetName, destinationDatasetName, usedIndices): +def copyhdf5Dataset(sourceDataFile, destinationDataFile, sourceDatasetName, + destinationDatasetName, usedIndices): """Used to copy a view in a new dataset file using only the examples of usedIndices, and copying the args""" newDset = destinationDataFile.create_dataset(destinationDatasetName, - data=sourceDataFile.get(sourceDatasetName).value[usedIndices,:]) - if "sparse" in sourceDataFile.get(sourceDatasetName).attrs.keys() and sourceDataFile.get(sourceDatasetName).attrs["sparse"]: + data=sourceDataFile.get( + sourceDatasetName).value[ + usedIndices, :]) + if "sparse" in sourceDataFile.get(sourceDatasetName).attrs.keys() and \ + sourceDataFile.get(sourceDatasetName).attrs["sparse"]: # TODO : Support sparse pass else: @@ -318,86 +370,108 @@ def copyhdf5Dataset(sourceDataFile, destinationDataFile, sourceDatasetName, dest newDset.attrs[key] = value -def getClassicDBhdf5(views, 
pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full=False, add_noise=False, noise_std=0.15): +def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, + randomState, full=False, add_noise=False, noise_std=0.15): """Used to load a hdf5 database""" if full: datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r") dataset_name = nameDB - labelsDictionary = dict((labelIndex, labelName.decode("utf-8")) for labelIndex, labelName in - enumerate(datasetFile.get("Labels").attrs["names"])) + labelsDictionary = dict( + (labelIndex, labelName.decode("utf-8")) for labelIndex, labelName in + enumerate(datasetFile.get("Labels").attrs["names"])) else: - askedLabelsNames = [askedLabelName.encode("utf8") for askedLabelName in askedLabelsNames] + askedLabelsNames = [askedLabelName.encode("utf8") for askedLabelName in + askedLabelsNames] baseDatasetFile = h5py.File(pathF + nameDB + ".hdf5", "r") fullLabels = baseDatasetFile.get("Labels").value - datasetFile = h5py.File(pathF+nameDB+"_temp_view_label_select.hdf5", "w") - dataset_name = nameDB+"_temp_view_label_select" + datasetFile = h5py.File(pathF + nameDB + "_temp_view_label_select.hdf5", + "w") + dataset_name = nameDB + "_temp_view_label_select" baseDatasetFile.copy("Metadata", datasetFile) labelsSet = getClasses(fullLabels) - availableLabelsNames = list(baseDatasetFile.get("Labels").attrs["names"]) - askedLabelsNames, askedLabelsNamesSet = fillLabelNames(NB_CLASS, askedLabelsNames, - randomState, availableLabelsNames) - - newLabels, newLabelsNames, usedIndices = filterLabels(labelsSet, askedLabelsNamesSet, fullLabels, - availableLabelsNames, askedLabelsNames) + availableLabelsNames = list( + baseDatasetFile.get("Labels").attrs["names"]) + askedLabelsNames, askedLabelsNamesSet = fillLabelNames(NB_CLASS, + askedLabelsNames, + randomState, + availableLabelsNames) + + newLabels, newLabelsNames, usedIndices = filterLabels(labelsSet, + askedLabelsNamesSet, + fullLabels, + availableLabelsNames, + askedLabelsNames) datasetFile.get("Metadata").attrs["datasetLength"] = len(usedIndices) datasetFile.get("Metadata").attrs["nbClass"] = NB_CLASS datasetFile.create_dataset("Labels", data=newLabels) datasetFile.get("Labels").attrs["names"] = newLabelsNames filterViews(baseDatasetFile, datasetFile, views, usedIndices) - labelsDictionary = dict((labelIndex, labelName.decode("utf-8")) for labelIndex, labelName in - enumerate(datasetFile.get("Labels").attrs["names"])) + labelsDictionary = dict( + (labelIndex, labelName.decode("utf-8")) for labelIndex, labelName in + enumerate(datasetFile.get("Labels").attrs["names"])) if add_noise: - datasetFile, dataset_name = add_gaussian_noise(datasetFile, randomState, pathF, dataset_name, noise_std) + datasetFile, dataset_name = add_gaussian_noise(datasetFile, randomState, + pathF, dataset_name, + noise_std) else: pass return datasetFile, labelsDictionary, dataset_name -def add_gaussian_noise(dataset_file, random_state, path_f, dataset_name, noise_std=0.15): +def add_gaussian_noise(dataset_file, random_state, path_f, dataset_name, + noise_std=0.15): """In this function, we add a guaussian noise centered in 0 with specified std to each view, according to it's range (the noise will be mutliplied by this range) and we crop the noisy signal according to the view's attributes limits. 
This is done by creating a new dataset, to keep clean data.""" - noisy_dataset = h5py.File(path_f+dataset_name+"_noised.hdf5", "w") + noisy_dataset = h5py.File(path_f + dataset_name + "_noised.hdf5", "w") dataset_file.copy("Metadata", noisy_dataset) dataset_file.copy("Labels", noisy_dataset) for view_index in range(dataset_file.get("Metadata").attrs["nbView"]): - dataset_file.copy("View"+str(view_index), noisy_dataset) + dataset_file.copy("View" + str(view_index), noisy_dataset) # dataset_file.close() for view_index in range(noisy_dataset.get("Metadata").attrs["nbView"]): view_name = "View" + str(view_index) view_dset = noisy_dataset.get(view_name) # orig_shape = view_dset.value.shape - view_limits = dataset_file["Metadata/View"+str(view_index)+"_limits"].value - view_ranges = view_limits[:,1]-view_limits[:,0] + view_limits = dataset_file[ + "Metadata/View" + str(view_index) + "_limits"].value + view_ranges = view_limits[:, 1] - view_limits[:, 0] normal_dist = random_state.normal(0, noise_std, view_dset.value.shape) - noise = normal_dist*view_ranges - noised_data = view_dset.value+noise - noised_data = np.where(noised_data<view_limits[:,0], view_limits[:,0], noised_data) - noised_data = np.where(noised_data>view_limits[:,1], view_limits[:,1], noised_data) + noise = normal_dist * view_ranges + noised_data = view_dset.value + noise + noised_data = np.where(noised_data < view_limits[:, 0], + view_limits[:, 0], noised_data) + noised_data = np.where(noised_data > view_limits[:, 1], + view_limits[:, 1], noised_data) noisy_dataset[view_name][...] = noised_data # final_shape = noised_data.shape - return noisy_dataset, dataset_name+"_noised" + return noisy_dataset, dataset_name + "_noised" - - -def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full=False, add_noise=False, noise_std=0.15, delimiter=","): +def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, + randomState, full=False, add_noise=False, noise_std=0.15, + delimiter=","): # TODO : Update this one - labelsNames = np.genfromtxt(pathF + nameDB + "-labels-names.csv", dtype='str', delimiter=delimiter) + labelsNames = np.genfromtxt(pathF + nameDB + "-labels-names.csv", + dtype='str', delimiter=delimiter) datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w") labels = np.genfromtxt(pathF + nameDB + "-labels.csv", delimiter=delimiter) labelsDset = datasetFile.create_dataset("Labels", labels.shape, data=labels) - labelsDset.attrs["names"] = [labelName.encode() for labelName in labelsNames] - viewFileNames = [viewFileName for viewFileName in os.listdir(pathF+"Views/")] - for viewIndex, viewFileName in enumerate(os.listdir(pathF+"Views/")): + labelsDset.attrs["names"] = [labelName.encode() for labelName in + labelsNames] + viewFileNames = [viewFileName for viewFileName in + os.listdir(pathF + "Views/")] + for viewIndex, viewFileName in enumerate(os.listdir(pathF + "Views/")): viewFile = pathF + "Views/" + viewFileName if viewFileName[-6:] != "-s.csv": viewMatrix = np.genfromtxt(viewFile, delimiter=delimiter) - viewDset = datasetFile.create_dataset("View" + str(viewIndex), viewMatrix.shape, data=viewMatrix) + viewDset = datasetFile.create_dataset("View" + str(viewIndex), + viewMatrix.shape, + data=viewMatrix) del viewMatrix viewDset.attrs["name"] = viewFileName[:-4] viewDset.attrs["sparse"] = False @@ -408,7 +482,9 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomStat metaDataGrp.attrs["nbClass"] = len(labelsNames) metaDataGrp.attrs["datasetLength"] = len(labels) 
datasetFile.close() - datasetFile, labelsDictionary = getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full) + datasetFile, labelsDictionary = getClassicDBhdf5(views, pathF, nameDB, + NB_CLASS, askedLabelsNames, + randomState, full) return datasetFile, labelsDictionary @@ -476,7 +552,6 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomStat # return usedIndices - # def getCaltechDBcsv(views, pathF, nameDB, NB_CLASS, LABELS_NAMES, randomState): # datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w") # labelsNamesFile = open(pathF + nameDB + '-ClassLabels-Description.csv') @@ -510,11 +585,11 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomStat # datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r") # return datasetFile, labelsDictionary -#--------------------------------------------# +# --------------------------------------------# # All the functions below are not useful # # anymore but the binarization methods in # # it must be kept # -#--------------------------------------------# +# --------------------------------------------# # def getMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES, randomState): diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/HyperParameterSearch.py b/multiview_platform/MonoMultiViewClassifiers/utils/HyperParameterSearch.py index 48126d3a883349ce3a19481b0804e6ef041cbf00..84e03d89b5d569506dcfe1f277058807107dba35 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/HyperParameterSearch.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/HyperParameterSearch.py @@ -1,12 +1,15 @@ -import numpy as np +import itertools import sys + import matplotlib.pyplot as plt -import itertools +import numpy as np from .. import Metrics -def searchBestSettings(dataset, labels, classifierPackage, classifierName, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=None, +def searchBestSettings(dataset, labels, classifierPackage, classifierName, + metrics, iLearningIndices, iKFolds, randomState, + viewsIndices=None, searchingTool="randomizedSearch", nIter=1, **kwargs): """Used to select the right hyperparam optimization function to optimize hyper parameters""" if viewsIndices is None: @@ -14,17 +17,23 @@ def searchBestSettings(dataset, labels, classifierPackage, classifierName, metri thismodule = sys.modules[__name__] searchingTool = "randomizedSearch" # Todo find a nice way to configure multiview classifier without hp search searchingToolMethod = getattr(thismodule, searchingTool) - bestSettings = searchingToolMethod(dataset, labels, classifierPackage, classifierName, metrics, iLearningIndices, iKFolds, randomState, - viewsIndices=viewsIndices, nIter=nIter, **kwargs) + bestSettings = searchingToolMethod(dataset, labels, classifierPackage, + classifierName, metrics, + iLearningIndices, iKFolds, randomState, + viewsIndices=viewsIndices, nIter=nIter, + **kwargs) return bestSettings # or well set clasifier ? 
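Editor's note (not part of the patch): searchBestSettings above resolves the search strategy from a plain string at runtime, using getattr on the current module. Below is a minimal standalone sketch of that dispatch pattern; the stand-in randomizedSearch body and its return value are illustrative assumptions, not the platform's actual implementation.

import sys

def randomizedSearch(dataset, labels, nIter=1, **kwargs):
    # Stand-in for the real randomized hyper-parameter search: the platform's
    # version would draw nIter parameter sets and return the best one found.
    return {"nIter": nIter}

searchingTool = "randomizedSearch"  # strategy name received as a string
# Resolve the function of the same name defined in this module, then call it.
searchingToolMethod = getattr(sys.modules[__name__], searchingTool)
bestSettings = searchingToolMethod(dataset=None, labels=None, nIter=5)
print(bestSettings)

This is why the platform can switch search strategies through configuration alone: any module-level function whose name matches the configured string can be plugged in without touching the calling code.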
-def gridSearch(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, **kwargs): +def gridSearch(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, + **kwargs): """Used to perfom gridsearch on the classifiers""" pass -def randomizedSearch(dataset, labels, classifierPackage, classifierName, metrics, learningIndices, KFolds, randomState, viewsIndices=None, nIter=1, +def randomizedSearch(dataset, labels, classifierPackage, classifierName, + metrics, learningIndices, KFolds, randomState, + viewsIndices=None, nIter=1, nbCores=1, **classificationKWARGS): """Used to perform a random search on the classifiers to optimize hyper parameters""" if viewsIndices is None: @@ -32,13 +41,15 @@ def randomizedSearch(dataset, labels, classifierPackage, classifierName, metrics metric = metrics[0] metricModule = getattr(Metrics, metric[0]) if metric[1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) + metricKWARGS = dict((index, metricConfig) for index, metricConfig in + enumerate(metric[1])) else: metricKWARGS = {} - classifierModule = getattr(classifierPackage, classifierName+"Module") - classifierClass = getattr(classifierModule, classifierName+"Class") + classifierModule = getattr(classifierPackage, classifierName + "Module") + classifierClass = getattr(classifierModule, classifierName + "Class") if classifierName != "Mumbo": - paramsSets = classifierModule.genParamsSets(classificationKWARGS, randomState, nIter=nIter) + paramsSets = classifierModule.genParamsSets(classificationKWARGS, + randomState, nIter=nIter) if metricModule.getConfig()[-14] == "h": baseScore = -1000.0 isBetter = "higher" @@ -50,12 +61,17 @@ def randomizedSearch(dataset, labels, classifierPackage, classifierName, metrics for paramsSet in paramsSets: scores = [] for trainIndices, testIndices in kFolds: - classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS) + classifier = classifierClass(randomState, NB_CORES=nbCores, + **classificationKWARGS) classifier.setParams(paramsSet) - classifier.fit_hdf5(dataset, labels, trainIndices=learningIndices[trainIndices], viewsIndices=viewsIndices) - testLabels = classifier.predict_hdf5(dataset, usedIndices=learningIndices[testIndices], + classifier.fit_hdf5(dataset, labels, + trainIndices=learningIndices[trainIndices], + viewsIndices=viewsIndices) + testLabels = classifier.predict_hdf5(dataset, usedIndices= + learningIndices[testIndices], viewsIndices=viewsIndices) - testScore = metricModule.score(labels[learningIndices[testIndices]], testLabels) + testScore = metricModule.score( + labels[learningIndices[testIndices]], testLabels) scores.append(testScore) crossValScore = np.mean(np.array(scores)) @@ -65,20 +81,28 @@ def randomizedSearch(dataset, labels, classifierPackage, classifierName, metrics elif isBetter == "lower" and crossValScore < baseScore: baseScore = crossValScore bestSettings = paramsSet - classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS) + classifier = classifierClass(randomState, NB_CORES=nbCores, + **classificationKWARGS) classifier.setParams(bestSettings) # TODO : This must be corrected else: - bestConfigs, _ = classifierModule.gridSearch_hdf5(dataset, labels, viewsIndices, classificationKWARGS, learningIndices, - randomState, metric=metric, nIter=nIter) + bestConfigs, _ = classifierModule.gridSearch_hdf5(dataset, labels, + viewsIndices, + classificationKWARGS, + learningIndices, + randomState, + metric=metric, + nIter=nIter) 
classificationKWARGS["classifiersConfigs"] = bestConfigs - classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS) + classifier = classifierClass(randomState, NB_CORES=nbCores, + **classificationKWARGS) return classifier -def spearMint(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, **kwargs): +def spearMint(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, + **kwargs): """Used to perform spearmint on the classifiers to optimize hyper parameters, longer than randomsearch (can't be parallelized)""" pass @@ -89,7 +113,7 @@ def genHeatMaps(params, scoresArray, outputFileName): nbParams = len(params) if nbParams > 2: combinations = itertools.combinations(range(nbParams), 2) - elif nbParams==2: + elif nbParams == 2: combinations = [(0, 1)] else: combinations = [()] @@ -104,7 +128,8 @@ def genHeatMaps(params, scoresArray, outputFileName): paramArray1Set = np.sort(np.array(list(set(paramArray1)))) paramArray2Set = np.sort(np.array(list(set(paramArray2)))) - scoresMatrix = np.zeros((len(paramArray2Set), len(paramArray1Set))) - 0.1 + scoresMatrix = np.zeros( + (len(paramArray2Set), len(paramArray1Set))) - 0.1 for param1, param2, score in zip(paramArray1, paramArray2, scoresArray): param1Index, = np.where(paramArray1Set == param1) param2Index, = np.where(paramArray2Set == param2) @@ -120,7 +145,8 @@ def genHeatMaps(params, scoresArray, outputFileName): plt.xticks(np.arange(len(paramArray1Set)), paramArray1Set) plt.yticks(np.arange(len(paramArray2Set)), paramArray2Set, rotation=45) plt.title('Validation metric') - plt.savefig(outputFileName + "heat_map-" + paramName1 + "-" + paramName2 + ".png") + plt.savefig( + outputFileName + "heat_map-" + paramName1 + "-" + paramName2 + ".png") plt.close() # nohup python ~/dev/git/spearmint/spearmint/main.py . 
& diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/Multiclass.py b/multiview_platform/MonoMultiViewClassifiers/utils/Multiclass.py index b164d9f7e81531fa1ff484849611f049273de1d1..9d4e19bdd0972dc8d7b4f428c906d42d9f1c1488 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/Multiclass.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/Multiclass.py @@ -1,6 +1,7 @@ -import numpy as np import itertools +import numpy as np + def genMulticlassLabels(labels, multiclassMethod, splits): r"""Used to gen the train/test splits and to set up the framework of the adaptation of a multiclass dataset @@ -45,7 +46,7 @@ def genMulticlassLabels(labels, multiclassMethod, splits): splits = [[trainIndices for trainIndices, _ in splits], [testIndices for _, testIndices in splits], [[] for _ in splits]] - return [labels], [(0,1)], [splits] + return [labels], [(0, 1)], [splits] else: combinations = itertools.combinations(np.arange(nbLabels), 2) multiclassLabels = [] @@ -54,15 +55,20 @@ def genMulticlassLabels(labels, multiclassMethod, splits): for combination in combinations: labelsIndices.append(combination) oldIndices = [exampleIndex - for exampleIndex, exampleLabel in enumerate(labels) + for exampleIndex, exampleLabel in + enumerate(labels) if exampleLabel in combination] - trainIndices = [np.array([oldIndex for oldIndex in oldIndices if oldIndex in iterIndices[0]]) + trainIndices = [np.array([oldIndex for oldIndex in oldIndices if + oldIndex in iterIndices[0]]) for iterIndices in splits] - testIndices = [np.array([oldIndex for oldIndex in oldIndices if oldIndex in iterindices[1]]) + testIndices = [np.array([oldIndex for oldIndex in oldIndices if + oldIndex in iterindices[1]]) for iterindices in splits] - testIndicesMulticlass = [np.array(iterindices[1]) for iterindices in splits] - indicesMulticlass.append([trainIndices, testIndices, testIndicesMulticlass]) - newLabels = np.zeros(len(labels), dtype=int)-100 + testIndicesMulticlass = [np.array(iterindices[1]) for + iterindices in splits] + indicesMulticlass.append( + [trainIndices, testIndices, testIndicesMulticlass]) + newLabels = np.zeros(len(labels), dtype=int) - 100 for labelIndex, label in enumerate(labels): if label == combination[0]: newLabels[labelIndex] = 1 @@ -81,7 +87,8 @@ def genMulticlassLabels(labels, multiclassMethod, splits): def genMulticlassMonoviewDecision(monoviewResult, classificationIndices): learningIndices, validationIndices, testIndicesMulticlass = classificationIndices multiclassMonoviewDecisions = monoviewResult.full_labels_pred - multiclassMonoviewDecisions[testIndicesMulticlass] = monoviewResult.y_test_multiclass_pred + multiclassMonoviewDecisions[ + testIndicesMulticlass] = monoviewResult.y_test_multiclass_pred return multiclassMonoviewDecisions @@ -89,4 +96,4 @@ def isBiclass(multiclass_preds): if multiclass_preds[0] is []: return True else: - return False \ No newline at end of file + return False diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/MultiviewResultAnalysis.py b/multiview_platform/MonoMultiViewClassifiers/utils/MultiviewResultAnalysis.py index eed3ef9b73e5877792dcd81c9be104a8e273e07b..1e8ec1c69dd311ce5b63fd7e8751a21a884f9c93 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/MultiviewResultAnalysis.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/MultiviewResultAnalysis.py @@ -4,34 +4,44 @@ from .. 
import Metrics __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype + def printMetricScore(metricScores, metrics): metricScoreString = "\n\n" for metric in metrics: metricModule = getattr(Metrics, metric[0]) if metric[1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) + metricKWARGS = dict((index, metricConfig) for index, metricConfig in + enumerate(metric[1])) else: metricKWARGS = {} - metricScoreString += "\tFor " + metricModule.getConfig(**metricKWARGS) + " : " - metricScoreString += "\n\t\t- Score on train : " + str(metricScores[metric[0]][0]) - metricScoreString += "\n\t\t- Score on test : " + str(metricScores[metric[0]][1]) + metricScoreString += "\tFor " + metricModule.getConfig( + **metricKWARGS) + " : " + metricScoreString += "\n\t\t- Score on train : " + str( + metricScores[metric[0]][0]) + metricScoreString += "\n\t\t- Score on test : " + str( + metricScores[metric[0]][1]) metricScoreString += "\n\n" return metricScoreString -def getTotalMetricScores(metric, trainLabels, testLabels, validationIndices, learningIndices, labels): +def getTotalMetricScores(metric, trainLabels, testLabels, validationIndices, + learningIndices, labels): metricModule = getattr(Metrics, metric[0]) if metric[1] is not None: - metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) + metricKWARGS = dict((index, metricConfig) for index, metricConfig in + enumerate(metric[1])) else: metricKWARGS = {} try: - trainScore = metricModule.score(labels[learningIndices], trainLabels, **metricKWARGS) + trainScore = metricModule.score(labels[learningIndices], trainLabels, + **metricKWARGS) except: print(labels[learningIndices]) print(trainLabels) - import pdb;pdb.set_trace() - testScore = metricModule.score(labels[validationIndices], testLabels, **metricKWARGS) + import pdb; + pdb.set_trace() + testScore = metricModule.score(labels[validationIndices], testLabels, + **metricKWARGS) return [trainScore, testScore] @@ -39,7 +49,8 @@ def getMetricsScores(metrics, trainLabels, testLabels, validationIndices, learningIndices, labels): metricsScores = {} for metric in metrics: - metricsScores[metric[0]] = getTotalMetricScores(metric, trainLabels, testLabels, - validationIndices, learningIndices, labels) + metricsScores[metric[0]] = getTotalMetricScores(metric, trainLabels, + testLabels, + validationIndices, + learningIndices, labels) return metricsScores - diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py index f73756c5306f2e5364c0137e771eaf6dda27af75..7b83a19b46fa040fb060da8e20a5a7c91002c8d1 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py @@ -1,290 +1,390 @@ import argparse -import numpy as np +import logging +import os import pickle import time -import os -import errno -import logging -import sklearn +import numpy as np +import sklearn from . 
import GetMultiviewDb as DB + def parseTheArgs(arguments): """Used to parse the args entered by the user""" parser = argparse.ArgumentParser( - description='This file is used to benchmark the scores fo multiple classification algorithm on multiview data.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter,) + description='This file is used to benchmark the scores fo multiple ' + 'classification algorithm on multiview data.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + fromfile_prefix_chars='@') groupStandard = parser.add_argument_group('Standard arguments') - groupStandard.add_argument('-log', action='store_true', help='Use option to activate logging to console') - groupStandard.add_argument('--name', metavar='STRING', action='store', help='Name of Database (default: %(default)s)', + groupStandard.add_argument('-log', action='store_true', + help='Use option to activate logging to console') + groupStandard.add_argument('--name', metavar='STRING', action='store', + help='Name of Database (default: %(default)s)', default='Plausible') groupStandard.add_argument('--label', metavar='STRING', action='store', - help='Labeling the results directory (default: %(default)s)', + help='Labeling the results directory (default: ' + '%(default)s)', default='') groupStandard.add_argument('--type', metavar='STRING', action='store', - help='Type of database : .hdf5 or .csv (default: %(default)s)', + help='Type of database : .hdf5 or .csv (' + 'default: %(default)s)', default='.hdf5') - groupStandard.add_argument('--views', metavar='STRING', action='store', nargs="+", - help='Name of the views selected for learning (default: %(default)s)', + groupStandard.add_argument('--views', metavar='STRING', action='store', + nargs="+", + help='Name of the views selected for learning ' + '(default: %(default)s)', default=['']) - groupStandard.add_argument('--pathF', metavar='STRING', action='store', help='Path to the hdf5 dataset or database ' - 'folder (default: %(default)s)', + groupStandard.add_argument('--pathF', metavar='STRING', action='store', + help='Path to the hdf5 dataset or database ' + 'folder (default: %(default)s)', default='../Data/') - groupStandard.add_argument('--nice', metavar='INT', action='store', type=int, + groupStandard.add_argument('--nice', metavar='INT', action='store', + type=int, help='Niceness for the processes', default=0) - groupStandard.add_argument('--randomState', metavar='STRING', action='store', - help="The random state seed to use or the path to a pickle file where it is stored", + groupStandard.add_argument('--randomState', metavar='STRING', + action='store', + help="The random state seed to use or the path " + "to a pickle file where it is stored", default=None) - groupStandard.add_argument('--nbCores', metavar='INT', action='store', help='Number of cores to use for parallel ' - 'computing, -1 for all', + groupStandard.add_argument('--nbCores', metavar='INT', action='store', + help='Number of cores to use for parallel ' + 'computing, -1 for all', type=int, default=2) groupStandard.add_argument('--machine', metavar='STRING', action='store', - help='Type of machine on which the script runs', default="PC") - groupStandard.add_argument('-full', action='store_true', help='Use option to use full dataset and no labels or view filtering') + help='Type of machine on which the script runs', + default="PC") + groupStandard.add_argument('-full', action='store_true', + help='Use option to use full dataset and no ' + 'labels or view filtering') 
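Editor's note (not part of the patch): the ArgumentParser above now receives fromfile_prefix_chars='@', a standard argparse feature that lets a long option list be read from a file, one token per line, by invoking the script as "python Exec.py @args.txt". A minimal sketch follows, reusing two arguments defined in this file; the file name args.txt and the value MyDataset are illustrative.

import argparse

parser = argparse.ArgumentParser(fromfile_prefix_chars='@')
parser.add_argument('--name', default='Plausible')
parser.add_argument('--CL_nbFolds', type=int, default=5)

# These two calls are equivalent when args.txt contains the same four tokens,
# one per line: --name / MyDataset / --CL_nbFolds / 10
args_inline = parser.parse_args(['--name', 'MyDataset', '--CL_nbFolds', '10'])
# args_from_file = parser.parse_args(['@args.txt'])  # uncomment once args.txt exists
print(args_inline.name, args_inline.CL_nbFolds)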
groupStandard.add_argument('-debug', action='store_true', help='Use option to bebug implemented algorithms') groupStandard.add_argument('-add_noise', action='store_true', help='Use option to add noise to the data') groupStandard.add_argument('--noise_std', metavar='FLOAT', action='store', - help='The std of the gaussian noise that will be added to the data.', + help='The std of the gaussian noise that will ' + 'be added to the data.', type=float, default=0.15) groupStandard.add_argument('--res_dir', metavar='STRING', action='store', help='The path to the result directory', default="../Results/") - groupClass = parser.add_argument_group('Classification arguments') - groupClass.add_argument('--CL_multiclassMethod', metavar='STRING', action='store', - help='Determine which multiclass method to use if the dataset is multiclass', + groupClass.add_argument('--CL_multiclassMethod', metavar='STRING', + action='store', + help='Determine which multiclass method to use if ' + 'the dataset is multiclass', default="oneVersusOne") groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', - help='Determine the split ratio between learning and validation sets', type=float, + help='Determine the split ratio between learning ' + 'and validation sets', + type=float, default=0.2) - groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', help='Number of folds in cross validation', + groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', + help='Number of folds in cross validation', type=int, default=5) - groupClass.add_argument('--CL_nbClass', metavar='INT', action='store', help='Number of classes, -1 for all', type=int, + groupClass.add_argument('--CL_nbClass', metavar='INT', action='store', + help='Number of classes, -1 for all', type=int, default=2) - groupClass.add_argument('--CL_classes', metavar='STRING', action='store', nargs="+", - help='Classes used in the dataset (names of the folders) if not filled, random classes will be ' + groupClass.add_argument('--CL_classes', metavar='STRING', action='store', + nargs="+", + help='Classes used in the dataset (names of the ' + 'folders) if not filled, random classes will ' + 'be ' 'selected', default=["yes", "no"]) - groupClass.add_argument('--CL_type', metavar='STRING', action='store', nargs="+", - help='Determine whether to use Multiview and/or Monoview, or Benchmark classification', + groupClass.add_argument('--CL_type', metavar='STRING', action='store', + nargs="+", + help='Determine whether to use Multiview and/or ' + 'Monoview, or Benchmark classification', default=['Monoview', 'Multiview']) - groupClass.add_argument('--CL_algos_monoview', metavar='STRING', action='store', nargs="+", - help='Determine which monoview classifier to use if empty, considering all', + groupClass.add_argument('--CL_algos_monoview', metavar='STRING', + action='store', nargs="+", + help='Determine which monoview classifier to use ' + 'if empty, considering all', default=['']) - groupClass.add_argument('--CL_algos_multiview', metavar='STRING', action='store', nargs="+", - help='Determine which multiview classifier to use if empty, considering all', + groupClass.add_argument('--CL_algos_multiview', metavar='STRING', + action='store', nargs="+", + help='Determine which multiview classifier to use ' + 'if empty, considering all', default=['']) groupClass.add_argument('--CL_statsiter', metavar='INT', action='store', - help="Number of iteration for each algorithm to mean preds on different random states. 
" - "If using multiple cores, it's highly recommended to use statsiter mod nbCores == 0", + help="Number of iteration for each algorithm to " + "mean preds on different random states. " + "If using multiple cores, it's highly " + "recommended to use statsiter mod nbCores == " + "0", type=int, default=2) - groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', nargs="+", - help='Determine which metrics to use, separate metric and configuration with ":".' - ' If multiple, separate with space. If no metric is specified, ' + groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', + nargs="+", + help='Determine which metrics to use, separate ' + 'metric and configuration with ":". ' + 'If multiple, separate with space. If no ' + 'metric is specified, ' 'considering all' , default=['']) - groupClass.add_argument('--CL_metric_princ', metavar='STRING', action='store', - help='Determine which metric to use for randomSearch and optimization', default="f1_score") + groupClass.add_argument('--CL_metric_princ', metavar='STRING', + action='store', + help='Determine which metric to use for ' + 'randomSearch and optimization', + default="f1_score") groupClass.add_argument('--CL_HPS_iter', metavar='INT', action='store', - help='Determine how many hyper parameters optimization tests to do', type=int, default=2) + help='Determine how many hyper parameters ' + 'optimization tests to do', + type=int, default=2) groupClass.add_argument('--CL_HPS_type', metavar='STRING', action='store', - help='Determine which hyperparamter search function use', default="randomizedSearch") + help='Determine which hyperparamter search ' + 'function use', + default="randomizedSearch") groupRF = parser.add_argument_group('Random Forest arguments') - groupRF.add_argument('--RF_trees', metavar='INT', type=int, action='store', help='Number max trees', + groupRF.add_argument('--RF_trees', metavar='INT', type=int, action='store', + help='Number max trees', default=25) - groupRF.add_argument('--RF_max_depth', metavar='INT', type=int, action='store', + groupRF.add_argument('--RF_max_depth', metavar='INT', type=int, + action='store', help='Max depth for the trees', default=5) - groupRF.add_argument('--RF_criterion', metavar='STRING', action='store', help='Criterion for the trees', + groupRF.add_argument('--RF_criterion', metavar='STRING', action='store', + help='Criterion for the trees', default="entropy") groupSVMLinear = parser.add_argument_group('Linear SVM arguments') - groupSVMLinear.add_argument('--SVML_C', metavar='INT', type=int, action='store', help='Penalty parameter used', + groupSVMLinear.add_argument('--SVML_C', metavar='INT', type=int, + action='store', help='Penalty parameter used', default=1) groupSVMRBF = parser.add_argument_group('SVW-RBF arguments') - groupSVMRBF.add_argument('--SVMRBF_C', metavar='INT', type=int, action='store', help='Penalty parameter used', + groupSVMRBF.add_argument('--SVMRBF_C', metavar='INT', type=int, + action='store', help='Penalty parameter used', default=1) groupSVMPoly = parser.add_argument_group('Poly SVM arguments') - groupSVMPoly.add_argument('--SVMPoly_C', metavar='INT', type=int, action='store', help='Penalty parameter used', + groupSVMPoly.add_argument('--SVMPoly_C', metavar='INT', type=int, + action='store', help='Penalty parameter used', default=1) - groupSVMPoly.add_argument('--SVMPoly_deg', metavar='INT', type=int, action='store', help='Degree parameter used', + groupSVMPoly.add_argument('--SVMPoly_deg', metavar='INT', type=int, + action='store', 
help='Degree parameter used', default=2) groupAdaboost = parser.add_argument_group('Adaboost arguments') - groupAdaboost.add_argument('--Ada_n_est', metavar='INT', type=int, action='store', help='Number of estimators', + groupAdaboost.add_argument('--Ada_n_est', metavar='INT', type=int, + action='store', help='Number of estimators', default=2) - groupAdaboost.add_argument('--Ada_b_est', metavar='STRING', action='store', help='Estimators', + groupAdaboost.add_argument('--Ada_b_est', metavar='STRING', action='store', + help='Estimators', default='DecisionTreeClassifier') groupAdaboostPregen = parser.add_argument_group('AdaboostPregen arguments') groupAdaboostPregen.add_argument('--AdP_n_est', metavar='INT', type=int, - action='store', help='Number of estimators', - default=100) - groupAdaboostPregen.add_argument('--AdP_b_est', metavar='STRING', action='store', - help='Estimators', - default='DecisionTreeClassifier') + action='store', + help='Number of estimators', + default=100) + groupAdaboostPregen.add_argument('--AdP_b_est', metavar='STRING', + action='store', + help='Estimators', + default='DecisionTreeClassifier') groupAdaboostPregen.add_argument('--AdP_stumps', metavar='INT', type=int, action='store', - help='Number of stumps inthe pregenerated dataset', + help='Number of stumps inthe ' + 'pregenerated dataset', default=1) - - - groupAdaboostGraalpy = parser.add_argument_group('AdaboostGraalpy arguments') + groupAdaboostGraalpy = parser.add_argument_group( + 'AdaboostGraalpy arguments') groupAdaboostGraalpy.add_argument('--AdG_n_iter', metavar='INT', type=int, - action='store', - help='Number of estimators', - default=100) + action='store', + help='Number of estimators', + default=100) groupAdaboostGraalpy.add_argument('--AdG_stumps', metavar='INT', type=int, - action='store', - help='Number of stumps inthe pregenerated dataset', - default=1) + action='store', + help='Number of stumps inthe ' + 'pregenerated dataset', + default=1) groupDT = parser.add_argument_group('Decision Trees arguments') groupDT.add_argument('--DT_depth', metavar='INT', type=int, action='store', - help='Determine max depth for Decision Trees', default=3) - groupDT.add_argument('--DT_criterion', metavar='STRING', action='store', - help='Determine max depth for Decision Trees', default="entropy") - groupDT.add_argument('--DT_splitter', metavar='STRING', action='store', - help='Determine criterion for Decision Trees', default="random") - - groupDTP = parser.add_argument_group('Decision Trees pregen arguments') - groupDTP.add_argument('--DTP_depth', metavar='INT', type=int, action='store', help='Determine max depth for Decision Trees', default=3) - groupDTP.add_argument('--DTP_criterion', metavar='STRING', action='store', + groupDT.add_argument('--DT_criterion', metavar='STRING', action='store', help='Determine max depth for Decision Trees', default="entropy") - groupDTP.add_argument('--DTP_splitter', metavar='STRING', action='store', + groupDT.add_argument('--DT_splitter', metavar='STRING', action='store', help='Determine criterion for Decision Trees', default="random") - groupDTP.add_argument('--DTP_stumps', metavar='INT', type=int, action='store', - help='Determine the number of stumps for Decision Trees pregen', + + groupDTP = parser.add_argument_group('Decision Trees pregen arguments') + groupDTP.add_argument('--DTP_depth', metavar='INT', type=int, + action='store', + help='Determine max depth for Decision Trees', + default=3) + groupDTP.add_argument('--DTP_criterion', metavar='STRING', action='store', + 
help='Determine max depth for Decision Trees', + default="entropy") + groupDTP.add_argument('--DTP_splitter', metavar='STRING', action='store', + help='Determine criterion for Decision Trees', + default="random") + groupDTP.add_argument('--DTP_stumps', metavar='INT', type=int, + action='store', + help='Determine the number of stumps for Decision ' + 'Trees pregen', default=1) groupSGD = parser.add_argument_group('SGD arguments') - groupSGD.add_argument('--SGD_alpha', metavar='FLOAT', type=float, action='store', + groupSGD.add_argument('--SGD_alpha', metavar='FLOAT', type=float, + action='store', help='Determine alpha for SGDClassifier', default=0.1) groupSGD.add_argument('--SGD_loss', metavar='STRING', action='store', - help='Determine loss for SGDClassifier', default='log') + help='Determine loss for SGDClassifier', + default='log') groupSGD.add_argument('--SGD_penalty', metavar='STRING', action='store', - help='Determine penalty for SGDClassifier', default='l2') + help='Determine penalty for SGDClassifier', + default='l2') groupKNN = parser.add_argument_group('KNN arguments') - groupKNN.add_argument('--KNN_neigh', metavar='INT', type=int, action='store', - help='Determine number of neighbors for KNN', default=1) + groupKNN.add_argument('--KNN_neigh', metavar='INT', type=int, + action='store', + help='Determine number of neighbors for KNN', + default=1) groupKNN.add_argument('--KNN_weights', metavar='STRING', action='store', - help='Determine number of neighbors for KNN', default="distance") + help='Determine number of neighbors for KNN', + default="distance") groupKNN.add_argument('--KNN_algo', metavar='STRING', action='store', - help='Determine number of neighbors for KNN', default="auto") + help='Determine number of neighbors for KNN', + default="auto") groupKNN.add_argument('--KNN_p', metavar='INT', type=int, action='store', - help='Determine number of neighbors for KNN', default=1) + help='Determine number of neighbors for KNN', + default=1) groupSCM = parser.add_argument_group('SCM arguments') - groupSCM.add_argument('--SCM_max_rules', metavar='INT', type=int, action='store', + groupSCM.add_argument('--SCM_max_rules', metavar='INT', type=int, + action='store', help='Max number of rules for SCM', default=1) - groupSCM.add_argument('--SCM_p', metavar='FLOAT', type=float, action='store', + groupSCM.add_argument('--SCM_p', metavar='FLOAT', type=float, + action='store', help='Max number of rules for SCM', default=1.0) groupSCM.add_argument('--SCM_model_type', metavar='STRING', action='store', - help='Max number of rules for SCM', default="conjunction") + help='Max number of rules for SCM', + default="conjunction") groupSCMPregen = parser.add_argument_group('SCMPregen arguments') groupSCMPregen.add_argument('--SCP_max_rules', metavar='INT', type=int, - action='store', - help='Max number of rules for SCM', default=1) + action='store', + help='Max number of rules for SCM', default=1) groupSCMPregen.add_argument('--SCP_p', metavar='FLOAT', type=float, - action='store', - help='Max number of rules for SCM', default=1.0) - groupSCMPregen.add_argument('--SCP_model_type', metavar='STRING', action='store', - help='Max number of rules for SCM', - default="conjunction") + action='store', + help='Max number of rules for SCM', default=1.0) + groupSCMPregen.add_argument('--SCP_model_type', metavar='STRING', + action='store', + help='Max number of rules for SCM', + default="conjunction") groupSCMPregen.add_argument('--SCP_stumps', metavar='INT', type=int, action='store', - help='Number of stumps per 
attribute', default=1) + help='Number of stumps per attribute', + default=1) groupSCMSparsity = parser.add_argument_group('SCMSparsity arguments') groupSCMSparsity.add_argument('--SCS_max_rules', metavar='INT', type=int, - action='store', - help='Max number of rules for SCM', default=1) + action='store', + help='Max number of rules for SCM', default=1) groupSCMSparsity.add_argument('--SCS_stumps', metavar='INT', type=int, action='store', help='Number of stumps', default=1) groupSCMSparsity.add_argument('--SCS_p', metavar='FLOAT', type=float, - action='store', - help='Max number of rules for SCM', default=1.0) + action='store', + help='Max number of rules for SCM', + default=1.0) groupSCMSparsity.add_argument('--SCS_model_type', metavar='STRING', - action='store', - help='Max number of rules for SCM', - default="conjunction") + action='store', + help='Max number of rules for SCM', + default="conjunction") groupCQBoost = parser.add_argument_group('CQBoost arguments') - groupCQBoost.add_argument('--CQB_mu', metavar='FLOAT', type=float, action='store', - help='Set the mu parameter for CQBoost', default=0.001) - groupCQBoost.add_argument('--CQB_epsilon', metavar='FLOAT', type=float, action='store', - help='Set the epsilon parameter for CQBoost', default=1e-06) + groupCQBoost.add_argument('--CQB_mu', metavar='FLOAT', type=float, + action='store', + help='Set the mu parameter for CQBoost', + default=0.001) + groupCQBoost.add_argument('--CQB_epsilon', metavar='FLOAT', type=float, + action='store', + help='Set the epsilon parameter for CQBoost', + default=1e-06) groupCQBoost.add_argument('--CQB_stumps', metavar='INT', type=int, action='store', help='Set the number of stumps for CQBoost', default=1) groupCQBoost.add_argument('--CQB_n_iter', metavar='INT', type=int, - action='store', - help='Set the maximum number of iteration in CQBoost', - default=None) - - + action='store', + help='Set the maximum number of iteration in ' + 'CQBoost', + default=None) groupCQBoostv2 = parser.add_argument_group('CQBoostv2 arguments') - groupCQBoostv2.add_argument('--CQB2_mu', metavar='FLOAT', type=float, action='store', - help='Set the mu parameter for CQBoostv2', default=0.002) - groupCQBoostv2.add_argument('--CQB2_epsilon', metavar='FLOAT', type=float, action='store', - help='Set the epsilon parameter for CQBoostv2', default=1e-08) + groupCQBoostv2.add_argument('--CQB2_mu', metavar='FLOAT', type=float, + action='store', + help='Set the mu parameter for CQBoostv2', + default=0.002) + groupCQBoostv2.add_argument('--CQB2_epsilon', metavar='FLOAT', type=float, + action='store', + help='Set the epsilon parameter for CQBoostv2', + default=1e-08) groupCQBoostv21 = parser.add_argument_group('CQBoostv21 arguments') - groupCQBoostv21.add_argument('--CQB21_mu', metavar='FLOAT', type=float, action='store', - help='Set the mu parameter for CQBoostv2', default=0.001) - groupCQBoostv21.add_argument('--CQB21_epsilon', metavar='FLOAT', type=float, action='store', - help='Set the epsilon parameter for CQBoostv2', default=1e-08) + groupCQBoostv21.add_argument('--CQB21_mu', metavar='FLOAT', type=float, + action='store', + help='Set the mu parameter for CQBoostv2', + default=0.001) + groupCQBoostv21.add_argument('--CQB21_epsilon', metavar='FLOAT', type=float, + action='store', + help='Set the epsilon parameter for CQBoostv2', + default=1e-08) groupQarBoost = parser.add_argument_group('QarBoost arguments') - groupQarBoost.add_argument('--QarB_mu', metavar='FLOAT', type=float, action='store', - help='Set the mu parameter for QarBoost', 
default=0.001) - groupQarBoost.add_argument('--QarB_epsilon', metavar='FLOAT', type=float, action='store', - help='Set the epsilon parameter for QarBoost', default=1e-08) + groupQarBoost.add_argument('--QarB_mu', metavar='FLOAT', type=float, + action='store', + help='Set the mu parameter for QarBoost', + default=0.001) + groupQarBoost.add_argument('--QarB_epsilon', metavar='FLOAT', type=float, + action='store', + help='Set the epsilon parameter for QarBoost', + default=1e-08) groupCGreed = parser.add_argument_group('CGreed arguments') - groupCGreed.add_argument('--CGR_stumps', metavar='INT', type=int, action='store', - help='Set the n_stumps_per_attribute parameter for CGreed', default=1) - groupCGreed.add_argument('--CGR_n_iter', metavar='INT', type=int, action='store', - help='Set the n_max_iterations parameter for CGreed', default=100) + groupCGreed.add_argument('--CGR_stumps', metavar='INT', type=int, + action='store', + help='Set the n_stumps_per_attribute parameter ' + 'for CGreed', + default=1) + groupCGreed.add_argument('--CGR_n_iter', metavar='INT', type=int, + action='store', + help='Set the n_max_iterations parameter for ' + 'CGreed', + default=100) groupCGDesc = parser.add_argument_group('CGDesc arguments') groupCGDesc.add_argument('--CGD_stumps', metavar='INT', type=int, action='store', - help='Set the n_stumps_per_attribute parameter for CGreed', + help='Set the n_stumps_per_attribute parameter ' + 'for CGreed', default=1) groupCGDesc.add_argument('--CGD_n_iter', metavar='INT', type=int, action='store', - help='Set the n_max_iterations parameter for CGreed', + help='Set the n_max_iterations parameter for ' + 'CGreed', default=100) groupCGDescTree = parser.add_argument_group('CGDesc arguments') groupCGDescTree.add_argument('--CGDT_trees', metavar='INT', type=int, - action='store', - help='Set thenumber of trees for CGreed', - default=100) + action='store', + help='Set thenumber of trees for CGreed', + default=100) groupCGDescTree.add_argument('--CGDT_n_iter', metavar='INT', type=int, - action='store', - help='Set the n_max_iterations parameter for CGreed', - default=100) + action='store', + help='Set the n_max_iterations parameter for ' + 'CGreed', + default=100) groupCGDescTree.add_argument('--CGDT_max_depth', metavar='INT', type=int, action='store', help='Set the n_max_iterations parameter for CGreed', @@ -330,46 +430,51 @@ def parseTheArgs(arguments): groupSCMPregenTree = parser.add_argument_group('SCMPregenTree arguments') groupSCMPregenTree.add_argument('--SCPT_max_rules', metavar='INT', type=int, - action='store', - help='Max number of rules for SCM', default=1) - groupSCMPregenTree.add_argument('--SCPT_p', metavar='FLOAT', type=float, - action='store', - help='Max number of rules for SCM', default=1.0) - groupSCMPregenTree.add_argument('--SCPT_model_type', metavar='STRING', - action='store', - help='Max number of rules for SCM', - default="conjunction") - groupSCMPregenTree.add_argument('--SCPT_trees', metavar='INT', type=int, - action='store', - help='Number of stumps per attribute', - default=100) - groupSCMPregenTree.add_argument('--SCPT_max_depth', metavar='INT', type=int, - action='store', - help='Max_depth of the trees', - default=1) - - groupSCMSparsityTree = parser.add_argument_group('SCMSparsityTree arguments') - groupSCMSparsityTree.add_argument('--SCST_max_rules', metavar='INT', type=int, action='store', help='Max number of rules for SCM', default=1) - groupSCMSparsityTree.add_argument('--SCST_p', metavar='FLOAT', type=float, + 
groupSCMPregenTree.add_argument('--SCPT_p', metavar='FLOAT', type=float, action='store', help='Max number of rules for SCM', default=1.0) - groupSCMSparsityTree.add_argument('--SCST_model_type', metavar='STRING', + groupSCMPregenTree.add_argument('--SCPT_model_type', metavar='STRING', action='store', help='Max number of rules for SCM', default="conjunction") - groupSCMSparsityTree.add_argument('--SCST_trees', metavar='INT', type=int, + groupSCMPregenTree.add_argument('--SCPT_trees', metavar='INT', type=int, action='store', help='Number of stumps per attribute', default=100) - groupSCMSparsityTree.add_argument('--SCST_max_depth', metavar='INT', type=int, + groupSCMPregenTree.add_argument('--SCPT_max_depth', metavar='INT', type=int, action='store', help='Max_depth of the trees', default=1) + groupSCMSparsityTree = parser.add_argument_group( + 'SCMSparsityTree arguments') + groupSCMSparsityTree.add_argument('--SCST_max_rules', metavar='INT', + type=int, + action='store', + help='Max number of rules for SCM', + default=1) + groupSCMSparsityTree.add_argument('--SCST_p', metavar='FLOAT', type=float, + action='store', + help='Max number of rules for SCM', + default=1.0) + groupSCMSparsityTree.add_argument('--SCST_model_type', metavar='STRING', + action='store', + help='Max number of rules for SCM', + default="conjunction") + groupSCMSparsityTree.add_argument('--SCST_trees', metavar='INT', type=int, + action='store', + help='Number of stumps per attribute', + default=100) + groupSCMSparsityTree.add_argument('--SCST_max_depth', metavar='INT', + type=int, + action='store', + help='Max_depth of the trees', + default=1) + groupAdaboostPregenTree = parser.add_argument_group( 'AdaboostPregenTrees arguments') groupAdaboostPregenTree.add_argument('--AdPT_n_est', metavar='INT', @@ -394,19 +499,20 @@ def parseTheArgs(arguments): groupLasso = parser.add_argument_group('Lasso arguments') groupLasso.add_argument('--LA_n_iter', metavar='INT', type=int, - action='store', - help='Set the max_iter parameter for Lasso', - default=1) + action='store', + help='Set the max_iter parameter for Lasso', + default=1) groupLasso.add_argument('--LA_alpha', metavar='FLOAT', type=float, - action='store', - help='Set the alpha parameter for Lasso', - default=1.0) + action='store', + help='Set the alpha parameter for Lasso', + default=1.0) - groupGradientBoosting = parser.add_argument_group('Gradient Boosting arguments') + groupGradientBoosting = parser.add_argument_group( + 'Gradient Boosting arguments') groupGradientBoosting.add_argument('--GB_n_est', metavar='INT', type=int, - action='store', - help='Set the n_estimators_parameter for Gradient Boosting', - default=100) + action='store', + help='Set the n_estimators_parameter for Gradient Boosting', + default=100) groupMinCQ = parser.add_argument_group('MinCQ arguments') groupMinCQ.add_argument('--MCQ_mu', metavar='FLOAT', type=float, @@ -414,117 +520,159 @@ def parseTheArgs(arguments): help='Set the mu_parameter for MinCQ', default=0.05) groupMinCQ.add_argument('--MCQ_stumps', metavar='INT', type=int, - action='store', - help='Set the n_stumps_per_attribute parameter for MinCQ', - default=1) + action='store', + help='Set the n_stumps_per_attribute parameter for MinCQ', + default=1) groupMinCQGraalpy = parser.add_argument_group('MinCQGraalpy arguments') groupMinCQGraalpy.add_argument('--MCG_mu', metavar='FLOAT', type=float, - action='store', - help='Set the mu_parameter for MinCQGraalpy', - default=0.05) + action='store', + help='Set the mu_parameter for MinCQGraalpy', + 
default=0.05) groupMinCQGraalpy.add_argument('--MCG_stumps', metavar='INT', type=int, - action='store', - help='Set the n_stumps_per_attribute parameter for MinCQGraalpy', - default=1) - - + action='store', + help='Set the n_stumps_per_attribute parameter for MinCQGraalpy', + default=1) groupQarBoostv3 = parser.add_argument_group('QarBoostv3 arguments') - groupQarBoostv3.add_argument('--QarB3_mu', metavar='FLOAT', type=float, action='store', - help='Set the mu parameter for QarBoostv3', default=0.001) - groupQarBoostv3.add_argument('--QarB3_epsilon', metavar='FLOAT', type=float, action='store', - help='Set the epsilon parameter for QarBoostv3', default=1e-08) + groupQarBoostv3.add_argument('--QarB3_mu', metavar='FLOAT', type=float, + action='store', + help='Set the mu parameter for QarBoostv3', + default=0.001) + groupQarBoostv3.add_argument('--QarB3_epsilon', metavar='FLOAT', type=float, + action='store', + help='Set the epsilon parameter for QarBoostv3', + default=1e-08) groupQarBoostNC = parser.add_argument_group('QarBoostNC arguments') - groupQarBoostNC.add_argument('--QarBNC_mu', metavar='FLOAT', type=float, action='store', - help='Set the mu parameter for QarBoostNC', default=0.001) - groupQarBoostNC.add_argument('--QarBNC_epsilon', metavar='FLOAT', type=float, action='store', - help='Set the epsilon parameter for QarBoostNC', default=1e-08) + groupQarBoostNC.add_argument('--QarBNC_mu', metavar='FLOAT', type=float, + action='store', + help='Set the mu parameter for QarBoostNC', + default=0.001) + groupQarBoostNC.add_argument('--QarBNC_epsilon', metavar='FLOAT', + type=float, action='store', + help='Set the epsilon parameter for QarBoostNC', + default=1e-08) groupQarBoostNC2 = parser.add_argument_group('QarBoostNC2 arguments') - groupQarBoostNC2.add_argument('--QarBNC2_mu', metavar='FLOAT', type=float, action='store', - help='Set the mu parameter for QarBoostNC2', default=0.001) - groupQarBoostNC2.add_argument('--QarBNC2_epsilon', metavar='FLOAT', type=float, action='store', - help='Set the epsilon parameter for QarBoostNC2', default=1e-08) + groupQarBoostNC2.add_argument('--QarBNC2_mu', metavar='FLOAT', type=float, + action='store', + help='Set the mu parameter for QarBoostNC2', + default=0.001) + groupQarBoostNC2.add_argument('--QarBNC2_epsilon', metavar='FLOAT', + type=float, action='store', + help='Set the epsilon parameter for QarBoostNC2', + default=1e-08) groupQarBoostNC3 = parser.add_argument_group('QarBoostNC3 arguments') - groupQarBoostNC3.add_argument('--QarBNC3_mu', metavar='FLOAT', type=float, action='store', - help='Set the mu parameter for QarBoostNC3', default=0.001) - groupQarBoostNC3.add_argument('--QarBNC3_epsilon', metavar='FLOAT', type=float, action='store', - help='Set the epsilon parameter for QarBoostNC3', default=1e-08) + groupQarBoostNC3.add_argument('--QarBNC3_mu', metavar='FLOAT', type=float, + action='store', + help='Set the mu parameter for QarBoostNC3', + default=0.001) + groupQarBoostNC3.add_argument('--QarBNC3_epsilon', metavar='FLOAT', + type=float, action='store', + help='Set the epsilon parameter for QarBoostNC3', + default=1e-08) +# +# Multiview args +# groupMumbo = parser.add_argument_group('Mumbo arguments') - groupMumbo.add_argument('--MU_types', metavar='STRING', action='store', nargs="+", + groupMumbo.add_argument('--MU_types', metavar='STRING', action='store', + nargs="+", help='Determine which monoview classifier to use with Mumbo', default=['']) - groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', nargs='+', + 
groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', + nargs='+', help='Configuration for the monoview classifier in Mumbo separate each classifier with sapce and each argument with:', default=['']) groupMumbo.add_argument('--MU_iter', metavar='INT', action='store', nargs=3, - help='Max number of iteration, min number of iteration, convergence threshold', type=float, + help='Max number of iteration, min number of iteration, convergence threshold', + type=float, default=[10, 1, 0.01]) groupMumbo.add_argument('--MU_combination', action='store_true', help='Try all the monoview classifiers combinations for each view', default=False) - groupFusion = parser.add_argument_group('Fusion arguments') - groupFusion.add_argument('--FU_types', metavar='STRING', action='store', nargs="+", + groupFusion.add_argument('--FU_types', metavar='STRING', action='store', + nargs="+", help='Determine which type of fusion to use', default=['']) groupEarlyFusion = parser.add_argument_group('Early Fusion arguments') - groupEarlyFusion.add_argument('--FU_early_methods', metavar='STRING', action='store', nargs="+", + groupEarlyFusion.add_argument('--FU_early_methods', metavar='STRING', + action='store', nargs="+", help='Determine which early fusion method of fusion to use', default=['']) - groupEarlyFusion.add_argument('--FU_E_method_configs', metavar='STRING', action='store', nargs='+', + groupEarlyFusion.add_argument('--FU_E_method_configs', metavar='STRING', + action='store', nargs='+', help='Configuration for the early fusion methods separate ' 'method by space and values by :', default=['']) - groupEarlyFusion.add_argument('--FU_E_cl_config', metavar='STRING', action='store', nargs='+', + groupEarlyFusion.add_argument('--FU_E_cl_config', metavar='STRING', + action='store', nargs='+', help='Configuration for the monoview classifiers used separate classifier by space ' 'and configs must be of form argument1_name:value,argument2_name:value', default=['']) - groupEarlyFusion.add_argument('--FU_E_cl_names', metavar='STRING', action='store', nargs='+', - help='Name of the classifiers used for each early fusion method', default=['']) + groupEarlyFusion.add_argument('--FU_E_cl_names', metavar='STRING', + action='store', nargs='+', + help='Name of the classifiers used for each early fusion method', + default=['']) groupLateFusion = parser.add_argument_group('Late Fusion arguments') - groupLateFusion.add_argument('--FU_late_methods', metavar='STRING', action='store', nargs="+", + groupLateFusion.add_argument('--FU_late_methods', metavar='STRING', + action='store', nargs="+", help='Determine which late fusion method of fusion to use', default=['']) - groupLateFusion.add_argument('--FU_L_method_config', metavar='STRING', action='store', nargs='+', - help='Configuration for the fusion method', default=['']) - groupLateFusion.add_argument('--FU_L_cl_config', metavar='STRING', action='store', nargs='+', - help='Configuration for the monoview classifiers used', default=['']) - groupLateFusion.add_argument('--FU_L_cl_names', metavar='STRING', action='store', nargs="+", - help='Names of the classifier used for late fusion', default=['']) - groupLateFusion.add_argument('--FU_L_select_monoview', metavar='STRING', action='store', + groupLateFusion.add_argument('--FU_L_method_config', metavar='STRING', + action='store', nargs='+', + help='Configuration for the fusion method', + default=['']) + groupLateFusion.add_argument('--FU_L_cl_config', metavar='STRING', + action='store', nargs='+', + help='Configuration for 
the monoview classifiers used', + default=['']) + groupLateFusion.add_argument('--FU_L_cl_names', metavar='STRING', + action='store', nargs="+", + help='Names of the classifier used for late fusion', + default=['']) + groupLateFusion.add_argument('--FU_L_select_monoview', metavar='STRING', + action='store', help='Determine which method to use to select the monoview classifiers', default="intersect") groupFatLateFusion = parser.add_argument_group('Fat Late Fusion arguments') - groupFatLateFusion.add_argument('--FLF_weights', metavar='FLOAT', action='store', nargs="+", - help='Determine the weights of each monoview decision for FLF', type=float, - default=[]) - - groupFatSCMLateFusion = parser.add_argument_group('Fat SCM Late Fusion arguments') - groupFatSCMLateFusion.add_argument('--FSCMLF_p', metavar='FLOAT', action='store', - help='Determine the p argument of the SCM', type=float, - default=0.5) - groupFatSCMLateFusion.add_argument('--FSCMLF_max_attributes', metavar='INT', action='store', - help='Determine the maximum number of aibutes used by the SCM', type=int, - default=4) - groupFatSCMLateFusion.add_argument('--FSCMLF_model', metavar='STRING', action='store', - help='Determine the model type of the SCM', - default="conjunction") - - groupDisagreeFusion = parser.add_argument_group('Disagreement based fusion arguments') - groupDisagreeFusion.add_argument('--DGF_weights', metavar='FLOAT', action='store', nargs="+", - help='Determine the weights of each monoview decision for DFG', type=float, + groupFatLateFusion.add_argument('--FLF_weights', metavar='FLOAT', + action='store', nargs="+", + help='Determine the weights of each monoview decision for FLF', + type=float, default=[]) + groupFatSCMLateFusion = parser.add_argument_group( + 'Fat SCM Late Fusion arguments') + groupFatSCMLateFusion.add_argument('--FSCMLF_p', metavar='FLOAT', + action='store', + help='Determine the p argument of the SCM', + type=float, + default=0.5) + groupFatSCMLateFusion.add_argument('--FSCMLF_max_attributes', metavar='INT', + action='store', + help='Determine the maximum number of aibutes used by the SCM', + type=int, + default=4) + groupFatSCMLateFusion.add_argument('--FSCMLF_model', metavar='STRING', + action='store', + help='Determine the model type of the SCM', + default="conjunction") + groupDisagreeFusion = parser.add_argument_group( + 'Disagreement based fusion arguments') + groupDisagreeFusion.add_argument('--DGF_weights', metavar='FLOAT', + action='store', nargs="+", + help='Determine the weights of each monoview decision for DFG', + type=float, + default=[]) args = parser.parse_args(arguments) return args @@ -585,7 +733,9 @@ def initStatsIterRandomStates(statsIter, randomState): Multiple random states, one for each sattistical iteration of the same benchmark. """ if statsIter > 1: - statsIterRandomStates = [np.random.RandomState(randomState.randint(5000)) for _ in range(statsIter)] + statsIterRandomStates = [ + np.random.RandomState(randomState.randint(5000)) for _ in + range(statsIter)] else: statsIterRandomStates = [randomState] return statsIterRandomStates @@ -637,17 +787,21 @@ def initLogFile(name, views, CL_type, log, debug, label, result_directory): Reference to the main results directory for the benchmark. 
""" if debug: - resultDirectory = result_directory + name + "/debug_started_" + time.strftime("%Y_%m_%d-%H_%M_%S") + "_" + label + "/" + resultDirectory = result_directory + name + "/debug_started_" + time.strftime( + "%Y_%m_%d-%H_%M_%S") + "_" + label + "/" else: - resultDirectory = result_directory + name + "/started_" + time.strftime("%Y_%m_%d-%H_%M") + "_" + label + "/" - logFileName = time.strftime("%Y_%m_%d-%H_%M") + "-" + ''.join(CL_type) + "-" + "_".join( + resultDirectory = result_directory + name + "/started_" + time.strftime( + "%Y_%m_%d-%H_%M") + "_" + label + "/" + logFileName = time.strftime("%Y_%m_%d-%H_%M") + "-" + ''.join( + CL_type) + "-" + "_".join( views) + "-" + name + "-LOG" if os.path.exists(os.path.dirname(resultDirectory)): raise NameError("The result dir already exists, wait 1 min and retry") os.makedirs(os.path.dirname(resultDirectory + logFileName)) logFile = resultDirectory + logFileName logFile += ".log" - logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', filename=logFile, level=logging.DEBUG, + logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', + filename=logFile, level=logging.DEBUG, filemode='w') if log: logging.getLogger().addHandler(logging.StreamHandler()) @@ -709,9 +863,12 @@ def genKFolds(statsIter, nbFolds, statsIterRandomStates): if statsIter > 1: foldsList = [] for randomState in statsIterRandomStates: - foldsList.append(sklearn.model_selection.StratifiedKFold(n_splits=nbFolds, random_state=randomState)) + foldsList.append( + sklearn.model_selection.StratifiedKFold(n_splits=nbFolds, + random_state=randomState)) else: - foldsList = [sklearn.model_selection.StratifiedKFold(n_splits=nbFolds, random_state=statsIterRandomStates)] + foldsList = [sklearn.model_selection.StratifiedKFold(n_splits=nbFolds, + random_state=statsIterRandomStates)] return foldsList @@ -738,8 +895,10 @@ def initViews(DATASET, argViews): if argViews != [""]: allowedViews = argViews allViews = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) - if type(DATASET.get("View" + str(viewIndex)).attrs["name"])!=bytes - else DATASET.get("View" + str(viewIndex)).attrs["name"].decode("utf-8") + if type( + DATASET.get("View" + str(viewIndex)).attrs["name"]) != bytes + else DATASET.get("View" + str(viewIndex)).attrs[ + "name"].decode("utf-8") for viewIndex in range(NB_VIEW)] views = [] viewsIndices = [] @@ -752,9 +911,11 @@ def initViews(DATASET, argViews): viewsIndices.append(viewIndex) else: views = [str(DATASET.get("View" + str(viewIndex)).attrs["name"]) - if type(DATASET.get("View" + str(viewIndex)).attrs["name"])!=bytes - else DATASET.get("View" + str(viewIndex)).attrs["name"].decode("utf-8") - for viewIndex in range(NB_VIEW)] + if type( + DATASET.get("View" + str(viewIndex)).attrs["name"]) != bytes + else DATASET.get("View" + str(viewIndex)).attrs["name"].decode( + "utf-8") + for viewIndex in range(NB_VIEW)] viewsIndices = range(NB_VIEW) allViews = views return views, viewsIndices, allViews @@ -784,8 +945,11 @@ def genDirecortiesNames(directory, statsIter): return directories -def genArgumentDictionaries(labelsDictionary, directories, multiclassLabels, labelsCombinations, indicesMulticlass, - hyperParamSearch, args, kFolds, statsIterRandomStates, metrics, argumentDictionaries, +def genArgumentDictionaries(labelsDictionary, directories, multiclassLabels, + labelsCombinations, indicesMulticlass, + hyperParamSearch, args, kFolds, + statsIterRandomStates, metrics, + argumentDictionaries, benchmark, nbViews, views, viewsIndices): r"""Used to generate 
a dictionary for each benchmark. @@ -836,25 +1000,28 @@ def genArgumentDictionaries(labelsDictionary, directories, multiclassLabels, lab benchmarkArgumentDictionaries = [] for combinationIndex, labelsCombination in enumerate(labelsCombinations): for iterIndex, iterRandomState in enumerate(statsIterRandomStates): - benchmarkArgumentDictionary = {"LABELS_DICTIONARY": {0:labelsDictionary[labelsCombination[0]], - 1:labelsDictionary[labelsCombination[1]]}, - "directory": directories[iterIndex]+ - labelsDictionary[labelsCombination[0]]+ - "-vs-"+ - labelsDictionary[labelsCombination[1]]+"/", - "classificationIndices": [indicesMulticlass[combinationIndex][0][iterIndex], - indicesMulticlass[combinationIndex][1][iterIndex], - indicesMulticlass[combinationIndex][2][iterIndex]], - "args": args, - "labels": multiclassLabels[combinationIndex], - "kFolds": kFolds[iterIndex], - "randomState": iterRandomState, - "hyperParamSearch": hyperParamSearch, - "metrics": metrics, - "argumentDictionaries": argumentDictionaries, - "benchmark": benchmark, - "views": views, - "viewsIndices": viewsIndices, - "flag": [iterIndex, labelsCombination]} + benchmarkArgumentDictionary = { + "LABELS_DICTIONARY": {0: labelsDictionary[labelsCombination[0]], + 1: labelsDictionary[ + labelsCombination[1]]}, + "directory": directories[iterIndex] + + labelsDictionary[labelsCombination[0]] + + "-vs-" + + labelsDictionary[labelsCombination[1]] + "/", + "classificationIndices": [ + indicesMulticlass[combinationIndex][0][iterIndex], + indicesMulticlass[combinationIndex][1][iterIndex], + indicesMulticlass[combinationIndex][2][iterIndex]], + "args": args, + "labels": multiclassLabels[combinationIndex], + "kFolds": kFolds[iterIndex], + "randomState": iterRandomState, + "hyperParamSearch": hyperParamSearch, + "metrics": metrics, + "argumentDictionaries": argumentDictionaries, + "benchmark": benchmark, + "views": views, + "viewsIndices": viewsIndices, + "flag": [iterIndex, labelsCombination]} benchmarkArgumentDictionaries.append(benchmarkArgumentDictionary) return benchmarkArgumentDictionaries diff --git a/multiview_platform/Tests.py b/multiview_platform/Tests.py index 318166e2d0d4599c263796eca66eeaca1e283a52..25b01535d1b28e7acc5dcb78f096e02a60891fa0 100644 --- a/multiview_platform/Tests.py +++ b/multiview_platform/Tests.py @@ -3,4 +3,4 @@ # from .Tests.test_ExecClassif import suite # # runner = unittest.TextTestRunner() -# runner.run(suite()) \ No newline at end of file +# runner.run(suite()) diff --git a/multiview_platform/Tests/Test_Metrics/test_accuracy_score.py b/multiview_platform/Tests/Test_Metrics/test_accuracy_score.py index b20f39441b8a23c7187b28d5efd9b7ecfc45319b..622141cf592cfee0788fd0baed88137c9d347372 100644 --- a/multiview_platform/Tests/Test_Metrics/test_accuracy_score.py +++ b/multiview_platform/Tests/Test_Metrics/test_accuracy_score.py @@ -1,7 +1,5 @@ import unittest -import os -from ...MonoMultiViewClassifiers.Metrics import accuracy_score # Tester que chaque metrique a bien les bonnes fonctions qui renvoient bien les bons types d'outputs avec les bons types d'inputs # Faire de meme pour les differents classifeurs monovues et les differents classifeurs multivues diff --git a/multiview_platform/Tests/Test_MonoView/test_ExecClassifMonoView.py b/multiview_platform/Tests/Test_MonoView/test_ExecClassifMonoView.py index 437b530d7438931b1450c131e29880aa19cb4df3..c1ddf2e5ff998fdd73c39a1539752333da6c0d76 100644 --- a/multiview_platform/Tests/Test_MonoView/test_ExecClassifMonoView.py +++ 
b/multiview_platform/Tests/Test_MonoView/test_ExecClassifMonoView.py @@ -1,7 +1,8 @@ +import os import unittest -import numpy as np + import h5py -import os +import numpy as np from ...MonoMultiViewClassifiers.Monoview import ExecClassifMonoView @@ -11,14 +12,17 @@ class Test_initConstants(unittest.TestCase): @classmethod def setUpClass(cls): os.mkdir("multiview_platform/Tests/temp_tests") - cls.datasetFile = h5py.File("multiview_platform/Tests/temp_tests/test.hdf5", "w") + cls.datasetFile = h5py.File( + "multiview_platform/Tests/temp_tests/test.hdf5", "w") cls.random_state = np.random.RandomState(42) cls.args = {"CL_type": "test_clf"} - cls.X_value = cls.random_state.randint(0,500,(10,20)) + cls.X_value = cls.random_state.randint(0, 500, (10, 20)) cls.X = cls.datasetFile.create_dataset("View0", data=cls.X_value) cls.X.attrs["name"] = "test_dataset" cls.X.attrs["sparse"] = False - cls.classificationIndices = [np.array([0,2,4,6,8]), np.array([1,3,5,7,9]), np.array([1,3,5,7,9])] + cls.classificationIndices = [np.array([0, 2, 4, 6, 8]), + np.array([1, 3, 5, 7, 9]), + np.array([1, 3, 5, 7, 9])] cls.labelsNames = ["test_true", "test_false"] cls.name = "test" cls.directory = "multiview_platform/Tests/temp_tests/test_dir/" @@ -48,7 +52,8 @@ class Test_initConstants(unittest.TestCase): @classmethod def tearDownClass(cls): os.remove("multiview_platform/Tests/temp_tests/test.hdf5") - os.rmdir("multiview_platform/Tests/temp_tests/test_dir/test_clf/test_dataset") + os.rmdir( + "multiview_platform/Tests/temp_tests/test_dir/test_clf/test_dataset") os.rmdir("multiview_platform/Tests/temp_tests/test_dir/test_clf") os.rmdir("multiview_platform/Tests/temp_tests/test_dir") os.rmdir("multiview_platform/Tests/temp_tests") @@ -59,24 +64,29 @@ class Test_initTrainTest(unittest.TestCase): @classmethod def setUpClass(cls): cls.random_state = np.random.RandomState(42) - cls.X = cls.random_state.randint(0,500,(10,5)) - cls.Y = cls.random_state.randint(0,2,10) - cls.classificationIndices = [np.array([0,2,4,6,8]),np.array([1,3,5,7,9]), np.array([1,3,5,7,9])] + cls.X = cls.random_state.randint(0, 500, (10, 5)) + cls.Y = cls.random_state.randint(0, 2, 10) + cls.classificationIndices = [np.array([0, 2, 4, 6, 8]), + np.array([1, 3, 5, 7, 9]), + np.array([1, 3, 5, 7, 9])] def test_simple(cls): - X_train, y_train, X_test, y_test, X_test_multiclass = ExecClassifMonoView.initTrainTest(cls.X, cls.Y, cls.classificationIndices) - np.testing.assert_array_equal(X_train, np.array([np.array([102,435,348,270,106]), - np.array([466,214,330,458,87]), - np.array([149,308,257,343,491]), - np.array([276,160,459,313,21]), - np.array([58,169,475,187,463])])) - np.testing.assert_array_equal(X_test, np.array([np.array([71,188,20,102,121]), - np.array([372,99,359,151,130]), - np.array([413,293,385,191,443]), - np.array([252,235,344,48,474]), - np.array([270,189,445,174,445])])) - np.testing.assert_array_equal(y_train, np.array([0,0,1,0,0])) - np.testing.assert_array_equal(y_test, np.array([1,1,0,0,0])) + X_train, y_train, X_test, y_test, X_test_multiclass = ExecClassifMonoView.initTrainTest( + cls.X, cls.Y, cls.classificationIndices) + np.testing.assert_array_equal(X_train, np.array( + [np.array([102, 435, 348, 270, 106]), + np.array([466, 214, 330, 458, 87]), + np.array([149, 308, 257, 343, 491]), + np.array([276, 160, 459, 313, 21]), + np.array([58, 169, 475, 187, 463])])) + np.testing.assert_array_equal(X_test, np.array( + [np.array([71, 188, 20, 102, 121]), + np.array([372, 99, 359, 151, 130]), + np.array([413, 293, 385, 191, 443]), + 
np.array([252, 235, 344, 48, 474]), + np.array([270, 189, 445, 174, 445])])) + np.testing.assert_array_equal(y_train, np.array([0, 0, 1, 0, 0])) + np.testing.assert_array_equal(y_test, np.array([1, 1, 0, 0, 0])) # class Test_getKWARGS(unittest.TestCase): # diff --git a/multiview_platform/Tests/Test_MonoView/test_MonoviewUtils.py b/multiview_platform/Tests/Test_MonoView/test_MonoviewUtils.py index 026541f68e05c13c7d99aecf7ad382db01b8999f..d68b1c202ec3e74c044030596e6fe3d49cbb430d 100644 --- a/multiview_platform/Tests/Test_MonoView/test_MonoviewUtils.py +++ b/multiview_platform/Tests/Test_MonoView/test_MonoviewUtils.py @@ -1,16 +1,18 @@ import unittest + import numpy as np from sklearn.model_selection import StratifiedKFold from sklearn.tree.tree import DecisionTreeClassifier from ...MonoMultiViewClassifiers.Monoview import MonoviewUtils + class Test_genTestFoldsPreds(unittest.TestCase): @classmethod def setUpClass(cls): cls.random_state = np.random.RandomState(42) - cls.X_train = cls.random_state.random_sample((31,10)) + cls.X_train = cls.random_state.random_sample((31, 10)) cls.y_train = np.ones(31, dtype=int) cls.KFolds = StratifiedKFold(n_splits=3, random_state=cls.random_state) @@ -21,8 +23,10 @@ class Test_genTestFoldsPreds(unittest.TestCase): # print(cls.y_train) def test_simple(cls): - testFoldsPreds = MonoviewUtils.genTestFoldsPreds(cls.X_train, cls.y_train, cls.KFolds, cls.estimator) - cls.assertEqual(testFoldsPreds.shape, (3,10)) - np.testing.assert_array_equal(testFoldsPreds[0], np.array([ 1, 1, -1, -1, 1, 1, -1, 1, -1, 1])) - - + testFoldsPreds = MonoviewUtils.genTestFoldsPreds(cls.X_train, + cls.y_train, + cls.KFolds, + cls.estimator) + cls.assertEqual(testFoldsPreds.shape, (3, 10)) + np.testing.assert_array_equal(testFoldsPreds[0], np.array( + [1, 1, -1, -1, 1, 1, -1, 1, -1, 1])) diff --git a/multiview_platform/Tests/Test_MonoviewClassifiers/test_compatibility.py b/multiview_platform/Tests/Test_MonoviewClassifiers/test_compatibility.py index d90dde0bb008fab06c1f1bc006a6bdc427d7ce3c..d5474eb5d5c2845fcb0991f05e3b15d3e3495a3d 100644 --- a/multiview_platform/Tests/Test_MonoviewClassifiers/test_compatibility.py +++ b/multiview_platform/Tests/Test_MonoviewClassifiers/test_compatibility.py @@ -1,65 +1,87 @@ -import unittest import os -import h5py +import unittest + import numpy as np -from sklearn.utils.estimator_checks import check_estimator from ...MonoMultiViewClassifiers import MonoviewClassifiers + class Test_methods(unittest.TestCase): def test_simple(self): - for fileName in os.listdir("multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers"): + for fileName in os.listdir( + "multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers"): if fileName[-3:] == ".py" and fileName != "__init__.py": - monoview_classifier_module = getattr(MonoviewClassifiers, fileName[:-3]) + monoview_classifier_module = getattr(MonoviewClassifiers, + fileName[:-3]) self.assertIn("formatCmdArgs", dir(monoview_classifier_module), - fileName[:-3]+" must have getKWARGS method implemented") + fileName[ + :-3] + " must have getKWARGS method implemented") self.assertIn("paramsToSet", dir(monoview_classifier_module), - fileName[:-3]+" must have randomizedSearch method implemented") + fileName[ + :-3] + " must have randomizedSearch method implemented") self.assertIn(fileName[:-3], dir(monoview_classifier_module), - fileName[:-3] + " must have it's own class implemented") - - monoview_classifier_class = getattr(monoview_classifier_module, fileName[:-3]) - 
self.assertTrue(hasattr(monoview_classifier_class, "getInterpret"), - fileName[:-3] + " class must have getInterpret implemented") - self.assertTrue(hasattr(monoview_classifier_class, "canProbas",), - fileName[:-3] + " class must have canProbas implemented") + fileName[ + :-3] + " must have it's own class implemented") + + monoview_classifier_class = getattr(monoview_classifier_module, + fileName[:-3]) + self.assertTrue( + hasattr(monoview_classifier_class, "getInterpret"), + fileName[:-3] + " class must have getInterpret implemented") + self.assertTrue( + hasattr(monoview_classifier_class, "canProbas", ), + fileName[:-3] + " class must have canProbas implemented") monoview_classifier_instance = monoview_classifier_class() - self.assertTrue(hasattr(monoview_classifier_instance, "param_names", ), - fileName[:-3] + " class must have param_names attribute") - self.assertTrue(hasattr(monoview_classifier_instance, "classed_params", ), - fileName[:-3] + " class must have classed_params attribute") - self.assertTrue(hasattr(monoview_classifier_instance, "distribs", ), - fileName[:-3] + " class must have distribs attribute") - self.assertTrue(hasattr(monoview_classifier_instance, "weird_strings", ), - fileName[:-3] + " class must have weird_strings attribute") + self.assertTrue( + hasattr(monoview_classifier_instance, "param_names", ), + fileName[:-3] + " class must have param_names attribute") + self.assertTrue( + hasattr(monoview_classifier_instance, "classed_params", ), + fileName[:-3] + " class must have classed_params attribute") + self.assertTrue( + hasattr(monoview_classifier_instance, "distribs", ), + fileName[:-3] + " class must have distribs attribute") + self.assertTrue( + hasattr(monoview_classifier_instance, "weird_strings", ), + fileName[:-3] + " class must have weird_strings attribute") # check_estimator(monoview_classifier_instance) + class Test_canProbas(unittest.TestCase): def test_outputs(self): - for fileName in os.listdir("multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers"): + for fileName in os.listdir( + "multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers"): if fileName[-3:] == ".py" and fileName != "__init__.py": - monoview_classifier_module = getattr(MonoviewClassifiers, fileName[:-3]) - monoview_classifier_class = getattr(monoview_classifier_module, fileName[:-3])() + monoview_classifier_module = getattr(MonoviewClassifiers, + fileName[:-3]) + monoview_classifier_class = getattr(monoview_classifier_module, + fileName[:-3])() res = monoview_classifier_class.canProbas() - self.assertEqual(type(res), bool, "canProbas must return a boolean") + self.assertEqual(type(res), bool, + "canProbas must return a boolean") def test_inputs(self): - for fileName in os.listdir("multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers"): + for fileName in os.listdir( + "multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers"): if fileName[-3:] == ".py" and fileName != "__init__.py": - monoview_classifier_module = getattr(MonoviewClassifiers, fileName[:-3]) - monoview_classifier_class = getattr(monoview_classifier_module, fileName[:-3])() - with self.assertRaises(TypeError, msg="canProbas must have 0 args") as catcher: + monoview_classifier_module = getattr(MonoviewClassifiers, + fileName[:-3]) + monoview_classifier_class = getattr(monoview_classifier_module, + fileName[:-3])() + with self.assertRaises(TypeError, + msg="canProbas must have 0 args") as catcher: monoview_classifier_class.canProbas(35) + class Test_fit(unittest.TestCase): @classmethod 
def setUpClass(cls): cls.random_state = np.random.RandomState(42) - cls.dataset = cls.random_state.random_sample((10,20)) - cls.labels = cls.random_state.randint(0,2,10) + cls.dataset = cls.random_state.random_sample((10, 20)) + cls.labels = cls.random_state.randint(0, 2, 10) # def test_inputs(cls): # # DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs @@ -88,20 +110,31 @@ class Test_fit(unittest.TestCase): class Test_paramsToSet(unittest.TestCase): def test_inputs(self): - for fileName in os.listdir("multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers"): + for fileName in os.listdir( + "multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers"): if fileName[-3:] == ".py" and fileName != "__init__.py": - monoview_classifier_module = getattr(MonoviewClassifiers, fileName[:-3]) - with self.assertRaises(TypeError, msg="paramsToSet must have 2 positional args") as catcher: - monoview_classifier_module.paramsToSet(2, np.random.RandomState(42), 10) + monoview_classifier_module = getattr(MonoviewClassifiers, + fileName[:-3]) + with self.assertRaises(TypeError, + msg="paramsToSet must have 2 positional args") as catcher: + monoview_classifier_module.paramsToSet(2, + np.random.RandomState( + 42), 10) monoview_classifier_module.paramsToSet(2) monoview_classifier_module.paramsToSet() - res = monoview_classifier_module.paramsToSet(2, np.random.RandomState(42)) + res = monoview_classifier_module.paramsToSet(2, + np.random.RandomState( + 42)) def test_outputs(self): - for fileName in os.listdir("multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers"): + for fileName in os.listdir( + "multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers"): if fileName[-3:] == ".py" and fileName != "__init__.py": - monoview_classifier_module = getattr(MonoviewClassifiers, fileName[:-3]) - res = monoview_classifier_module.paramsToSet(2, np.random.RandomState(42)) + monoview_classifier_module = getattr(MonoviewClassifiers, + fileName[:-3]) + res = monoview_classifier_module.paramsToSet(2, + np.random.RandomState( + 42)) self.assertEqual(type(res), list) self.assertEqual(len(res), 2) self.assertEqual(type(res[0]), dict) @@ -117,4 +150,3 @@ class Test_paramsToSet(unittest.TestCase): # with self.assertRaises(TypeError, msg="getKWARGS must have 1 positional args") as catcher: # monoview_classifier_module.getKWARGS() # monoview_classifier_module.getKWARGS([1],2) - diff --git a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DifficultyMeasure/test_DifficultyMeasureModule.py b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DifficultyMeasure/test_DifficultyMeasureModule.py index 064ff0afc1a4904bf9ac2ab96f28f17a9602790d..08462ada48a68813c20399296a1e29227d54184b 100644 --- a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DifficultyMeasure/test_DifficultyMeasureModule.py +++ b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DifficultyMeasure/test_DifficultyMeasureModule.py @@ -1,21 +1,26 @@ import unittest + import numpy as np -from ....MonoMultiViewClassifiers.MultiviewClassifiers.DifficultyFusion import DifficultyFusionModule +from ....MonoMultiViewClassifiers.MultiviewClassifiers.DifficultyFusion import \ + DifficultyFusionModule + class Test_difficulty(unittest.TestCase): @classmethod def setUpClass(cls): cls.classifiersDecisions = np.array([ - [np.random.randint(0,2, (2, 5)), np.array([[0, 0, 1, 0, 1], [0, 1, 0, 1, 0]]), - np.random.randint(0,2, (2, 5)), np.random.randint(0,2, (2, 5)), - np.random.randint(0,2, (2, 5))], - [np.random.randint(0,2, (2, 
5)), np.random.randint(0,2, (2, 5)), - np.random.randint(0,2, (2, 5)), np.array([[0, 0, 1, 1, 0], [0, 1, 0, 1, 0]]), - np.random.randint(0,2, (2, 5))], - [np.random.randint(0,2, (2, 5)), np.random.randint(0,2, (2, 5)), - np.random.randint(0,2, (2, 5)), np.random.randint(0,2, (2, 5)), + [np.random.randint(0, 2, (2, 5)), + np.array([[0, 0, 1, 0, 1], [0, 1, 0, 1, 0]]), + np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)), + np.random.randint(0, 2, (2, 5))], + [np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)), + np.random.randint(0, 2, (2, 5)), + np.array([[0, 0, 1, 1, 0], [0, 1, 0, 1, 0]]), + np.random.randint(0, 2, (2, 5))], + [np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)), + np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)), np.array([[0, 1, 1, 1, 1], [0, 1, 0, 1, 0]])], ]) cls.combination = [1, 3, 4] @@ -23,8 +28,9 @@ class Test_difficulty(unittest.TestCase): cls.foldsLen = "" def test_simple(cls): - difficulty_measure = DifficultyFusionModule.difficulty(cls.classifiersDecisions, - cls.combination, - cls.foldsGroudTruth, - cls.foldsLen) + difficulty_measure = DifficultyFusionModule.difficulty( + cls.classifiersDecisions, + cls.combination, + cls.foldsGroudTruth, + cls.foldsLen) cls.assertAlmostEqual(difficulty_measure, 0.29861111111) diff --git a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DisagreeFusion/test_DisagreeFusionModule.py b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DisagreeFusion/test_DisagreeFusionModule.py index 4fbe73e310643bf344f7e284fe3a8f552720c180..d27ccf3b9fd36ea5309ba1a9113bd8145d8250b1 100644 --- a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DisagreeFusion/test_DisagreeFusionModule.py +++ b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DisagreeFusion/test_DisagreeFusionModule.py @@ -1,8 +1,11 @@ import unittest + import numpy as np -from ....MonoMultiViewClassifiers.MultiviewClassifiers.DisagreeFusion import DisagreeFusionModule -from multiview_platform.MonoMultiViewClassifiers.Multiview.Additions import diversity_utils +from multiview_platform.MonoMultiViewClassifiers.Multiview.Additions import \ + diversity_utils +from ....MonoMultiViewClassifiers.MultiviewClassifiers.DisagreeFusion import \ + DisagreeFusionModule class Test_disagreement(unittest.TestCase): @@ -12,125 +15,157 @@ class Test_disagreement(unittest.TestCase): cls.randomState = np.random.RandomState(42) cls.allClassifiersNames = ["SCM", "SVM", "DT"] cls.directory = "" - cls.viewsIndices = [0,1] - cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [0, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [0, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2,6)]) - ]], - [1, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [1, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [1, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) + cls.viewsIndices = [0, 1] + cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", + 
np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [0, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [0, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [1, ["SCM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [1, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [1, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) ]] ] cls.classificationIndices = [] def test_simple(cls): - bestCombi, disagreement = diversity_utils.couple_div_measure(cls.allClassifiersNames, cls.viewsIndices, cls.resultsMonoview, DisagreeFusionModule.disagree, "") + bestCombi, disagreement = diversity_utils.couple_div_measure( + cls.allClassifiersNames, cls.viewsIndices, cls.resultsMonoview, + DisagreeFusionModule.disagree, "") cls.assertAlmostEqual(disagreement, 0.666666666667) cls.assertEqual(len(bestCombi), 2) def test_viewsIndices(cls): - cls.viewsIndices = [0,6] - cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [0, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [0, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [6, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [6, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [6, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) + cls.viewsIndices = [0, 6] + cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [0, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [0, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [6, ["SCM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [6, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [6, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) ]] ] - bestCombi, disagreement = diversity_utils.couple_div_measure(cls.allClassifiersNames, cls.viewsIndices, - cls.resultsMonoview, DisagreeFusionModule.disagree, - "") + 
bestCombi, disagreement = diversity_utils.couple_div_measure( + cls.allClassifiersNames, cls.viewsIndices, + cls.resultsMonoview, DisagreeFusionModule.disagree, + "") cls.assertAlmostEqual(disagreement, 0.611111111111) cls.assertEqual(len(bestCombi), 2) def test_multipleViews(cls): cls.viewsIndices = [0, 6, 18] - cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [0, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [0, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [6, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [6, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [6, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [18, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [18, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [18, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]] + cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [0, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [0, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [6, ["SCM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [6, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [6, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [18, ["SCM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, + 6)]) + ]], + [18, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, + 6)]) + ]], + [18, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, + 6)]) + ]] ] - bestCombi, disagreement = diversity_utils.couple_div_measure(cls.allClassifiersNames, cls.viewsIndices, - cls.resultsMonoview, DisagreeFusionModule.disagree, - "") + bestCombi, disagreement = diversity_utils.couple_div_measure( + cls.allClassifiersNames, cls.viewsIndices, + cls.resultsMonoview, DisagreeFusionModule.disagree, + "") cls.assertAlmostEqual(disagreement, 
0.592592592593) cls.assertEqual(len(bestCombi), 3) + class Test_disagree(unittest.TestCase): @classmethod def setUpClass(cls): - cls.monoviewDecision1 = np.array([0,0,1,1]) - cls.monoviewDecision2 = np.array([0,1,0,1]) + cls.monoviewDecision1 = np.array([0, 0, 1, 1]) + cls.monoviewDecision2 = np.array([0, 1, 0, 1]) cls.ground_truth = None def test_simple(cls): - disagreement = DisagreeFusionModule.disagree(cls.monoviewDecision1, cls.monoviewDecision2, cls.ground_truth) - np.testing.assert_array_equal(disagreement, np.array([False,True,True, False])) + disagreement = DisagreeFusionModule.disagree(cls.monoviewDecision1, + cls.monoviewDecision2, + cls.ground_truth) + np.testing.assert_array_equal(disagreement, + np.array([False, True, True, False])) diff --git a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DoubleFaultFusion/test_DoubleFaultFusionModule.py b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DoubleFaultFusion/test_DoubleFaultFusionModule.py index 0924eaa5115637a19d1367d9cf27089ebd71fb37..7267f3087a0c40e54f31e9c045644750e17ab2ed 100644 --- a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DoubleFaultFusion/test_DoubleFaultFusionModule.py +++ b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_DoubleFaultFusion/test_DoubleFaultFusionModule.py @@ -1,8 +1,11 @@ import unittest + import numpy as np -from ....MonoMultiViewClassifiers.MultiviewClassifiers.DoubleFaultFusion import DoubleFaultFusionModule -from multiview_platform.MonoMultiViewClassifiers.Multiview.Additions import diversity_utils +from multiview_platform.MonoMultiViewClassifiers.Multiview.Additions import \ + diversity_utils +from ....MonoMultiViewClassifiers.MultiviewClassifiers.DoubleFaultFusion import \ + DoubleFaultFusionModule class Test_doubleFaultRatio(unittest.TestCase): @@ -12,118 +15,145 @@ class Test_doubleFaultRatio(unittest.TestCase): cls.randomState = np.random.RandomState(42) cls.allClassifiersNames = ["SCM", "SVM", "DT"] cls.directory = "" - cls.viewsIndices = [0,1] - cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [0, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [0, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2,6)]) - ]], - [1, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [1, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [1, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) + cls.viewsIndices = [0, 1] + cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [0, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [0, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [1, ["SCM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 
6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [1, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [1, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) ]] ] cls.classificationIndices = [] - cls.ground_truth = np.array([1,1,1,0,0,0]) + cls.ground_truth = np.array([1, 1, 1, 0, 0, 0]) def test_simple(cls): - bestCombi, disagreement = diversity_utils.couple_div_measure(cls.allClassifiersNames, cls.viewsIndices, - cls.resultsMonoview, - DoubleFaultFusionModule.doubleFault, - cls.ground_truth) + bestCombi, disagreement = diversity_utils.couple_div_measure( + cls.allClassifiersNames, cls.viewsIndices, + cls.resultsMonoview, + DoubleFaultFusionModule.doubleFault, + cls.ground_truth) cls.assertAlmostEqual(disagreement, 0.55555555555555547) cls.assertEqual(len(bestCombi), 2) def test_viewsIndices(cls): - cls.viewsIndices = [0,6] - cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [0, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [0, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [6, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [6, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [6, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) + cls.viewsIndices = [0, 6] + cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [0, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [0, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [6, ["SCM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [6, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [6, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) ]] ] - bestCombi, disagreement = diversity_utils.couple_div_measure(cls.allClassifiersNames, cls.viewsIndices, - cls.resultsMonoview, DoubleFaultFusionModule.doubleFault, - cls.ground_truth) + bestCombi, disagreement = diversity_utils.couple_div_measure( + cls.allClassifiersNames, cls.viewsIndices, + cls.resultsMonoview, DoubleFaultFusionModule.doubleFault, + cls.ground_truth) cls.assertAlmostEqual(disagreement, 0.33333333333333331) cls.assertEqual(len(bestCombi), 2) def test_multipleViews(cls): cls.viewsIndices = [0, 6, 18] - 
cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [0, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [0, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [6, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [6, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [6, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [18, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [18, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]], - [18, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) - ]] + cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [0, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [0, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [6, ["SCM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [6, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [6, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) + ]], + [18, ["SCM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, + 6)]) + ]], + [18, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, + 6)]) + ]], + [18, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, + 6)]) + ]] ] - bestCombi, disagreement = diversity_utils.couple_div_measure(cls.allClassifiersNames, cls.viewsIndices, - cls.resultsMonoview, DoubleFaultFusionModule.doubleFault, - cls.ground_truth) + bestCombi, disagreement = diversity_utils.couple_div_measure( + cls.allClassifiersNames, cls.viewsIndices, + cls.resultsMonoview, DoubleFaultFusionModule.doubleFault, + cls.ground_truth) cls.assertAlmostEqual(disagreement, 0.31481481481481483) cls.assertEqual(len(bestCombi), 3) @@ -132,10 +162,12 @@ class Test_doubleFault(unittest.TestCase): @classmethod def setUpClass(cls): - cls.monoviewDecision1 = np.array([0,0,1,1,0,0,1,1]) - cls.monoviewDecision2 = np.array([0,1,0,1,0,1,0,1]) - 
cls.ground_truth = np.array([0,0,0,0,1,1,1,1]) + cls.monoviewDecision1 = np.array([0, 0, 1, 1, 0, 0, 1, 1]) + cls.monoviewDecision2 = np.array([0, 1, 0, 1, 0, 1, 0, 1]) + cls.ground_truth = np.array([0, 0, 0, 0, 1, 1, 1, 1]) def test_simple(cls): - disagreement = DoubleFaultFusionModule.doubleFault(cls.monoviewDecision1, cls.monoviewDecision2, cls.ground_truth) - np.testing.assert_array_equal(disagreement, np.array([False,False,False,True,True,False,False,False])) + disagreement = DoubleFaultFusionModule.doubleFault( + cls.monoviewDecision1, cls.monoviewDecision2, cls.ground_truth) + np.testing.assert_array_equal(disagreement, np.array( + [False, False, False, True, True, False, False, False])) diff --git a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_EntropyFusion/test_EntropyFusionModule.py b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_EntropyFusion/test_EntropyFusionModule.py index ec3d510bea4ecaf2e26e6a28fc00c094cebe0e24..00b4a911b9912f6ba23249e337dfa6c3c04dded4 100644 --- a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_EntropyFusion/test_EntropyFusionModule.py +++ b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_EntropyFusion/test_EntropyFusionModule.py @@ -2,21 +2,34 @@ import unittest import numpy as np -from ....MonoMultiViewClassifiers.MultiviewClassifiers.EntropyFusion import EntropyFusionModule +from ....MonoMultiViewClassifiers.MultiviewClassifiers.EntropyFusion import \ + EntropyFusionModule + class Test_entropy(unittest.TestCase): @classmethod def setUpClass(cls): cls.classifiersDecisions = np.array([ - [np.random.randint(0,2,(2,5)), [[0,0,1,0,1], [0,1,0,1,0]], np.random.randint(0,2,(2,5)), np.random.randint(0,2,(2,5)), np.random.randint(0,2,(2,5))], - [np.random.randint(0,2, (2, 5)), np.random.randint(0,2, (2, 5)), np.random.randint(0,2, (2, 5)), [[0, 0, 1, 1, 0], [0, 1, 0, 1, 0]], np.random.randint(0,2, (2, 5))], - [np.random.randint(0,2, (2, 5)), np.random.randint(0,2, (2, 5)), np.random.randint(0,2, (2, 5)), np.random.randint(0,2, (2, 5)), [[0, 1, 1, 1, 1], [0, 1, 0, 1, 0]]], - ]) - cls.combination = [1,3,4] - cls.foldsGroudTruth = np.array([[1,1,0,0,1], [0,1,0,1,0]]) + [np.random.randint(0, 2, (2, 5)), + [[0, 0, 1, 0, 1], [0, 1, 0, 1, 0]], + np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)), + np.random.randint(0, 2, (2, 5))], + [np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)), + np.random.randint(0, 2, (2, 5)), + [[0, 0, 1, 1, 0], [0, 1, 0, 1, 0]], + np.random.randint(0, 2, (2, 5))], + [np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)), + np.random.randint(0, 2, (2, 5)), np.random.randint(0, 2, (2, 5)), + [[0, 1, 1, 1, 1], [0, 1, 0, 1, 0]]], + ]) + cls.combination = [1, 3, 4] + cls.foldsGroudTruth = np.array([[1, 1, 0, 0, 1], [0, 1, 0, 1, 0]]) cls.foldsLen = "" def test_simple(cls): - entropy_score = EntropyFusionModule.entropy(cls.classifiersDecisions, cls.combination, cls.foldsGroudTruth,cls.foldsLen) - cls.assertEqual(entropy_score, 0.15, 'Wrong values for entropy measure') \ No newline at end of file + entropy_score = EntropyFusionModule.entropy(cls.classifiersDecisions, + cls.combination, + cls.foldsGroudTruth, + cls.foldsLen) + cls.assertEqual(entropy_score, 0.15, 'Wrong values for entropy measure') diff --git a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/test_FusionModule.py b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/test_FusionModule.py index 8aa7084dda992a7ac15d47e77f9213423eb1cbbe..b60b2e0ed749409bc99052cf21d5d25d0c1d6d22 100644 --- 
a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/test_FusionModule.py +++ b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_Fusion/test_FusionModule.py @@ -1,6 +1,7 @@ import unittest -from ....MonoMultiViewClassifiers.MultiviewClassifiers.Fusion import FusionModule +from ....MonoMultiViewClassifiers.MultiviewClassifiers.Fusion import \ + FusionModule class Test_genName(unittest.TestCase): @@ -11,4 +12,3 @@ class Test_genName(unittest.TestCase): "classifiersNames": ["cheese", "is", "no", "disease"]} res = FusionModule.genName(self.config) self.assertEqual(res, "Late-chic") - diff --git a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_PseudoCQMeasure/test_PseudoCQFusionModule.py b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_PseudoCQMeasure/test_PseudoCQFusionModule.py index 60d6559f5f66e3bb9e927caa1b9ca367b216a8a9..957cef74647fbdc8be700070de638b9c1ce5fa24 100644 --- a/multiview_platform/Tests/Test_MultiviewClassifiers/Test_PseudoCQMeasure/test_PseudoCQFusionModule.py +++ b/multiview_platform/Tests/Test_MultiviewClassifiers/Test_PseudoCQMeasure/test_PseudoCQFusionModule.py @@ -19,4 +19,4 @@ # # def test_simple(cls): # entropy_score = EntropyFusionModule.entropy(cls.classifiersDecisions, cls.combination, cls.foldsGroudTruth,cls.foldsLen) -# cls.assertEqual(entropy_score, 0.15, 'Wrong values for entropy measure') \ No newline at end of file +# cls.assertEqual(entropy_score, 0.15, 'Wrong values for entropy measure') diff --git a/multiview_platform/Tests/Test_MultiviewClassifiers/test_diversity_utils.py b/multiview_platform/Tests/Test_MultiviewClassifiers/test_diversity_utils.py index 1ce1fa3f4f7e8a7281d672144e2317517af39b2f..318058f20176d0e207b1716748ecccf0f3351980 100644 --- a/multiview_platform/Tests/Test_MultiviewClassifiers/test_diversity_utils.py +++ b/multiview_platform/Tests/Test_MultiviewClassifiers/test_diversity_utils.py @@ -1,7 +1,9 @@ import unittest + import numpy as np -from multiview_platform.MonoMultiViewClassifiers.Multiview.Additions import diversity_utils +from multiview_platform.MonoMultiViewClassifiers.Multiview.Additions import \ + diversity_utils def fake_measure(a, b, c, d, e): @@ -13,43 +15,50 @@ class Test_global_div_measure(unittest.TestCase): @classmethod def setUpClass(cls): cls.allClassifersNames = ["SCM", "DT", "SVM"] - cls.viewsIndices = [0,1] + cls.viewsIndices = [0, 1] cls.randomState = np.random.RandomState(42) - cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) + cls.resultsMonoview = [[0, ["SCM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) ]], - [0, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) + [0, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) ]], - [0, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2,6)]) + [0, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) ]], - [1, ["SCM", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) + [1, ["SCM", "", "", 
"", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) ]], - [1, ["SVM", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) + [1, ["SVM", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) ]], - [1, ["DT", "", "", "", "", "", np.array([cls.randomState.randint(0,2,6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) + [1, ["DT", "", "", "", "", "", + np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) ]] ] cls.measurement = fake_measure - cls.foldsGroudTruth = np.array([cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6), - cls.randomState.randint(0,2, 6)]) + cls.foldsGroudTruth = np.array([cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6), + cls.randomState.randint(0, 2, 6)]) def test_simple(cls): - clf_names, diversity_measure = diversity_utils.global_div_measure(cls.allClassifersNames, - cls.viewsIndices, - cls.resultsMonoview, - cls.measurement, - cls.foldsGroudTruth) + clf_names, diversity_measure = diversity_utils.global_div_measure( + cls.allClassifersNames, + cls.viewsIndices, + cls.resultsMonoview, + cls.measurement, + cls.foldsGroudTruth) cls.assertEqual(len(clf_names), 2) - cls.assertEqual(diversity_measure, 42) \ No newline at end of file + cls.assertEqual(diversity_measure, 42) diff --git a/multiview_platform/Tests/Test_utils/test_GetMultiviewDB.py b/multiview_platform/Tests/Test_utils/test_GetMultiviewDB.py index 0a2e379d067da8279362aabe6f3dce8dcb4c8ecb..111cb76273ff616f5b678cb00500c13eb68d4ca6 100644 --- a/multiview_platform/Tests/Test_utils/test_GetMultiviewDB.py +++ b/multiview_platform/Tests/Test_utils/test_GetMultiviewDB.py @@ -1,7 +1,8 @@ +import os import unittest + import h5py import numpy as np -import os from ...MonoMultiViewClassifiers.utils import GetMultiviewDb @@ -13,20 +14,30 @@ class Test_copyhdf5Dataset(unittest.TestCase): cls.random_state = np.random.RandomState(42) if not os.path.exists("multiview_platform/Tests/temp_tests"): os.mkdir("multiview_platform/Tests/temp_tests") - cls.dataset_file = h5py.File("multiview_platform/Tests/temp_tests/test_copy.hdf5", "w") - cls.dataset = cls.dataset_file.create_dataset("test", data=cls.random_state.randint(0, 100, (10, 20))) + cls.dataset_file = h5py.File( + "multiview_platform/Tests/temp_tests/test_copy.hdf5", "w") + cls.dataset = cls.dataset_file.create_dataset("test", + data=cls.random_state.randint( + 0, 100, (10, 20))) cls.dataset.attrs["test_arg"] = "Am I copied" def test_simple_copy(cls): - GetMultiviewDb.copyhdf5Dataset(cls.dataset_file, cls.dataset_file, "test", "test_copy_1", np.arange(10)) - np.testing.assert_array_equal(cls.dataset_file.get("test").value, cls.dataset_file.get("test_copy_1").value) - cls.assertEqual("Am I copied", cls.dataset_file.get("test_copy_1").attrs["test_arg"]) + GetMultiviewDb.copyhdf5Dataset(cls.dataset_file, cls.dataset_file, + "test", "test_copy_1", np.arange(10)) + np.testing.assert_array_equal(cls.dataset_file.get("test").value, + cls.dataset_file.get("test_copy_1").value) + cls.assertEqual("Am I copied", + cls.dataset_file.get("test_copy_1").attrs["test_arg"]) def test_copy_only_some_indices(cls): - usedIndices = cls.random_state.choice(10,6, replace=False) - GetMultiviewDb.copyhdf5Dataset(cls.dataset_file, 
cls.dataset_file, "test", "test_copy", usedIndices) - np.testing.assert_array_equal(cls.dataset_file.get("test").value[usedIndices, :], cls.dataset_file.get("test_copy").value) - cls.assertEqual("Am I copied", cls.dataset_file.get("test_copy").attrs["test_arg"]) + usedIndices = cls.random_state.choice(10, 6, replace=False) + GetMultiviewDb.copyhdf5Dataset(cls.dataset_file, cls.dataset_file, + "test", "test_copy", usedIndices) + np.testing.assert_array_equal( + cls.dataset_file.get("test").value[usedIndices, :], + cls.dataset_file.get("test_copy").value) + cls.assertEqual("Am I copied", + cls.dataset_file.get("test_copy").attrs["test_arg"]) @classmethod def tearDownClass(cls): @@ -34,7 +45,6 @@ class Test_copyhdf5Dataset(unittest.TestCase): os.rmdir("multiview_platform/Tests/temp_tests") - class Test_filterViews(unittest.TestCase): @classmethod @@ -43,31 +53,40 @@ class Test_filterViews(unittest.TestCase): cls.views = ["test_view_1", "test_view_2"] if not os.path.exists("multiview_platform/Tests/temp_tests"): os.mkdir("multiview_platform/Tests/temp_tests") - cls.dataset_file = h5py.File("multiview_platform/Tests/temp_tests/test_copy.hdf5", "w") + cls.dataset_file = h5py.File( + "multiview_platform/Tests/temp_tests/test_copy.hdf5", "w") cls.metadata_group = cls.dataset_file.create_group("Metadata") cls.metadata_group.attrs["nbView"] = 4 for i in range(4): - cls.dataset = cls.dataset_file.create_dataset("View"+str(i), - data=cls.random_state.randint(0, 100, (10, 20))) - cls.dataset.attrs["name"] = "test_view_"+str(i) + cls.dataset = cls.dataset_file.create_dataset("View" + str(i), + data=cls.random_state.randint( + 0, 100, (10, 20))) + cls.dataset.attrs["name"] = "test_view_" + str(i) def test_simple_filter(cls): - cls.temp_dataset_file = h5py.File("multiview_platform/Tests/temp_tests/test_copy_temp.hdf5", "w") + cls.temp_dataset_file = h5py.File( + "multiview_platform/Tests/temp_tests/test_copy_temp.hdf5", "w") cls.dataset_file.copy("Metadata", cls.temp_dataset_file) - GetMultiviewDb.filterViews(cls.dataset_file, cls.temp_dataset_file, cls.views, np.arange(10)) + GetMultiviewDb.filterViews(cls.dataset_file, cls.temp_dataset_file, + cls.views, np.arange(10)) cls.assertEqual(cls.dataset_file.get("View1").attrs["name"], cls.temp_dataset_file.get("View0").attrs["name"]) - np.testing.assert_array_equal(cls.dataset_file.get("View2").value, cls.temp_dataset_file.get("View1").value) - cls.assertEqual(cls.temp_dataset_file.get("Metadata").attrs["nbView"], 2) + np.testing.assert_array_equal(cls.dataset_file.get("View2").value, + cls.temp_dataset_file.get("View1").value) + cls.assertEqual(cls.temp_dataset_file.get("Metadata").attrs["nbView"], + 2) def test_filter_view_and_examples(cls): - cls.temp_dataset_file = h5py.File("multiview_platform/Tests/temp_tests/test_copy_temp.hdf5", "w") + cls.temp_dataset_file = h5py.File( + "multiview_platform/Tests/temp_tests/test_copy_temp.hdf5", "w") cls.dataset_file.copy("Metadata", cls.temp_dataset_file) usedIndices = cls.random_state.choice(10, 6, replace=False) - GetMultiviewDb.filterViews(cls.dataset_file, cls.temp_dataset_file, cls.views, usedIndices) - np.testing.assert_array_equal(cls.dataset_file.get("View1").value[usedIndices, :], - cls.temp_dataset_file.get("View0").value) + GetMultiviewDb.filterViews(cls.dataset_file, cls.temp_dataset_file, + cls.views, usedIndices) + np.testing.assert_array_equal( + cls.dataset_file.get("View1").value[usedIndices, :], + cls.temp_dataset_file.get("View0").value) cls.temp_dataset_file.close() @classmethod @@ -76,6 +95,7 @@ 
class Test_filterViews(unittest.TestCase): os.remove("multiview_platform/Tests/temp_tests/test_copy_temp.hdf5") os.rmdir("multiview_platform/Tests/temp_tests") + # class Test_filterLabels(unittest.TestCase): @@ -84,8 +104,9 @@ class Test_filterLabels(unittest.TestCase): cls.random_state = np.random.RandomState(42) cls.labelsSet = set(range(4)) cls.askedLabelsNamesSet = {"test_label_1", "test_label_3"} - cls.fullLabels = cls.random_state.randint(0,4,10) - cls.availableLabelsNames = ["test_label_0", "test_label_1", "test_label_2", "test_label_3"] + cls.fullLabels = cls.random_state.randint(0, 4, 10) + cls.availableLabelsNames = ["test_label_0", "test_label_1", + "test_label_2", "test_label_3"] cls.askedLabelsNames = ["test_label_1", "test_label_3"] def test_simple(cls): @@ -98,11 +119,11 @@ class Test_filterLabels(unittest.TestCase): cls.askedLabelsNames) cls.assertEqual(["test_label_1", "test_label_3"], newLabelsNames) np.testing.assert_array_equal(usedIndices, np.array([1, 5, 9])) - np.testing.assert_array_equal(newLabels, np.array([1,1,0])) + np.testing.assert_array_equal(newLabels, np.array([1, 1, 0])) def test_biclasse(cls): - cls.labelsSet = {0,1} - cls.fullLabels = cls.random_state.randint(0,2,10) + cls.labelsSet = {0, 1} + cls.fullLabels = cls.random_state.randint(0, 2, 10) cls.availableLabelsNames = ["test_label_0", "test_label_1"] newLabels, \ newLabelsNames, \ @@ -116,7 +137,9 @@ class Test_filterLabels(unittest.TestCase): np.testing.assert_array_equal(newLabels, cls.fullLabels) def test_asked_too_many_labels(cls): - cls.askedLabelsNamesSet = {"test_label_0", "test_label_1", "test_label_2", "test_label_3", "chicken_is_heaven"} + cls.askedLabelsNamesSet = {"test_label_0", "test_label_1", + "test_label_2", "test_label_3", + "chicken_is_heaven"} with cls.assertRaises(GetMultiviewDb.DatasetError) as catcher: GetMultiviewDb.filterLabels(cls.labelsSet, cls.askedLabelsNamesSet, @@ -126,8 +149,10 @@ class Test_filterLabels(unittest.TestCase): exception = catcher.exception def test_asked_all_labels(cls): - cls.askedLabelsNamesSet = {"test_label_0", "test_label_1", "test_label_2", "test_label_3"} - cls.askedLabelsNames = ["test_label_0", "test_label_1", "test_label_2", "test_label_3"] + cls.askedLabelsNamesSet = {"test_label_0", "test_label_1", + "test_label_2", "test_label_3"} + cls.askedLabelsNames = ["test_label_0", "test_label_1", "test_label_2", + "test_label_3"] newLabels, \ newLabelsNames, \ usedIndices = GetMultiviewDb.filterLabels(cls.labelsSet, @@ -147,7 +172,8 @@ class Test_selectAskedLabels(unittest.TestCase): cls.random_state = np.random.RandomState(42) cls.askedLabelsNamesSet = {"test_label_1", "test_label_3"} cls.fullLabels = cls.random_state.randint(0, 4, 10) - cls.availableLabelsNames = ["test_label_0", "test_label_1", "test_label_2", "test_label_3"] + cls.availableLabelsNames = ["test_label_0", "test_label_1", + "test_label_2", "test_label_3"] cls.askedLabelsNames = ["test_label_1", "test_label_3"] def test_simple(cls): @@ -162,8 +188,10 @@ class Test_selectAskedLabels(unittest.TestCase): np.testing.assert_array_equal(newLabels, np.array([1, 1, 0])) def test_asked_all_labels(cls): - cls.askedLabelsNamesSet = {"test_label_0", "test_label_1", "test_label_2", "test_label_3"} - cls.askedLabelsNames = ["test_label_0", "test_label_1", "test_label_2", "test_label_3"] + cls.askedLabelsNamesSet = {"test_label_0", "test_label_1", + "test_label_2", "test_label_3"} + cls.askedLabelsNames = ["test_label_0", "test_label_1", "test_label_2", + "test_label_3"] newLabels, \ newLabelsNames, 
\ usedIndices = GetMultiviewDb.selectAskedLabels(cls.askedLabelsNamesSet, @@ -175,7 +203,8 @@ class Test_selectAskedLabels(unittest.TestCase): np.testing.assert_array_equal(newLabels, cls.fullLabels) def test_asked_unavailable_labels(cls): - cls.askedLabelsNamesSet = {"test_label_1", "test_label_3", "chicken_is_heaven"} + cls.askedLabelsNamesSet = {"test_label_1", "test_label_3", + "chicken_is_heaven"} with cls.assertRaises(GetMultiviewDb.DatasetError) as catcher: GetMultiviewDb.selectAskedLabels(cls.askedLabelsNamesSet, cls.availableLabelsNames, @@ -191,10 +220,12 @@ class Test_getAllLabels(unittest.TestCase): def setUpClass(cls): cls.random_state = np.random.RandomState(42) cls.fullLabels = cls.random_state.randint(0, 4, 10) - cls.availableLabelsNames = ["test_label_0", "test_label_1", "test_label_2", "test_label_3"] + cls.availableLabelsNames = ["test_label_0", "test_label_1", + "test_label_2", "test_label_3"] def test_simple(cls): - newLabels, newLabelsNames, usedIndices = GetMultiviewDb.getAllLabels(cls.fullLabels, cls.availableLabelsNames) + newLabels, newLabelsNames, usedIndices = GetMultiviewDb.getAllLabels( + cls.fullLabels, cls.availableLabelsNames) cls.assertEqual(cls.availableLabelsNames, newLabelsNames) np.testing.assert_array_equal(usedIndices, np.arange(10)) np.testing.assert_array_equal(newLabels, cls.fullLabels) @@ -207,33 +238,52 @@ class Test_fillLabelNames(unittest.TestCase): cls.NB_CLASS = 2 cls.askedLabelsNames = ["test_label_1", "test_label_3"] cls.randomState = np.random.RandomState(42) - cls.availableLabelsNames = ["test_label_"+str(_) for _ in range(40)] + cls.availableLabelsNames = ["test_label_" + str(_) for _ in range(40)] def test_simple(cls): - askedLabelsNames, askedLabelsNamesSet = GetMultiviewDb.fillLabelNames(cls.NB_CLASS, - cls.askedLabelsNames, - cls.randomState, - cls.availableLabelsNames) + askedLabelsNames, askedLabelsNamesSet = GetMultiviewDb.fillLabelNames( + cls.NB_CLASS, + cls.askedLabelsNames, + cls.randomState, + cls.availableLabelsNames) cls.assertEqual(askedLabelsNames, cls.askedLabelsNames) cls.assertEqual(askedLabelsNamesSet, set(cls.askedLabelsNames)) def test_missing_labels_names(cls): cls.NB_CLASS = 39 - askedLabelsNames, askedLabelsNamesSet = GetMultiviewDb.fillLabelNames(cls.NB_CLASS, - cls.askedLabelsNames, - cls.randomState, - cls.availableLabelsNames) - - cls.assertEqual(askedLabelsNames, ['test_label_1', 'test_label_3', 'test_label_35', 'test_label_38', 'test_label_6', 'test_label_15', 'test_label_32', 'test_label_28', 'test_label_8', 'test_label_29', 'test_label_26', 'test_label_17', 'test_label_19', 'test_label_10', 'test_label_18', 'test_label_14', 'test_label_21', 'test_label_11', 'test_label_34', 'test_label_0', 'test_label_27', 'test_label_7', 'test_label_13', 'test_label_2', 'test_label_39', 'test_label_23', 'test_label_4', 'test_label_31', 'test_label_37', 'test_label_5', 'test_label_36', 'test_label_25', 'test_label_33', 'test_label_12', 'test_label_24', 'test_label_20', 'test_label_22', 'test_label_9', 'test_label_16']) - cls.assertEqual(askedLabelsNamesSet, set(["test_label_"+str(_) for _ in range(30)]+["test_label_"+str(31+_) for _ in range(9)])) + askedLabelsNames, askedLabelsNamesSet = GetMultiviewDb.fillLabelNames( + cls.NB_CLASS, + cls.askedLabelsNames, + cls.randomState, + cls.availableLabelsNames) + + cls.assertEqual(askedLabelsNames, + ['test_label_1', 'test_label_3', 'test_label_35', + 'test_label_38', 'test_label_6', 'test_label_15', + 'test_label_32', 'test_label_28', 'test_label_8', + 'test_label_29', 
'test_label_26', 'test_label_17', + 'test_label_19', 'test_label_10', 'test_label_18', + 'test_label_14', 'test_label_21', 'test_label_11', + 'test_label_34', 'test_label_0', 'test_label_27', + 'test_label_7', 'test_label_13', 'test_label_2', + 'test_label_39', 'test_label_23', 'test_label_4', + 'test_label_31', 'test_label_37', 'test_label_5', + 'test_label_36', 'test_label_25', 'test_label_33', + 'test_label_12', 'test_label_24', 'test_label_20', + 'test_label_22', 'test_label_9', 'test_label_16']) + cls.assertEqual(askedLabelsNamesSet, set( + ["test_label_" + str(_) for _ in range(30)] + [ + "test_label_" + str(31 + _) for _ in range(9)])) def test_too_many_label_names(cls): cls.NB_CLASS = 2 - cls.askedLabelsNames = ["test_label_1", "test_label_3", "test_label_4", "test_label_6"] - askedLabelsNames, askedLabelsNamesSet = GetMultiviewDb.fillLabelNames(cls.NB_CLASS, - cls.askedLabelsNames, - cls.randomState, - cls.availableLabelsNames) + cls.askedLabelsNames = ["test_label_1", "test_label_3", "test_label_4", + "test_label_6"] + askedLabelsNames, askedLabelsNamesSet = GetMultiviewDb.fillLabelNames( + cls.NB_CLASS, + cls.askedLabelsNames, + cls.randomState, + cls.availableLabelsNames) cls.assertEqual(askedLabelsNames, ["test_label_3", "test_label_6"]) cls.assertEqual(askedLabelsNamesSet, {"test_label_3", "test_label_6"}) @@ -243,14 +293,20 @@ class Test_allAskedLabelsAreAvailable(unittest.TestCase): @classmethod def setUpClass(cls): cls.askedLabelsNamesSet = {"test_label_1", "test_label_3"} - cls.availableLabelsNames = ["test_label_0", "test_label_1", "test_label_2", "test_label_3"] + cls.availableLabelsNames = ["test_label_0", "test_label_1", + "test_label_2", "test_label_3"] def test_asked_available_labels(cls): - cls.assertTrue(GetMultiviewDb.allAskedLabelsAreAvailable(cls.askedLabelsNamesSet,cls.availableLabelsNames)) + cls.assertTrue( + GetMultiviewDb.allAskedLabelsAreAvailable(cls.askedLabelsNamesSet, + cls.availableLabelsNames)) def test_asked_unavailable_label(cls): - cls.askedLabelsNamesSet = {"test_label_1", "test_label_3", "chicken_is_heaven"} - cls.assertFalse(GetMultiviewDb.allAskedLabelsAreAvailable(cls.askedLabelsNamesSet,cls.availableLabelsNames)) + cls.askedLabelsNamesSet = {"test_label_1", "test_label_3", + "chicken_is_heaven"} + cls.assertFalse( + GetMultiviewDb.allAskedLabelsAreAvailable(cls.askedLabelsNamesSet, + cls.availableLabelsNames)) class Test_getClasses(unittest.TestCase): @@ -260,16 +316,18 @@ class Test_getClasses(unittest.TestCase): cls.random_state = np.random.RandomState(42) def test_multiclass(cls): - labelsSet = GetMultiviewDb.getClasses(cls.random_state.randint(0,5,30)) - cls.assertEqual(labelsSet, {0,1,2,3,4}) + labelsSet = GetMultiviewDb.getClasses( + cls.random_state.randint(0, 5, 30)) + cls.assertEqual(labelsSet, {0, 1, 2, 3, 4}) def test_biclass(cls): - labelsSet = GetMultiviewDb.getClasses(cls.random_state.randint(0,2,30)) - cls.assertEqual(labelsSet, {0,1}) + labelsSet = GetMultiviewDb.getClasses( + cls.random_state.randint(0, 2, 30)) + cls.assertEqual(labelsSet, {0, 1}) def test_one_class(cls): with cls.assertRaises(GetMultiviewDb.DatasetError) as catcher: - GetMultiviewDb.getClasses(np.zeros(30,dtype=int)) + GetMultiviewDb.getClasses(np.zeros(30, dtype=int)) exception = catcher.exception # cls.assertTrue("Dataset must have at least two different labels" in exception) @@ -280,7 +338,8 @@ class Test_getClassicDBhdf5(unittest.TestCase): def setUpClass(cls): if not os.path.exists("multiview_platform/Tests/temp_tests"): 
os.mkdir("multiview_platform/Tests/temp_tests") - cls.dataset_file = h5py.File("multiview_platform/Tests/temp_tests/test_dataset.hdf5", "w") + cls.dataset_file = h5py.File( + "multiview_platform/Tests/temp_tests/test_dataset.hdf5", "w") cls.pathF = "multiview_platform/Tests/temp_tests/" cls.nameDB = "test_dataset" cls.NB_CLASS = 2 @@ -289,69 +348,104 @@ class Test_getClassicDBhdf5(unittest.TestCase): cls.views = ["test_view_1", "test_view_3"] cls.metadata_group = cls.dataset_file.create_group("Metadata") cls.metadata_group.attrs["nbView"] = 4 - cls.labels_dataset = cls.dataset_file.create_dataset("Labels", data=cls.random_state.randint(0,4,10)) - cls.labels_dataset.attrs["names"] = ["test_label_0".encode(), "test_label_1".encode(), "test_label_2".encode(), "test_label_3".encode()] + cls.labels_dataset = cls.dataset_file.create_dataset("Labels", + data=cls.random_state.randint( + 0, 4, 10)) + cls.labels_dataset.attrs["names"] = ["test_label_0".encode(), + "test_label_1".encode(), + "test_label_2".encode(), + "test_label_3".encode()] for i in range(4): cls.dataset = cls.dataset_file.create_dataset("View" + str(i), - data=cls.random_state.randint(0, 100, (10, 20))) + data=cls.random_state.randint( + 0, 100, (10, 20))) cls.dataset.attrs["name"] = "test_view_" + str(i) def test_simple(cls): - dataset_file, labels_dictionary = GetMultiviewDb.getClassicDBhdf5(cls.views, cls.pathF, cls.nameDB, - cls.NB_CLASS, cls.askedLabelsNames, - cls.random_state) + dataset_file, labels_dictionary = GetMultiviewDb.getClassicDBhdf5( + cls.views, cls.pathF, cls.nameDB, + cls.NB_CLASS, cls.askedLabelsNames, + cls.random_state) cls.assertEqual(dataset_file.get("View1").attrs["name"], "test_view_3") - cls.assertEqual(labels_dictionary, {0:"test_label_1", 1:"test_label_3"}) + cls.assertEqual(labels_dictionary, + {0: "test_label_1", 1: "test_label_3"}) cls.assertEqual(dataset_file.get("Metadata").attrs["datasetLength"], 3) cls.assertEqual(dataset_file.get("Metadata").attrs["nbView"], 2) cls.assertEqual(dataset_file.get("Metadata").attrs["nbClass"], 2) - np.testing.assert_array_equal(dataset_file.get("View0").value, cls.dataset_file.get("View1").value[np.array([1,5,9]),:]) + np.testing.assert_array_equal(dataset_file.get("View0").value, + cls.dataset_file.get("View1").value[ + np.array([1, 5, 9]), :]) def test_all_labels_asked(cls): - askedLabelsNames = ["test_label_0", "test_label_1", "test_label_2", "test_label_3"] + askedLabelsNames = ["test_label_0", "test_label_1", "test_label_2", + "test_label_3"] NB_CLASS = 4 - dataset_file, labels_dictionary = GetMultiviewDb.getClassicDBhdf5(cls.views, cls.pathF, cls.nameDB, - NB_CLASS, askedLabelsNames, - cls.random_state) + dataset_file, labels_dictionary = GetMultiviewDb.getClassicDBhdf5( + cls.views, cls.pathF, cls.nameDB, + NB_CLASS, askedLabelsNames, + cls.random_state) cls.assertEqual(dataset_file.get("View1").attrs["name"], "test_view_3") - cls.assertEqual(labels_dictionary, {0:"test_label_0", 1:"test_label_1", 2:"test_label_2", 3:"test_label_3"}) + cls.assertEqual(labels_dictionary, + {0: "test_label_0", 1: "test_label_1", + 2: "test_label_2", 3: "test_label_3"}) cls.assertEqual(dataset_file.get("Metadata").attrs["datasetLength"], 10) cls.assertEqual(dataset_file.get("Metadata").attrs["nbView"], 2) cls.assertEqual(dataset_file.get("Metadata").attrs["nbClass"], 4) - np.testing.assert_array_equal(dataset_file.get("View0").value, cls.dataset_file.get("View1").value) + np.testing.assert_array_equal(dataset_file.get("View0").value, + cls.dataset_file.get("View1").value) def 
test_all_views_asked(cls): views = ["test_view_0", "test_view_1", "test_view_2", "test_view_3"] - dataset_file, labels_dictionary = GetMultiviewDb.getClassicDBhdf5(views, cls.pathF, cls.nameDB, - cls.NB_CLASS, cls.askedLabelsNames, - cls.random_state) + dataset_file, labels_dictionary = GetMultiviewDb.getClassicDBhdf5(views, + cls.pathF, + cls.nameDB, + cls.NB_CLASS, + cls.askedLabelsNames, + cls.random_state) for viewIndex in range(4): - np.testing.assert_array_equal(dataset_file.get("View"+str(viewIndex)).value, cls.dataset_file.get("View"+str(viewIndex)).value[np.array([1,5,9]),:]) - cls.assertEqual(dataset_file.get("View"+str(viewIndex)).attrs["name"], "test_view_"+str(viewIndex)) - cls.assertEqual(labels_dictionary, {0:"test_label_1", 1:"test_label_3"}) + np.testing.assert_array_equal( + dataset_file.get("View" + str(viewIndex)).value, + cls.dataset_file.get("View" + str(viewIndex)).value[ + np.array([1, 5, 9]), :]) + cls.assertEqual( + dataset_file.get("View" + str(viewIndex)).attrs["name"], + "test_view_" + str(viewIndex)) + cls.assertEqual(labels_dictionary, + {0: "test_label_1", 1: "test_label_3"}) cls.assertEqual(dataset_file.get("Metadata").attrs["datasetLength"], 3) cls.assertEqual(dataset_file.get("Metadata").attrs["nbView"], 4) cls.assertEqual(dataset_file.get("Metadata").attrs["nbClass"], 2) def test_asked_the_whole_dataset(cls): - askedLabelsNames = ["test_label_0", "test_label_1", "test_label_2", "test_label_3"] + askedLabelsNames = ["test_label_0", "test_label_1", "test_label_2", + "test_label_3"] NB_CLASS = 4 views = ["test_view_0", "test_view_1", "test_view_2", "test_view_3"] - dataset_file, labels_dictionary = GetMultiviewDb.getClassicDBhdf5(views, cls.pathF, cls.nameDB, - NB_CLASS, askedLabelsNames, - cls.random_state) + dataset_file, labels_dictionary = GetMultiviewDb.getClassicDBhdf5(views, + cls.pathF, + cls.nameDB, + NB_CLASS, + askedLabelsNames, + cls.random_state) for viewIndex in range(4): - np.testing.assert_array_equal(dataset_file.get("View"+str(viewIndex)).value, cls.dataset_file.get("View"+str(viewIndex))) - cls.assertEqual(dataset_file.get("View"+str(viewIndex)).attrs["name"], "test_view_"+str(viewIndex)) - cls.assertEqual(labels_dictionary, {0:"test_label_0", 1:"test_label_1", 2:"test_label_2", 3:"test_label_3"}) + np.testing.assert_array_equal( + dataset_file.get("View" + str(viewIndex)).value, + cls.dataset_file.get("View" + str(viewIndex))) + cls.assertEqual( + dataset_file.get("View" + str(viewIndex)).attrs["name"], + "test_view_" + str(viewIndex)) + cls.assertEqual(labels_dictionary, + {0: "test_label_0", 1: "test_label_1", + 2: "test_label_2", 3: "test_label_3"}) cls.assertEqual(dataset_file.get("Metadata").attrs["datasetLength"], 10) cls.assertEqual(dataset_file.get("Metadata").attrs["nbView"], 4) cls.assertEqual(dataset_file.get("Metadata").attrs["nbClass"], 4) @classmethod def tearDownClass(cls): - os.remove("multiview_platform/Tests/temp_tests/test_dataset_temp_view_label_select.hdf5") + os.remove( + "multiview_platform/Tests/temp_tests/test_dataset_temp_view_label_select.hdf5") os.remove("multiview_platform/Tests/temp_tests/test_dataset.hdf5") dirs = os.listdir("multiview_platform/Tests/temp_tests") for dir in dirs: @@ -371,67 +465,97 @@ class Test_getClassicDBcsv(unittest.TestCase): cls.askedLabelsNames = ["test_label_1", "test_label_3"] cls.random_state = np.random.RandomState(42) cls.views = ["test_view_1", "test_view_3"] - np.savetxt(cls.pathF+cls.nameDB+"-labels-names.csv", np.array(["test_label_0", "test_label_1", - "test_label_2", 
"test_label_3"]), fmt="%s", delimiter=",") - np.savetxt(cls.pathF+cls.nameDB+"-labels.csv", cls.random_state.randint(0,4,10), delimiter=",") - os.mkdir(cls.pathF+"Views") + np.savetxt(cls.pathF + cls.nameDB + "-labels-names.csv", + np.array(["test_label_0", "test_label_1", + "test_label_2", "test_label_3"]), fmt="%s", + delimiter=",") + np.savetxt(cls.pathF + cls.nameDB + "-labels.csv", + cls.random_state.randint(0, 4, 10), delimiter=",") + os.mkdir(cls.pathF + "Views") cls.datas = [] for i in range(4): data = cls.random_state.randint(0, 100, (10, 20)) - np.savetxt(cls.pathF+"Views/test_view_" + str(i)+".csv", - data, delimiter=",") + np.savetxt(cls.pathF + "Views/test_view_" + str(i) + ".csv", + data, delimiter=",") cls.datas.append(data) def test_simple(cls): - dataset_file, labels_dictionary = GetMultiviewDb.getClassicDBcsv(cls.views, cls.pathF, cls.nameDB, - cls.NB_CLASS, cls.askedLabelsNames, - cls.random_state, delimiter=",") + dataset_file, labels_dictionary = GetMultiviewDb.getClassicDBcsv( + cls.views, cls.pathF, cls.nameDB, + cls.NB_CLASS, cls.askedLabelsNames, + cls.random_state, delimiter=",") cls.assertEqual(dataset_file.get("Metadata").attrs["nbView"], 2) cls.assertEqual(dataset_file.get("View1").attrs["name"], "test_view_3") cls.assertEqual(dataset_file.get("View0").attrs["name"], "test_view_1") - cls.assertEqual(labels_dictionary, {0:"test_label_1", 1:"test_label_3"}) + cls.assertEqual(labels_dictionary, + {0: "test_label_1", 1: "test_label_3"}) cls.assertEqual(dataset_file.get("Metadata").attrs["datasetLength"], 3) cls.assertEqual(dataset_file.get("Metadata").attrs["nbClass"], 2) - np.testing.assert_array_equal(dataset_file.get("View0").value, cls.datas[1][np.array([1,5,9]), :]) + np.testing.assert_array_equal(dataset_file.get("View0").value, + cls.datas[1][np.array([1, 5, 9]), :]) def test_all_views_asked(cls): views = ["test_view_0", "test_view_1", "test_view_2", "test_view_3"] - dataset_file, labels_dictionary = GetMultiviewDb.getClassicDBcsv(views, cls.pathF, cls.nameDB, - cls.NB_CLASS, cls.askedLabelsNames, - cls.random_state, delimiter=",") - cls.assertEqual(labels_dictionary, {0: "test_label_1", 1: "test_label_3"}) + dataset_file, labels_dictionary = GetMultiviewDb.getClassicDBcsv(views, + cls.pathF, + cls.nameDB, + cls.NB_CLASS, + cls.askedLabelsNames, + cls.random_state, + delimiter=",") + cls.assertEqual(labels_dictionary, + {0: "test_label_1", 1: "test_label_3"}) cls.assertEqual(dataset_file.get("Metadata").attrs["datasetLength"], 3) cls.assertEqual(dataset_file.get("Metadata").attrs["nbView"], 4) cls.assertEqual(dataset_file.get("Metadata").attrs["nbClass"], 2) for viewIndex in range(4): - np.testing.assert_array_equal(dataset_file.get("View" + str(viewIndex)).value, - cls.datas[viewIndex][np.array([1, 5, 9]), :]) - cls.assertEqual(dataset_file.get("View" + str(viewIndex)).attrs["name"], "test_view_" + str(viewIndex)) + np.testing.assert_array_equal( + dataset_file.get("View" + str(viewIndex)).value, + cls.datas[viewIndex][np.array([1, 5, 9]), :]) + cls.assertEqual( + dataset_file.get("View" + str(viewIndex)).attrs["name"], + "test_view_" + str(viewIndex)) def test_all_labels_asked(cls): - askedLabelsNames = ["test_label_0", "test_label_1", "test_label_2", "test_label_3"] + askedLabelsNames = ["test_label_0", "test_label_1", "test_label_2", + "test_label_3"] NB_CLASS = 4 - dataset_file, labels_dictionary = GetMultiviewDb.getClassicDBcsv(cls.views, cls.pathF, cls.nameDB, - NB_CLASS, askedLabelsNames, - cls.random_state, delimiter=",") + dataset_file, 
labels_dictionary = GetMultiviewDb.getClassicDBcsv( + cls.views, cls.pathF, cls.nameDB, + NB_CLASS, askedLabelsNames, + cls.random_state, delimiter=",") cls.assertEqual(dataset_file.get("View1").attrs["name"], "test_view_3") - cls.assertEqual(labels_dictionary, {0:"test_label_0", 1:"test_label_1", 2:"test_label_2", 3:"test_label_3"}) + cls.assertEqual(labels_dictionary, + {0: "test_label_0", 1: "test_label_1", + 2: "test_label_2", 3: "test_label_3"}) cls.assertEqual(dataset_file.get("Metadata").attrs["datasetLength"], 10) cls.assertEqual(dataset_file.get("Metadata").attrs["nbView"], 2) cls.assertEqual(dataset_file.get("Metadata").attrs["nbClass"], 4) - np.testing.assert_array_equal(dataset_file.get("View0").value, cls.datas[1]) + np.testing.assert_array_equal(dataset_file.get("View0").value, + cls.datas[1]) def test_asked_the_whole_dataset(cls): - askedLabelsNames = ["test_label_0", "test_label_1", "test_label_2", "test_label_3"] + askedLabelsNames = ["test_label_0", "test_label_1", "test_label_2", + "test_label_3"] NB_CLASS = 4 views = ["test_view_0", "test_view_1", "test_view_2", "test_view_3"] - dataset_file, labels_dictionary = GetMultiviewDb.getClassicDBcsv(views, cls.pathF, cls.nameDB, - NB_CLASS, askedLabelsNames, - cls.random_state, delimiter=",") + dataset_file, labels_dictionary = GetMultiviewDb.getClassicDBcsv(views, + cls.pathF, + cls.nameDB, + NB_CLASS, + askedLabelsNames, + cls.random_state, + delimiter=",") for viewIndex in range(4): - np.testing.assert_array_equal(dataset_file.get("View"+str(viewIndex)).value, cls.datas[viewIndex]) - cls.assertEqual(dataset_file.get("View"+str(viewIndex)).attrs["name"], "test_view_"+str(viewIndex)) - cls.assertEqual(labels_dictionary, {0:"test_label_0", 1:"test_label_1", 2:"test_label_2", 3:"test_label_3"}) + np.testing.assert_array_equal( + dataset_file.get("View" + str(viewIndex)).value, + cls.datas[viewIndex]) + cls.assertEqual( + dataset_file.get("View" + str(viewIndex)).attrs["name"], + "test_view_" + str(viewIndex)) + cls.assertEqual(labels_dictionary, + {0: "test_label_0", 1: "test_label_1", + 2: "test_label_2", 3: "test_label_3"}) cls.assertEqual(dataset_file.get("Metadata").attrs["datasetLength"], 10) cls.assertEqual(dataset_file.get("Metadata").attrs["nbView"], 4) cls.assertEqual(dataset_file.get("Metadata").attrs["nbClass"], 4) @@ -439,11 +563,16 @@ class Test_getClassicDBcsv(unittest.TestCase): @classmethod def tearDownClass(cls): for i in range(4): - os.remove("multiview_platform/Tests/temp_tests/Views/test_view_"+str(i)+".csv") + os.remove( + "multiview_platform/Tests/temp_tests/Views/test_view_" + str( + i) + ".csv") os.rmdir("multiview_platform/Tests/temp_tests/Views") - os.remove("multiview_platform/Tests/temp_tests/test_dataset-labels-names.csv") + os.remove( + "multiview_platform/Tests/temp_tests/test_dataset-labels-names.csv") os.remove("multiview_platform/Tests/temp_tests/test_dataset-labels.csv") os.remove("multiview_platform/Tests/temp_tests/test_dataset.hdf5") - os.remove("multiview_platform/Tests/temp_tests/test_dataset_temp_view_label_select.hdf5") - for file in os.listdir("multiview_platform/Tests/temp_tests"):print(file) - os.rmdir("multiview_platform/Tests/temp_tests") \ No newline at end of file + os.remove( + "multiview_platform/Tests/temp_tests/test_dataset_temp_view_label_select.hdf5") + for file in os.listdir("multiview_platform/Tests/temp_tests"): print( + file) + os.rmdir("multiview_platform/Tests/temp_tests") diff --git a/multiview_platform/Tests/Test_utils/test_execution.py 
b/multiview_platform/Tests/Test_utils/test_execution.py index 6e34c649c9b0a499987243cd1edebbfcebfe2ec8..b2beaf5e163002dfd7f86522c9972038e3c38e03 100644 --- a/multiview_platform/Tests/Test_utils/test_execution.py +++ b/multiview_platform/Tests/Test_utils/test_execution.py @@ -1,12 +1,7 @@ -import unittest -import argparse import os -import h5py -import numpy as np -import shutil -import time +import unittest -from sklearn.model_selection import StratifiedShuffleSplit +import numpy as np from ...MonoMultiViewClassifiers.utils import execution @@ -30,16 +25,23 @@ class Test_initStatsIterRandomStates(unittest.TestCase): def test_one_statiter(cls): cls.state = cls.randomState.get_state()[1] - statsIterRandomStates = execution.initStatsIterRandomStates(cls.statsIter, cls.randomState) - np.testing.assert_array_equal(statsIterRandomStates[0].get_state()[1], cls.state) + statsIterRandomStates = execution.initStatsIterRandomStates( + cls.statsIter, cls.randomState) + np.testing.assert_array_equal(statsIterRandomStates[0].get_state()[1], + cls.state) def test_multiple_iter(cls): cls.statsIter = 3 - statsIterRandomStates = execution.initStatsIterRandomStates(cls.statsIter, cls.randomState) + statsIterRandomStates = execution.initStatsIterRandomStates( + cls.statsIter, cls.randomState) cls.assertAlmostEqual(len(statsIterRandomStates), 3) - cls.assertNotEqual(statsIterRandomStates[0].randint(5000), statsIterRandomStates[1].randint(5000)) - cls.assertNotEqual(statsIterRandomStates[0].randint(5000), statsIterRandomStates[2].randint(5000)) - cls.assertNotEqual(statsIterRandomStates[2].randint(5000), statsIterRandomStates[1].randint(5000)) + cls.assertNotEqual(statsIterRandomStates[0].randint(5000), + statsIterRandomStates[1].randint(5000)) + cls.assertNotEqual(statsIterRandomStates[0].randint(5000), + statsIterRandomStates[2].randint(5000)) + cls.assertNotEqual(statsIterRandomStates[2].randint(5000), + statsIterRandomStates[1].randint(5000)) + class Test_getDatabaseFunction(unittest.TestCase): @@ -50,20 +52,23 @@ class Test_getDatabaseFunction(unittest.TestCase): def test_simple(cls): getDB = execution.getDatabaseFunction(cls.name, cls.type) - from ...MonoMultiViewClassifiers.utils.GetMultiviewDb import getClassicDBcsv + from ...MonoMultiViewClassifiers.utils.GetMultiviewDb import \ + getClassicDBcsv cls.assertEqual(getDB, getClassicDBcsv) def test_hdf5(cls): cls.type = ".hdf5" getDB = execution.getDatabaseFunction(cls.name, cls.type) - from ...MonoMultiViewClassifiers.utils.GetMultiviewDb import getClassicDBhdf5 + from ...MonoMultiViewClassifiers.utils.GetMultiviewDb import \ + getClassicDBhdf5 cls.assertEqual(getDB, getClassicDBhdf5) def test_plausible_hdf5(cls): cls.name = "Plausible" cls.type = ".hdf5" getDB = execution.getDatabaseFunction(cls.name, cls.type) - from ...MonoMultiViewClassifiers.utils.GetMultiviewDb import getPlausibleDBhdf5 + from ...MonoMultiViewClassifiers.utils.GetMultiviewDb import \ + getPlausibleDBhdf5 cls.assertEqual(getDB, getPlausibleDBhdf5) @@ -77,28 +82,32 @@ class Test_initRandomState(unittest.TestCase): def test_random_state_42(self): randomState_42 = np.random.RandomState(42) - randomState = execution.initRandomState("42", "multiview_platform/Tests/temp_tests/") + randomState = execution.initRandomState("42", + "multiview_platform/Tests/temp_tests/") os.remove("multiview_platform/Tests/temp_tests/randomState.pickle") - np.testing.assert_array_equal(randomState.beta(1,100,100), - randomState_42.beta(1,100,100)) + np.testing.assert_array_equal(randomState.beta(1, 100, 100), 
+ randomState_42.beta(1, 100, 100)) def test_random_state_pickle(self): - randomState_to_pickle = execution.initRandomState(None, "multiview_platform/Tests/temp_tests/") - pickled_randomState = execution.initRandomState("multiview_platform/Tests/temp_tests/randomState.pickle", - "multiview_platform/Tests/temp_tests/") + randomState_to_pickle = execution.initRandomState(None, + "multiview_platform/Tests/temp_tests/") + pickled_randomState = execution.initRandomState( + "multiview_platform/Tests/temp_tests/randomState.pickle", + "multiview_platform/Tests/temp_tests/") os.remove("multiview_platform/Tests/temp_tests/randomState.pickle") - np.testing.assert_array_equal(randomState_to_pickle.beta(1,100,100), - pickled_randomState.beta(1,100,100)) + np.testing.assert_array_equal(randomState_to_pickle.beta(1, 100, 100), + pickled_randomState.beta(1, 100, 100)) class FakeArg(): def __init__(self): self.name = "zrtTap" - self.CL_type = ["fromage","jambon"] + self.CL_type = ["fromage", "jambon"] self.views = ["view1", "view2"] self.log = True + # Impossible to test as the main directory is not the same for the exec and the test # class Test_initLogFile(unittest.TestCase): # @@ -125,41 +134,44 @@ class Test_genSplits(unittest.TestCase): def setUp(self): self.stastIter = 3 - self.statsIterRandomStates = [np.random.RandomState(42+i+1) for i in range(self.stastIter)] + self.statsIterRandomStates = [np.random.RandomState(42 + i + 1) for i in + range(self.stastIter)] self.random_state = np.random.RandomState(42) - self.X_indices = self.random_state.randint(0,500,50) + self.X_indices = self.random_state.randint(0, 500, 50) self.labels = np.zeros(500) self.labels[self.X_indices[:10]] = 1 self.labels[self.X_indices[11:30]] = 2 # To test multiclass self.splitRatio = 0.2 def test_simple(self): - splits = execution.genSplits(self.labels, self.splitRatio, self.statsIterRandomStates) + splits = execution.genSplits(self.labels, self.splitRatio, + self.statsIterRandomStates) self.assertEqual(len(splits), 3) self.assertEqual(len(splits[1]), 2) self.assertEqual(type(splits[1][0]), np.ndarray) - self.assertAlmostEqual(len(splits[1][0]), 0.8*500) - self.assertAlmostEqual(len(splits[1][1]), 0.2*500) - self.assertGreater(len(np.where(self.labels[splits[1][0]]==0)[0]), 0) - self.assertGreater(len(np.where(self.labels[splits[1][0]]==1)[0]), 0) - self.assertGreater(len(np.where(self.labels[splits[1][0]]==2)[0]), 0) - self.assertGreater(len(np.where(self.labels[splits[1][1]]==0)[0]), 0) - self.assertGreater(len(np.where(self.labels[splits[1][1]]==1)[0]), 0) - self.assertGreater(len(np.where(self.labels[splits[1][1]]==2)[0]), 0) + self.assertAlmostEqual(len(splits[1][0]), 0.8 * 500) + self.assertAlmostEqual(len(splits[1][1]), 0.2 * 500) + self.assertGreater(len(np.where(self.labels[splits[1][0]] == 0)[0]), 0) + self.assertGreater(len(np.where(self.labels[splits[1][0]] == 1)[0]), 0) + self.assertGreater(len(np.where(self.labels[splits[1][0]] == 2)[0]), 0) + self.assertGreater(len(np.where(self.labels[splits[1][1]] == 0)[0]), 0) + self.assertGreater(len(np.where(self.labels[splits[1][1]] == 1)[0]), 0) + self.assertGreater(len(np.where(self.labels[splits[1][1]] == 2)[0]), 0) def test_genSplits_no_iter(self): - splits = execution.genSplits(self.labels, self.splitRatio, self.statsIterRandomStates) + splits = execution.genSplits(self.labels, self.splitRatio, + self.statsIterRandomStates) self.assertEqual(len(splits), 3) self.assertEqual(len(splits[0]), 2) self.assertEqual(type(splits[0][0]), np.ndarray) - 
self.assertAlmostEqual(len(splits[0][0]), 0.8*500) - self.assertAlmostEqual(len(splits[0][1]), 0.2*500) - self.assertGreater(len(np.where(self.labels[splits[0][0]]==0)[0]), 0) - self.assertGreater(len(np.where(self.labels[splits[0][0]]==1)[0]), 0) - self.assertGreater(len(np.where(self.labels[splits[0][0]]==2)[0]), 0) - self.assertGreater(len(np.where(self.labels[splits[0][1]]==0)[0]), 0) - self.assertGreater(len(np.where(self.labels[splits[0][1]]==1)[0]), 0) - self.assertGreater(len(np.where(self.labels[splits[0][1]]==2)[0]), 0) + self.assertAlmostEqual(len(splits[0][0]), 0.8 * 500) + self.assertAlmostEqual(len(splits[0][1]), 0.2 * 500) + self.assertGreater(len(np.where(self.labels[splits[0][0]] == 0)[0]), 0) + self.assertGreater(len(np.where(self.labels[splits[0][0]] == 1)[0]), 0) + self.assertGreater(len(np.where(self.labels[splits[0][0]] == 2)[0]), 0) + self.assertGreater(len(np.where(self.labels[splits[0][1]] == 0)[0]), 0) + self.assertGreater(len(np.where(self.labels[splits[0][1]] == 1)[0]), 0) + self.assertGreater(len(np.where(self.labels[splits[0][1]] == 2)[0]), 0) class Test_genKFolds(unittest.TestCase): @@ -167,7 +179,8 @@ class Test_genKFolds(unittest.TestCase): def setUp(self): self.statsIter = 2 self.nbFolds = 5 - self.statsIterRandomStates = [np.random.RandomState(42), np.random.RandomState(94)] + self.statsIterRandomStates = [np.random.RandomState(42), + np.random.RandomState(94)] def test_genKFolds_iter(self): pass @@ -181,25 +194,28 @@ class Test_genDirecortiesNames(unittest.TestCase): cls.stats_iter = 5 def test_simple_ovo(cls): - directories = execution.genDirecortiesNames(cls.directory, cls.stats_iter) + directories = execution.genDirecortiesNames(cls.directory, + cls.stats_iter) cls.assertEqual(len(directories), 5) cls.assertEqual(directories[0], "../chicken_is_heaven/iter_1/") cls.assertEqual(directories[-1], "../chicken_is_heaven/iter_5/") def test_ovo_no_iter(cls): cls.stats_iter = 1 - directories = execution.genDirecortiesNames(cls.directory, cls.stats_iter) + directories = execution.genDirecortiesNames(cls.directory, + cls.stats_iter) cls.assertEqual(len(directories), 1) cls.assertEqual(directories[0], "../chicken_is_heaven/") + class Test_genArgumentDictionaries(unittest.TestCase): @classmethod def setUpClass(cls): - cls.labelsDictionary = {0:"yes", 1:"No", 2:"Maybe"} + cls.labelsDictionary = {0: "yes", 1: "No", 2: "Maybe"} cls.direcories = ["Res/iter_1", "Res/iter_2"] cls.multiclassLabels = [np.array([0, 1, -100, 1, 0]), np.array([1, 0, -100, 1, 0]), np.array([0, 1, -100, 0, 1])] - cls.labelsCombinations = [[0,1], [0,2], [1,2]] - cls.indicesMulticlass = [[[[], []], [[], []], [[], []]], [[], [], []]] \ No newline at end of file + cls.labelsCombinations = [[0, 1], [0, 2], [1, 2]] + cls.indicesMulticlass = [[[[], []], [[], []], [[], []]], [[], [], []]] diff --git a/multiview_platform/Tests/Test_utils/test_multiclass.py b/multiview_platform/Tests/Test_utils/test_multiclass.py index d430cf24b9be43af7d1a7bd85d87dc3d48168e0d..c6bcc6ca532c1a96c3855211602c0c5405669907 100644 --- a/multiview_platform/Tests/Test_utils/test_multiclass.py +++ b/multiview_platform/Tests/Test_utils/test_multiclass.py @@ -1,4 +1,5 @@ import unittest + import numpy as np from ...MonoMultiViewClassifiers.utils import Multiclass @@ -9,20 +10,36 @@ class Test_genMulticlassLabels(unittest.TestCase): @classmethod def setUpClass(cls): cls.random_state = np.random.RandomState(42) - cls.labels = cls.random_state.randint(0,5,50) - cls.testIndices = [cls.random_state.choice(np.arange(50),size=10, 
replace=False), cls.random_state.choice(np.arange(50),size=10, replace=False)] - cls.classificationIndices = [[np.array([_ for _ in range(50) if _ not in cls.testIndices[0]]), cls.testIndices[0]], - [np.array([_ for _ in range(50) if _ not in cls.testIndices[1]]), cls.testIndices[1]]] + cls.labels = cls.random_state.randint(0, 5, 50) + cls.testIndices = [ + cls.random_state.choice(np.arange(50), size=10, replace=False), + cls.random_state.choice(np.arange(50), size=10, replace=False)] + cls.classificationIndices = [ + [np.array([_ for _ in range(50) if _ not in cls.testIndices[0]]), + cls.testIndices[0]], + [np.array([_ for _ in range(50) if _ not in cls.testIndices[1]]), + cls.testIndices[1]]] def test_one_versus_one(cls): - multiclassLabels, labelsIndices, oldIndicesMulticlass = Multiclass.genMulticlassLabels(cls.labels, "oneVersusOne", cls.classificationIndices) + multiclassLabels, labelsIndices, oldIndicesMulticlass = Multiclass.genMulticlassLabels( + cls.labels, "oneVersusOne", cls.classificationIndices) cls.assertEqual(len(multiclassLabels), 10) - cls.assertEqual(labelsIndices, [(0,1), (0,2), (0,3), (0,4), (1,2), (1,3), (1,4), (2,3), (2,4), (3,4)]) + cls.assertEqual(labelsIndices, + [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), + (2, 3), (2, 4), (3, 4)]) np.testing.assert_array_equal(oldIndicesMulticlass[0][0][0], - np.array([5, 13, 15, 18, 20, 24, 27, 39, 41, 43, 44, 45, 46, 48])) + np.array( + [5, 13, 15, 18, 20, 24, 27, 39, 41, + 43, 44, 45, 46, 48])) np.testing.assert_array_equal(multiclassLabels[0], - np.array([-100, -100, -100, -100, -100, 0, -100, -100, -100, -100, -100, -100, - -100, 0, -100, 0, -100, -100, 1, -100, 0, -100, -100, 1, 1, -100, -100, - 0, -100, -100, -100, -100, -100, 1, -100, -100, -100, -100, 1, 0, -100, - 1, -100, 0, 0, 1, 0, -100, 0, -100 ])) - + np.array([-100, -100, -100, -100, -100, 0, + -100, -100, -100, -100, -100, + -100, + -100, 0, -100, 0, -100, -100, 1, + -100, 0, -100, -100, 1, 1, -100, + -100, + 0, -100, -100, -100, -100, -100, + 1, -100, -100, -100, -100, 1, 0, + -100, + 1, -100, 0, 0, 1, 0, -100, 0, + -100])) diff --git a/multiview_platform/Tests/__init__.py b/multiview_platform/Tests/__init__.py index 324d3b04d0039c87646f00206fc6d2f9c628ec5c..3fd5c6b4ff2b4d4fa0a02e0647991fb2e204bf8d 100644 --- a/multiview_platform/Tests/__init__.py +++ b/multiview_platform/Tests/__init__.py @@ -1 +1 @@ -from . import test_ExecClassif \ No newline at end of file +from . 
import test_ExecClassif diff --git a/multiview_platform/Tests/test_ExecClassif.py b/multiview_platform/Tests/test_ExecClassif.py index 7701c24c0549717ca85f63b99a1fb66c52c87aab..5d7bccb97c914dacba001129e849913ab341f20d 100644 --- a/multiview_platform/Tests/test_ExecClassif.py +++ b/multiview_platform/Tests/test_ExecClassif.py @@ -1,26 +1,23 @@ -import unittest -import argparse import os -import numpy as np +import unittest + import h5py -from sklearn.metrics import accuracy_score +import numpy as np from ..MonoMultiViewClassifiers import ExecClassif - - class Test_initBenchmark(unittest.TestCase): def test_benchmark_wanted(self): # benchmark_output = ExecClassif.initBenchmark(self.args) - self.assertEqual(1,1) + self.assertEqual(1, 1) class Test_initKWARGS(unittest.TestCase): def test_initKWARGSFunc_no_monoview(self): - benchmark = {"Monoview":{}, "Multiview":{}} + benchmark = {"Monoview": {}, "Multiview": {}} args = ExecClassif.initKWARGSFunc({}, benchmark) self.assertEqual(args, {}) @@ -28,12 +25,12 @@ class Test_initKWARGS(unittest.TestCase): class Test_initMonoviewArguments(unittest.TestCase): def test_initMonoviewArguments_no_monoview(self): - benchmark = {"Monoview":{}, "Multiview":{}} + benchmark = {"Monoview": {}, "Multiview": {}} arguments = ExecClassif.initMonoviewExps(benchmark, {}, 0, {}) - self.assertEqual(arguments, {'Monoview':[], 'Multiview':[]}) + self.assertEqual(arguments, {'Monoview': [], 'Multiview': []}) def test_initMonoviewArguments_empty(self): - benchmark = {"Monoview":{}, "Multiview":{}} + benchmark = {"Monoview": {}, "Multiview": {}} arguments = ExecClassif.initMonoviewExps(benchmark, {}, 0, {}) @@ -42,77 +39,109 @@ def fakeBenchmarkExec(coreIndex=-1, a=7, args=1): def fakeBenchmarkExec_mutlicore(nbCores=-1, a=6, args=1): - return [nbCores,a] + return [nbCores, a] + def fakeBenchmarkExec_monocore(DATASET=1, a=4, args=1): return [a] -def fakegetResults(results, statsIter, nbMulticlass, benchmarkArgumentsDictionaries, multiClassLabels, metrics, - classificationIndices, directories, directory, labelsDictionary, nbExamples, nbLabels): + +def fakegetResults(results, statsIter, nbMulticlass, + benchmarkArgumentsDictionaries, multiClassLabels, metrics, + classificationIndices, directories, directory, + labelsDictionary, nbExamples, nbLabels): return 3 -def fakeDelete(a, b,c): + +def fakeDelete(a, b, c): return 9 + class Test_execBenchmark(unittest.TestCase): @classmethod def setUpClass(cls): os.mkdir("multiview_platform/Tests/tmp_tests") - cls.Dataset = h5py.File("multiview_platform/Tests/tmp_tests/test_file.hdf5", "w") - cls.labels = cls.Dataset.create_dataset("Labels", data=np.array([0, 1, 2])) - cls.argumentDictionaries = [{"a": 4, "args":FakeArg()}] + cls.Dataset = h5py.File( + "multiview_platform/Tests/tmp_tests/test_file.hdf5", "w") + cls.labels = cls.Dataset.create_dataset("Labels", + data=np.array([0, 1, 2])) + cls.argumentDictionaries = [{"a": 4, "args": FakeArg()}] def test_simple(cls): - res = ExecClassif.execBenchmark(1,2,3,cls.argumentDictionaries,[[[1,2], [3,4,5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, execOneBenchmark=fakeBenchmarkExec, + res = ExecClassif.execBenchmark(1, 2, 3, cls.argumentDictionaries, + [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, + 10, cls.Dataset, + execOneBenchmark=fakeBenchmarkExec, execOneBenchmark_multicore=fakeBenchmarkExec_mutlicore, - execOneBenchmarkMonoCore=fakeBenchmarkExec_monocore, getResults=fakegetResults, delete=fakeDelete) + execOneBenchmarkMonoCore=fakeBenchmarkExec_monocore, + getResults=fakegetResults, + delete=fakeDelete) 
cls.assertEqual(res, [[4]]) def test_multiclass_no_iter(cls): - cls.argumentDictionaries = [{"a": 10, "args":FakeArg()}, {"a": 4, "args":FakeArg()}] - res = ExecClassif.execBenchmark(2,1,2,cls.argumentDictionaries,[[[1,2], [3,4,5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, + cls.argumentDictionaries = [{"a": 10, "args": FakeArg()}, + {"a": 4, "args": FakeArg()}] + res = ExecClassif.execBenchmark(2, 1, 2, cls.argumentDictionaries, + [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, + 10, cls.Dataset, execOneBenchmark=fakeBenchmarkExec, execOneBenchmark_multicore=fakeBenchmarkExec_mutlicore, execOneBenchmarkMonoCore=fakeBenchmarkExec_monocore, - getResults=fakegetResults, delete=fakeDelete) - cls.assertEqual(res, [[0,10], [1,4]]) + getResults=fakegetResults, + delete=fakeDelete) + cls.assertEqual(res, [[0, 10], [1, 4]]) def test_multiclass_and_iter(cls): - cls.argumentDictionaries = [{"a": 10, "args":FakeArg()}, {"a": 4, "args":FakeArg()}, {"a": 55, "args":FakeArg()}, {"a": 24, "args":FakeArg()}] - res = ExecClassif.execBenchmark(2,2,2,cls.argumentDictionaries,[[[1,2], [3,4,5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, + cls.argumentDictionaries = [{"a": 10, "args": FakeArg()}, + {"a": 4, "args": FakeArg()}, + {"a": 55, "args": FakeArg()}, + {"a": 24, "args": FakeArg()}] + res = ExecClassif.execBenchmark(2, 2, 2, cls.argumentDictionaries, + [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, + 10, cls.Dataset, execOneBenchmark=fakeBenchmarkExec, execOneBenchmark_multicore=fakeBenchmarkExec_mutlicore, execOneBenchmarkMonoCore=fakeBenchmarkExec_monocore, - getResults=fakegetResults, delete=fakeDelete) - cls.assertEqual(res, [[0,10], [1,4], [0,55], [1,24]]) + getResults=fakegetResults, + delete=fakeDelete) + cls.assertEqual(res, [[0, 10], [1, 4], [0, 55], [1, 24]]) def test_no_iter_biclass_multicore(cls): - res = ExecClassif.execBenchmark(2,1,1,cls.argumentDictionaries,[[[1,2], [3,4,5]]], 5, 6, 7, 8, 9, 10, cls.Dataset, + res = ExecClassif.execBenchmark(2, 1, 1, cls.argumentDictionaries, + [[[1, 2], [3, 4, 5]]], 5, 6, 7, 8, 9, + 10, cls.Dataset, execOneBenchmark=fakeBenchmarkExec, execOneBenchmark_multicore=fakeBenchmarkExec_mutlicore, execOneBenchmarkMonoCore=fakeBenchmarkExec_monocore, - getResults=fakegetResults, delete=fakeDelete) - cls.assertEqual(res, [[2,4]]) + getResults=fakegetResults, + delete=fakeDelete) + cls.assertEqual(res, [[2, 4]]) @classmethod def tearDownClass(cls): os.remove("multiview_platform/Tests/tmp_tests/test_file.hdf5") os.rmdir("multiview_platform/Tests/tmp_tests") -def fakeExecMono(directory, name, labelsNames, classificationIndices, kFolds, coreIndex, type, pathF, randomState, labels, + +def fakeExecMono(directory, name, labelsNames, classificationIndices, kFolds, + coreIndex, type, pathF, randomState, labels, hyperParamSearch="try", metrics="try", nIter=1, **arguments): return ["Mono", arguments] -def fakeExecMulti(directory, coreIndex, name, classificationIndices, kFolds, type, pathF, LABELS_DICTIONARY, - randomState, labels, hyperParamSearch="",metrics=None, nIter=1, **arguments): +def fakeExecMulti(directory, coreIndex, name, classificationIndices, kFolds, + type, pathF, LABELS_DICTIONARY, + randomState, labels, hyperParamSearch="", metrics=None, + nIter=1, **arguments): return ["Multi", arguments] def fakeInitMulti(args, benchmark, views, viewsIndices, argumentDictionaries, - randomState, directory, resultsMonoview, classificationIndices): - return {"Monoview": [{"try": 0}, {"try2": 100}], "Multiview": [{"try3": 5}, {"try4": 10}]} + randomState, directory, resultsMonoview, + classificationIndices): + 
return {"Monoview": [{"try": 0}, {"try2": 100}], + "Multiview": [{"try3": 5}, {"try4": 10}]} class FakeArg(object): @@ -122,52 +151,69 @@ class FakeArg(object): self.pathF = "pathF" self.CL_HPS_iter = 1 + class FakeKfold(): def __init__(self): self.n_splits = 2 pass - def split(self, X,Y): - return [([X[0], X[1]], [X[2],X[3]]), (([X[2], X[3]], [X[0],X[1]]))] + def split(self, X, Y): + return [([X[0], X[1]], [X[2], X[3]]), (([X[2], X[3]], [X[0], X[1]]))] class Test_execOneBenchmark(unittest.TestCase): @classmethod def setUpClass(cls): - os.mkdir("multiview_platform/Tests/tmp_tests") def test_simple(cls): flag, resMono, resMulti = ExecClassif.execOneBenchmark(coreIndex=10, - LABELS_DICTIONARY={0: "a", 1: "b"}, + LABELS_DICTIONARY={ + 0: "a", + 1: "b"}, directory="multiview_platform/Tests/tmp_tests/", - classificationIndices=([1,2,3,4], [0,5,6,7,8]), + classificationIndices=( + [1, 2, 3, 4], + [0, 5, 6, 7, 8]), args=FakeArg(), kFolds=FakeKfold(), randomState="try", hyperParamSearch="try", metrics="try", - argumentDictionaries={"Monoview": [{"try": 0}, {"try2": 100}]}, + argumentDictionaries={ + "Monoview": [ + { + "try": 0}, + { + "try2": 100}]}, benchmark="try", views="try", viewsIndices="try", flag=None, - labels=np.array([0,1,2,1,2,2,2,12,1,2,1,1,2,1,21]), + labels=np.array( + [0, 1, 2, 1, + 2, 2, 2, 12, + 1, 2, 1, 1, + 2, 1, 21]), ExecMonoview_multicore=fakeExecMono, ExecMultiview_multicore=fakeExecMulti, initMultiviewArguments=fakeInitMulti) cls.assertEqual(flag, None) - cls.assertEqual(resMono, [["Mono", {"try": 0}], ["Mono", {"try2": 100}]]) - cls.assertEqual(resMulti, [["Multi", {"try3": 5}], ["Multi", {"try4": 10}]]) + cls.assertEqual(resMono, + [["Mono", {"try": 0}], ["Mono", {"try2": 100}]]) + cls.assertEqual(resMulti, + [["Multi", {"try3": 5}], ["Multi", {"try4": 10}]]) @classmethod def tearDownClass(cls): os.remove("multiview_platform/Tests/tmp_tests/train_indices.csv") os.remove("multiview_platform/Tests/tmp_tests/train_labels.csv") - os.remove("multiview_platform/Tests/tmp_tests/folds/test_labels_fold_0.csv") - os.remove("multiview_platform/Tests/tmp_tests/folds/test_labels_fold_1.csv") + os.remove( + "multiview_platform/Tests/tmp_tests/folds/test_labels_fold_0.csv") + os.remove( + "multiview_platform/Tests/tmp_tests/folds/test_labels_fold_1.csv") os.rmdir("multiview_platform/Tests/tmp_tests/folds") os.rmdir("multiview_platform/Tests/tmp_tests") @@ -179,41 +225,43 @@ class Test_execOneBenchmark_multicore(unittest.TestCase): os.mkdir("multiview_platform/Tests/tmp_tests") def test_simple(cls): - flag, resMono, resMulti = ExecClassif.execOneBenchmark_multicore(nbCores=2, - LABELS_DICTIONARY={0: "a", 1: "b"}, - directory="multiview_platform/Tests/tmp_tests/", - classificationIndices=([1,2,3,4], [0,10,20,30,40]), - args=FakeArg(), - kFolds=FakeKfold(), - randomState="try", - hyperParamSearch="try", - metrics="try", - argumentDictionaries={"Monoview": [{"try": 0}, {"try2": 100}]}, - benchmark="try", - views="try", - viewsIndices="try", - flag=None, - labels=np.array([0,1,2,3,4,2,2,12,1,2,1,1,2,1,21]), - ExecMonoview_multicore=fakeExecMono, - ExecMultiview_multicore=fakeExecMulti, - initMultiviewArguments=fakeInitMulti) + flag, resMono, resMulti = ExecClassif.execOneBenchmark_multicore( + nbCores=2, + LABELS_DICTIONARY={0: "a", 1: "b"}, + directory="multiview_platform/Tests/tmp_tests/", + classificationIndices=([1, 2, 3, 4], [0, 10, 20, 30, 40]), + args=FakeArg(), + kFolds=FakeKfold(), + randomState="try", + hyperParamSearch="try", + metrics="try", + argumentDictionaries={"Monoview": 
[{"try": 0}, {"try2": 100}]}, + benchmark="try", + views="try", + viewsIndices="try", + flag=None, + labels=np.array([0, 1, 2, 3, 4, 2, 2, 12, 1, 2, 1, 1, 2, 1, 21]), + ExecMonoview_multicore=fakeExecMono, + ExecMultiview_multicore=fakeExecMulti, + initMultiviewArguments=fakeInitMulti) cls.assertEqual(flag, None) - cls.assertEqual(resMono, [["Mono", {"try": 0}], ["Mono", {"try2": 100}]]) - cls.assertEqual(resMulti, [["Multi", {"try3": 5}], ["Multi", {"try4": 10}]]) + cls.assertEqual(resMono, + [["Mono", {"try": 0}], ["Mono", {"try2": 100}]]) + cls.assertEqual(resMulti, + [["Multi", {"try3": 5}], ["Multi", {"try4": 10}]]) @classmethod def tearDownClass(cls): os.remove("multiview_platform/Tests/tmp_tests/train_indices.csv") os.remove("multiview_platform/Tests/tmp_tests/train_labels.csv") - os.remove("multiview_platform/Tests/tmp_tests/folds/test_labels_fold_0.csv") - os.remove("multiview_platform/Tests/tmp_tests/folds/test_labels_fold_1.csv") + os.remove( + "multiview_platform/Tests/tmp_tests/folds/test_labels_fold_0.csv") + os.remove( + "multiview_platform/Tests/tmp_tests/folds/test_labels_fold_1.csv") os.rmdir("multiview_platform/Tests/tmp_tests/folds") os.rmdir("multiview_platform/Tests/tmp_tests") - - - # # class Test_analyzeMulticlass(unittest.TestCase): # @@ -515,4 +563,4 @@ class Test_execOneBenchmark_multicore(unittest.TestCase): # suite = unittest.TestSuite() # suite.addTest(Test_initBenchmark('test_initKWARGSFunc_no_monoview')) # # suite.addTest(WidgetTestCase('test_widget_resize')) -# return suite \ No newline at end of file +# return suite diff --git a/multiview_platform/Tests/test_ResultAnalysis.py b/multiview_platform/Tests/test_ResultAnalysis.py index 74a1e80016017108190e848b1cb631930c59fa1f..e74405d2c047515fdc76bd48803a8620f7a40b01 100644 --- a/multiview_platform/Tests/test_ResultAnalysis.py +++ b/multiview_platform/Tests/test_ResultAnalysis.py @@ -53,4 +53,4 @@ # cls.assertIn("chicken_is_heaven-View0", res) # cls.assertIn("Mumbo", res) # np.testing.assert_array_equal(res["Mumbo"], np.array([1,0,1,-100,-100,1,1,1,-100])) -# np.testing.assert_array_equal(res["chicken_is_heaven-View0"], np.array([0,1,1,-100,-100,1,1,1,-100])) \ No newline at end of file +# np.testing.assert_array_equal(res["chicken_is_heaven-View0"], np.array([0,1,1,-100,-100,1,1,1,-100])) diff --git a/multiview_platform/Versions.py b/multiview_platform/Versions.py index d3728e4c44cbb7b5ca5c0bb424bc2158a3bbb029..39d9af6f5a9714a075348a622e087bf1249de355 100644 --- a/multiview_platform/Versions.py +++ b/multiview_platform/Versions.py @@ -86,8 +86,11 @@ def testVersions(): toInstall.append("pickle") if not isUpToDate: - print("You can't run at the moment, please install the following modules : \n"+ "\n".join(toInstall)) + print( + "You can't run at the moment, please install the following modules : \n" + "\n".join( + toInstall)) quit() -if __name__== "__main__": - testVersions() \ No newline at end of file + +if __name__ == "__main__": + testVersions() diff --git a/multiview_platform/__init__.py b/multiview_platform/__init__.py index 954a8aca63720bc6615258c7ac495dea06f5852b..c41ea4a1af1c65845cc047a3626723ef53833812 100644 --- a/multiview_platform/__init__.py +++ b/multiview_platform/__init__.py @@ -2,4 +2,4 @@ __version__ = "0.0.0.0" -from . import MonoMultiViewClassifiers, Tests, Exec,Versions +from . 
import MonoMultiViewClassifiers, Tests, Exec, Versions diff --git a/setup.py b/setup.py index 388d42da0111c153d300f85ac13c1f44bff8190a..3091b2ff87777c6fd94a7671469d6f02f22f2a93 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -from setuptools import setup, find_packages from Cython.Build import cythonize +from setuptools import setup, find_packages import multiview_platform @@ -88,5 +88,6 @@ setup( # Il y a encore une chiée de paramètres possibles, mais avec ça vous # couvrez 90% des besoins - ext_modules=cythonize("multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pyx"), -) \ No newline at end of file + ext_modules=cythonize( + "multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pyx"), +)