diff --git a/docs/source/monomulti/result_analysis.rst b/docs/source/analyzeresult.rst similarity index 63% rename from docs/source/monomulti/result_analysis.rst rename to docs/source/analyzeresult.rst index 723adb1ae25c471dc5bcb49fd79736000d6d5ff4..2367d0d6d17114b02e7ae8770033eb9810088785 100644 --- a/docs/source/monomulti/result_analysis.rst +++ b/docs/source/analyzeresult.rst @@ -1,6 +1,5 @@ -Result alanysis module +Result analysis module ====================== .. automodule:: multiview_platform.MonoMultiViewClassifiers.ResultAnalysis - :members: - :inherited-members: \ No newline at end of file + :members: \ No newline at end of file diff --git a/docs/source/api.rst b/docs/source/api.rst index 0cc50b07d92a34044eb6790610dec05654126783..d5bc51ec2f59e5cf9a482a0c29bfa8197f2b7703 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -6,4 +6,5 @@ Multiview Platform :caption: Contents: execution - monomultidoc \ No newline at end of file + monomultidoc + analyzeresult \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 89899abc3f6d688705f91746ff3fd0baf5c9ae58..2a8198dce645d364064e6186c41c19e02af88e3c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -38,13 +38,13 @@ extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', - 'numpydoc', 'nbsphinx', 'sphinx.ext.coverage', 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', - 'sphinx.ext.githubpages'] + 'sphinx.ext.githubpages', + 'sphinx.ext.napoleon'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -111,7 +111,7 @@ html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = [] # -- Options for HTMLHelp output ------------------------------------------ diff --git a/docs/source/execution.rst b/docs/source/execution.rst index 91c16ad356895de12f3131cb4b747c21d655806c..3d26fece2aa89ea3212a2051624d9068f8e8b8fb 100644 --- a/docs/source/execution.rst +++ b/docs/source/execution.rst @@ -1,7 +1,6 @@ -Welcome tothe exection documentation! +Welcome to the execution documentation ===================================== .. 
automodule:: multiview_platform.Exec :members: - :inherited-members: diff --git a/docs/source/monomulti/monoview_classifier.ipynb b/docs/source/monomulti/monoview_classifier.ipynb index c388766ff2670b5883b6c7cc028990ace5640ead..a7e85bbc180ab7c192038b3667fbd619ce872881 100644 --- a/docs/source/monomulti/monoview_classifier.ipynb +++ b/docs/source/monomulti/monoview_classifier.ipynb @@ -80,7 +80,7 @@ "kernelspec": { "display_name": "Python 2", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -91,7 +91,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", + "pygments_lexer": "ipython3", "version": "2.7.13" } }, diff --git a/docs/source/monomulti/multiview_classifier.ipynb b/docs/source/monomulti/multiview_classifier.ipynb index fb3101d06b066ac5aefd5560d151f7e9932b56b6..734b0c79b56507b04073f1d682c1853037f4c186 100644 --- a/docs/source/monomulti/multiview_classifier.ipynb +++ b/docs/source/monomulti/multiview_classifier.ipynb @@ -530,8 +530,8 @@ "metadata": { "kernelspec": { "display_name": "Python 2", - "language": "python", - "name": "python2" + "language": "python3", + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -542,10 +542,10 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", + "pygments_lexer": "ipython3", "version": "2.7.13" } }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/docs/source/monomulti/multiview_classifiers/diversity_fusion.rst b/docs/source/monomulti/multiview_classifiers/diversity_fusion.rst index cab3a2bc569bd3190d643b2dfed3b0675793cbcc..507f7d5917907d61a79b647ccefb6cd088b47f00 100644 --- a/docs/source/monomulti/multiview_classifiers/diversity_fusion.rst +++ b/docs/source/monomulti/multiview_classifiers/diversity_fusion.rst @@ -1,6 +1,5 @@ -Metrics framework -================= +Diversity Fusion Classifiers +============================ -.. automodule:: multiview_platform.MonoMultiViewClassifiers.MultiviewClassifiers.diversity_utils +.. automodule:: multiview_platform.MonoMultiViewClassifiers.Multiview.Additions.diversity_utils :members: - :inherited-members: \ No newline at end of file diff --git a/docs/source/monomulti/randomized_cv.ipynb b/docs/source/monomulti/randomized_cv.ipynb deleted file mode 100644 index 94594928b9e5b22abac6f2bd88f358cd93ed8253..0000000000000000000000000000000000000000 --- a/docs/source/monomulti/randomized_cv.ipynb +++ /dev/null @@ -1,140 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Randomized example selection for classification\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Train/test split generation " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The train/test splits are generated in the `execution.genSplits` function. It's task is to generate an train/test split for each statistical itetarion. 
In order to do that, it is fed by the following inputs \n", - "* `labels` are the data labels for all the dataset, \n", - "* `splitRatio` is a real number giving the ratio |test|/|all|,\n", - "* `statsIterRandomStates` is a list of `numpy` random states used to generate reproductible pseudo-random numbers\n", - "\n", - "The main operation in this function is done by the `sklearn.model_selection.StratifiedShuffleSplit` function which returns folds that are made by preserving the percentage of samples for each class.\n", - "In this case we askittosplit the dataset in two subsets with the asked test size. It then returns a shuffled train/test split while preserving the percentage of samples for each class.\n", - "We store the examples indices in two `np.array`s called `trainIndices` and `testIndices`\n", - "All the algortihms will then train (hyper-parameters cross-validation & learning) on the trainIndices. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def genSplits(labels, splitRatio, statsIterRandomStates):\n", - " \"\"\"Used to gen the train/test splits using one or multiple random states\n", - " classificationIndices is a list of train/test splits\"\"\"\n", - " indices = np.arange(len(labels))\n", - " splits = []\n", - " for randomState in statsIterRandomStates:\n", - " foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,\n", - " random_state=randomState,\n", - " test_size=splitRatio)\n", - " folds = foldsObj.split(indices, labels)\n", - " for fold in folds:\n", - " train_fold, test_fold = fold\n", - " trainIndices = indices[train_fold]\n", - " testIndices = indices[test_fold]\n", - " splits.append([trainIndices, testIndices])\n", - "\n", - " return splits" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Multiclass problems" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To be able to use the platform on multiclass problems, a one-versus-one method is implemented. \n", - "In orderto use one-versus-one we need to modify the train/test splits generated by the previouslydescribed founction. \n", - "If the problem the platformis asked to resolve is multiclass then, it will generate all the possible two-class combinations to divide the main problem in multiple biclass ones. \n", - "In order to adapt each split, the `genMulticlassLabels` function will create new train/test splits by : \n", - "* Generating an `oldIndices` list containing all the examples indices that have their label in the combination\n", - "* Generate a new train split by selecting only the indices of `trainIndices` that have their labels in the combination.\n", - "* Do the samething for the test indices\n", - "* Copy the old `testIndices` variable in a new one called `testIndicesMulticlass` that will be used to predict on the entire dataset once the algorithm has learn to distinguish two classes\n", - "* Generate a new `label` array by replacing all the labels that are not in the combination by -100 to flag them as unseen labels during the training phase. 
\n", - "\n", - "Then the function will return a triplet : \n", - "* `multiclassLabels` is a list containing, for each combination, the newly generated labels with ones and zeros for each of the labels in the combination and -100 for the others.\n", - "* `labelsIndices` is a list contaningall the combinations,\n", - "* `indicesMulticlass` is a list containig triplets for each statistical iteration :\n", - " * `trainIndices` are the indices used for training that were picked only in the two classes of the combination (at the second step of the previous list),\n", - " * `testIndices` are the indices used for testing the biclass-generalization capacity of each biclass classifier learned on `trainIndices` that were picked only in the two classes of the combination (at the third step of the previous list),\n", - " * `tesIndicesMulticlass` are the indices described at the fourth setp of the previous list. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Cross-validation folds " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "The cross validation folds are generated using a `StratifiedKFold` object, for the `sklearn.model_selection` library. \n", - "* For all the **monoview** algorithms, these objects (one for each statistical iteration) are then fed in a `sklearn.model_selection` `RandomisedSearchCV` object. So we don't have any custom stuff about cross-vaildation folds in the monoview case\n", - "* In the **multiview** case, they are used in the `utils.HyperParametersSearch` module, in the `randomizedSearch` function. In this case, they are used to split the learning set with `multiviewFolds = KFolds.split(learningIndices, labels[learningIndices])` and then used in `for trainIndices, testIndices in multiviewFolds:`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2.0 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.13" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/docs/source/monomultidoc.rst b/docs/source/monomultidoc.rst index 822774f04fb64c149824472fd17f7fa7af2404c2..b25fd849aaefb289724abedd80a1a95ee03d3938 100644 --- a/docs/source/monomultidoc.rst +++ b/docs/source/monomultidoc.rst @@ -9,7 +9,6 @@ Mono and mutliview classification monomulti/monoview_classifier monomulti/multiview_classifier monomulti/exec_classif - monomulti/result_analysis monomulti/multiview_classifiers/diversity_fusion monomulti/utils/execution monomulti/utils/multiclass diff --git a/multiview_platform/MonoMultiViewClassifiers/Multiview/Additions/__init__.py b/multiview_platform/MonoMultiViewClassifiers/Multiview/Additions/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..6faeb7e91dba45d3ff7f7c7fac4bb1b2d0de1a00 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Multiview/Additions/__init__.py +++ b/multiview_platform/MonoMultiViewClassifiers/Multiview/Additions/__init__.py @@ -0,0 +1 @@ +from . 
import diversity_utils \ No newline at end of file diff --git a/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py b/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py index 192a4faff0b467b59642e1949efc580385edddd9..294d5ca72a820148df1bb4adad73d6e3012ebba3 100644 --- a/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py +++ b/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py @@ -1,18 +1,12 @@ # Import built-in modules import time import os -import pylab import errno import logging # Import third party modules -import matplotlib - -# matplotlib.use('Agg') import matplotlib.pyplot as plt -import matplotlib.patches as mpatches import numpy as np -# from matplotlib import cm import matplotlib as mpl # Import own Modules @@ -72,11 +66,11 @@ def getMetricsScoresBiclass(metrics, results): Returns ------- metricsScores : dict of dict of list - Regroups all the scores for each metrics for each classifier and for the train and test sets. - organized as : - -`metricScores[metric_name]["classifiersNames"]` is a list of all the classifiers available for this metric, - -`metricScores[metric_name]["trainScores"]` is a list of all the available classifiers scores on the train set, - -`metricScores[metric_name]["testScores"]` is a list of all the available classifiers scores on the test set. + Regroups all the scores for each metric for each classifier and for the train and test sets, + organized as: + -`metricScores[metric_name]["classifiersNames"]` is a list of all the classifiers available for this metric, + -`metricScores[metric_name]["trainScores"]` is a list of all the available classifiers' scores on the train set, + -`metricScores[metric_name]["testScores"]` is a list of all the available classifiers' scores on the test set. """ metricsScores = {} @@ -281,6 +275,20 @@ def publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames): def iterCmap(statsIter): + r"""Used to generate a colormap that will have a tick for each iteration: the whiter the better. + + Parameters + ---------- + statsIter : int + The number of statistical iterations. + + Returns + ------- + cmap : matplotlib.colors.ListedColormap object + The colormap. + norm : matplotlib.colors.BoundaryNorm object + The bounds for the colormap. + """ cmapList = ["red", "0.0"]+[str(float((i+1))/statsIter) for i in range(statsIter)] cmap = mpl.colors.ListedColormap(cmapList) bounds = [-100*statsIter-0.5, -0.5] @@ -293,6 +301,35 @@ def iterCmap(statsIter): def publish2Dplot(data, classifiersNames, nbClassifiers, nbExamples, nbCopies, fileName, minSize=10, width_denominator=2.0, height_denominator=20.0, statsIter=1): + r"""Used to generate a 2D plot of the errors. + + Parameters + ---------- + data : np.array of shape `(nbClassifiers, nbExamples)` + A matrix with zeros where the classifier failed to classify the example, ones where it classified it well + and -100 if the example was not classified. + classifiersNames : list of str + The names of the classifiers. + nbClassifiers : int + The number of classifiers. + nbExamples : int + The number of examples. + nbCopies : int + The number of times the data is copied (classifier-wise) in order for the figure to be more readable. + fileName : str + The name of the file in which the figure will be saved ("error_analysis_2D.png" will be added at the end) + minSize : int, optional, default: 10 + The minimum width and height of the figure. 
+ width_denominator : float, optional, default: 2.0 + To obtain the image width, the number of classifiers will be divided by this number. + height_denominator : float, optional, default: 20.0 + To obtain the image height, the number of examples will be divided by this number. + statsIter : int, optional, default: 1 + The number of statistical iterations realized. + + Returns + ------- + """ figWidth = max(nbClassifiers / width_denominator, minSize) figHeight = max(nbExamples / height_denominator, minSize) figKW = {"figsize": (figWidth, figHeight)} @@ -311,6 +348,22 @@ def publishErrorsBarPlot(errorOnExamples, nbClassifiers, nbExamples, fileName): + r"""Used to generate a barplot of the number of classifiers that failed to classify each example. + + Parameters + ---------- + errorOnExamples : np.array of shape `(nbExamples,)` + An array counting how many classifiers failed to classify each example. + nbClassifiers : int + The number of classifiers. + nbExamples : int + The number of examples. + fileName : str + The name of the file in which the figure will be saved ("error_analysis_2D.png" will be added at the end) + + Returns + ------- + """ fig, ax = plt.subplots() x = np.arange(nbExamples) plt.bar(x, errorOnExamples) @@ -321,6 +374,39 @@ def gen_error_data(example_errors, base_file_name, nbCopies=2): + r"""Used to format the error data in order to plot it efficiently. The data is saved in a `.csv` file. + + Parameters + ---------- + example_errors : dict of dicts of np.arrays + A dictionary containing all the useful data. Organized as: + `example_errors[<classifier_name>]["errorOnExamples"]` is a np.array of ints with + - 1 if the classifier `<classifier_name>` classified the example well, + - 0 if it failed to classify the example, + - -100 if it did not classify the example (multiclass one versus one). + base_file_name : str + The name of the file in which the data will be saved ("2D_plot_data.csv" and "bar_plot_data.csv" will + be added at the end) + nbCopies : int, optional, default: 2 + The number of times the data is copied (classifier-wise) in order for the figure to be more readable. + + + Returns + ------- + nbClassifiers : int + Number of different classifiers. + nbExamples : int + Number of examples. + nbCopies : int + The number of times the data is copied (classifier-wise) in order for the figure to be more readable. + classifiersNames : list of strs + The names of the classifiers. + data : np.array of shape `(nbClassifiers, nbExamples)` + A matrix with zeros where the classifier failed to classify the example, ones where it classified it well + and -100 if the example was not classified. + errorOnExamples : np.array of shape `(nbExamples,)` + An array counting how many classifiers failed to classify each example. + """ nbClassifiers = len(example_errors) nbExamples = len(list(example_errors.values())[0]["errorOnExamples"]) classifiersNames = example_errors.keys()
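
Below is a minimal, untested usage sketch for the error-analysis helpers documented in this patch (gen_error_data, publish2Dplot, publishErrorsBarPlot). It is illustration only, not part of the diff: the import path and the toy example_errors dictionary are assumptions, and the signatures and return values are taken from the hunks above.

# Hedged sketch: assumes ResultAnalysis is importable as below and that the
# helpers keep the signatures shown in the diff above.
import numpy as np
from multiview_platform.MonoMultiViewClassifiers import ResultAnalysis

# Toy error data: 1 = example well classified, 0 = misclassified,
# -100 = example not classified (multiclass one-versus-one case).
example_errors = {
    "DecisionTree": {"errorOnExamples": np.array([1, 0, 1, 1, -100])},
    "SVMLinear": {"errorOnExamples": np.array([1, 1, 0, 1, -100])},
}

# Per its docstring, gen_error_data also saves "2D_plot_data.csv" and
# "bar_plot_data.csv" under the given file-name prefix.
(nbClassifiers, nbExamples, nbCopies, classifiersNames, data,
 errorOnExamples) = ResultAnalysis.gen_error_data(example_errors,
                                                  base_file_name="demo-")

# "error_analysis_2D.png" is appended to fileName by publish2Dplot.
ResultAnalysis.publish2Dplot(data, list(classifiersNames), nbClassifiers,
                             nbExamples, nbCopies, fileName="demo-")
ResultAnalysis.publishErrorsBarPlot(errorOnExamples, nbClassifiers,
                                    nbExamples, fileName="demo-")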