diff --git a/docs/source/conf.py b/docs/source/conf.py index dd0b9811fd6d377cbde411c802a9668af031b04b..87552b054d5aa53eca0b24ca8c05dc589da1be22 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -27,6 +27,8 @@ # # needs_sphinx = '1.0' +add_module_names = False + # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. @@ -34,6 +36,8 @@ extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', + 'numpydoc', + 'nbsphinx', 'sphinx.ext.coverage', 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig', diff --git a/docs/source/index.rst b/docs/source/index.rst index 015ec3d5cbd26ce9bcdec2e2fea6bfcfbef3a519..f51f1a3e6d9d6b39cfcd24ba1c5205978eef62fc 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -11,7 +11,7 @@ Welcome to MultiviewPlatform's documentation! :caption: Contents: api - examples +.. examples diff --git a/docs/source/monomulti/exec_classif.rst b/docs/source/monomulti/exec_classif.rst new file mode 100644 index 0000000000000000000000000000000000000000..fb379570eb8367796a8ecc95cd12877dcfb03d0b --- /dev/null +++ b/docs/source/monomulti/exec_classif.rst @@ -0,0 +1,6 @@ +Classification execution module +=============================== + +.. automodule:: multiview_platform.MonoMultiViewClassifiers.ExecClassif + :members: + :inherited-members: \ No newline at end of file diff --git a/docs/source/monomulti/metrics.rst b/docs/source/monomulti/metrics.rst index 7fc2996d26ddfc21d15e23cafa59277f652bc1cb..c42b38c49b6529c78865f2ceacf212ae5b55f112 100644 --- a/docs/source/monomulti/metrics.rst +++ b/docs/source/monomulti/metrics.rst @@ -1,6 +1,6 @@ -Welcome to the metrics documentation! -============================================= +Metrics framework +================= -.. automodule:: multiview_platform.Metrics.framework +.. automodule:: multiview_platform.MonoMultiViewClassifiers.Metrics.framework :members: :inherited-members: \ No newline at end of file diff --git a/docs/source/monomulti/multiview_classifier.ipynb b/docs/source/monomulti/multiview_classifier.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..fb3101d06b066ac5aefd5560d151f7e9932b56b6 --- /dev/null +++ b/docs/source/monomulti/multiview_classifier.ipynb @@ -0,0 +1,551 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to add a multiview classifier to the platform" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## File addition " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* In the `Code/MonoMultiViewClassifiers/MultiviewClassifiers` package, add a new package named after your multiview classifier (let's call it NMC for New Multiview Classifier).\n", + "\n", + "* In this package (`Code/MonoMultiViewClassifiers/MultiviewClassifiers/NMC`), add a file called `NMCModule.py` and another one called `analyzeResults.py`. These will be the two files used by the platform to communicate with your implementation.\n", + "\n", + "* You can now add either a package named after your classifier `NMCPackage` and paste your files in it or just add a file with the same name if it is enough." 
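+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For instance, with our hypothetical NMC classifier, the resulting layout could look like the sketch below (the `__init__.py` files are the usual Python package markers; the inner file names are free):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Hypothetical layout, for illustration only:\n", + "# Code/MonoMultiViewClassifiers/MultiviewClassifiers/NMC/\n", + "#     __init__.py\n", + "#     NMCModule.py\n", + "#     analyzeResults.py\n", + "#     NMCPackage/\n", + "#         __init__.py\n", + "#         NMC.py      # your actual implementation" + ] + },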
+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `NMCModule.py`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we list all the functions this Python module must provide for the platform to be able to use NMC." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### `getArgs`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This function is used to generate multiple argument dictionaries from one benchmark entry. It must return the `argumentsList`, to which it must have appended at least one dictionary containing all the information necessary to run NMC. You must add all the general fields about the type of classifier, plus a field called `NMCKWARGS` (`<classifier_name>KWARGS`) containing another dictionary with the classifier-specific arguments (we assume here that NMC has two hyper-parameters: a set of weights and an integer)." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "arguments = {\"CL_type\":\"NMC\", \n", + " \"views\":[\"all\", \"the\", \"views\", \"names\"],\n", + " \"NB_VIEW\":len([\"all\", \"the\", \"views\", \"names\"]), \n", + " \"viewsIndices\":[\"the indices\", \"of the\", \"views in\", \"the hdf5 file\"], \n", + " \"NB_CLASS\": \"the number of labels of the dataset\", \n", + " \"LABELS_NAMES\": [\"the names of\", \"the labels used\"], \n", + " \"NMCKWARGS\":{\"weights\":[], \n", + " \"integer\":42,\n", + " \"nbViews\":5}\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To fill these fields, you can use the default values given as arguments of the function: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices):\n", + " argumentsList = []\n", + " nbViews = len(views)\n", + " arguments = {\"CL_type\": \"NMC\",\n", + " \"views\": views,\n", + " \"NB_VIEW\": len(views),\n", + " \"viewsIndices\": viewsIndices,\n", + " \"NB_CLASS\": len(args.CL_classes),\n", + " \"LABELS_NAMES\": args.CL_classes,\n", + " \"NMCKWARGS\": {\"weights\":[],\n", + " \"integer\":42,\n", + " \"nbViews\":5}}\n", + " argumentsList.append(arguments)\n", + " return argumentsList" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This function is also used to add the user-defined configuration for the classifier, but we will discuss that later." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### `genName`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This function is used to generate a short string describing the classifier using its configuration."
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def genName(config):\n", + " return \"NMC\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Some classifiers, like some late fusion classifiers, will have more complicated `genName` functions that need to summarize in a short string which monoview classifiers they use, based on the `config` argument, which is exactly the dictionary called `\"NMCKWARGS\"` in the `getArgs` function." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### `getBenchmark`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This function is used to generate the `benchmark` argument of `getArgs`. It stores all the different configurations that will have to be tested (it does not include hyper-parameter sets). For example, for the Mumbo classifier, it will store the list of possible algorithms to use as weak learners. " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def getBenchmark(benchmark, args=None):\n", + " benchmark[\"Multiview\"][\"NMC\"] = [\"Some NMC configurations\"]\n", + " return benchmark" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `benchmark` argument is pre-generated with an entry for all the multiview classifiers, so you just need to fill it with the different configurations." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### `genParamsSets`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This function is used to generate random hyper-parameter sets so that a randomized search can estimate the best one. It works in tandem with the `setParams` method implemented in the classifier's class, so you need to keep in mind the order of the hyper-parameters you use here.\n", + "\n", + "The `classificationKWARGS` argument is the `\"NMCKWARGS\"` entry seen earlier, and it is highly recommended to use the `randomState` object (which is described [here](https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.random.RandomState.html)) to generate random numbers, in order for the results to be reproducible.\n", + "\n", + "Assuming our NMC classifier has two hyper-parameters, a weight vector (one weight per view) and an integer between 1 and 100, the `genParamsSets` function will look like:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np  # needed to normalize the weights\n", + "\n", + "def genParamsSets(classificationKWARGS, randomState, nIter=1):\n", + " weightsVector = [randomState.random_sample(classificationKWARGS[\"nbViews\"]) for _ in range(nIter)]\n", + " normalizedWeights = [weights/np.sum(weights) for weights in weightsVector]\n", + " intsVector = list(randomState.randint(1,100,nIter))\n", + " paramsSets = [[normalizedWeight, integer] for normalizedWeight, integer in zip(normalizedWeights, intsVector)]\n", + " return paramsSets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The `NMC` class" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It has to be named after the classifier, with `Class` appended to its name. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "class NMCClass:\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### `init` method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There is nothing specific to define in the `__init__` method, you just need to initialize the attributes of your classifier. The `kwargs` argument is the `NMCKWARGS` dictionary seen earlier. In our example, NMC uses two hyper parameters : weights and an int." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def __init__(self, randomState, NB_CORES=1, **kwargs):\n", + " if kwargs[\"weights\"] == []:\n", + " self.weights = randomState.random_sample(classificationKWARGS[\"nbViews\"])\n", + " else:\n", + " self.weights = kwargs[\"weights\"]\n", + " self.weights /= np.sum(self.weights)\n", + " self.integer = kwargs[\"integer\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### `setParams` method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This method is used to tune your classifier with a set of hyper parameters. The set is a list ordered as in the `genParamsSets` function seen earlier. The input of the `setParams` method is a list of parameters in the right order. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def setParams(self, paramsSet):\n", + " self.weights = paramsSet[0]\n", + " self.integer = paramsSet[1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### `fit_hdf5` method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This method is generaly the same as `sklearn`'s `fit` method but uses as an input an HDF5 dataset in order to lower the memory usage of the whole platform.\n", + "* The `DATASET` object is an HDF5 dataset file containing all the views and labels. \n", + "* The `usedIndices` object is a `numpy` 1d-array containing the indices of the examples want to learn from. \n", + "* The `viewsIndices` object is a `numpy` 1d-array containing the indices of the views we want to learn from. " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def fit_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):\n", + " # Call the fit function of your own module\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### `predict_hdf5` method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This method is used as an HDF5-compatible method similar to `sklearn`'s `predict` method. It has the same input than the `fit_hdf5` method but returns a 1d-array containing the labels of the asked examples (ordered as in `usedIndices`)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):\n", + " # Call the predict function of your own module\n", + " predictedLabels = None # Just to avoid any ipynb running error\n", + " return predictedLabels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you've added everything to the `NMCModule.py` file, you are close to being able to run your algorithm on the platform; you just need to fill in the `analyzeResults.py` file." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `analyzeResults.py`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `analyzeResults.py` file is a module used to get a result analysis specific to your classifier. In order to run the platform, you have to add a unique function called `execute` that runs the analysis and returns three variables: \n", + "* `stringAnalysis` is a string that will be saved in a file to describe the classifier and its performance, and that may give some insight into how it classifies. \n", + "* `imagesAnalysis` is a dictionary where you can store images (as values) describing the classifier; the keys will be the image names. \n", + "* `metricsScores` is a dictionary where the values are lists containing train and test scores, and the keys are the metric names (e.g. `metricsScores = {\"accuracy_score\":[0.99, 0.10]}`).\n", + "The `execute` function has the following inputs: \n", + "* `classifier` is a classifier object from your classifier's class\n", + "* `trainLabels` are the labels predicted for the train set by the classifier\n", + "* `testLabels` are the labels predicted for the test set by the classifier\n", + "* `DATASET` is the HDF5 dataset object\n", + "* `classificationKWARGS` is the dictionary named `NMCKWARGS` earlier\n", + "* `classificationIndices` is a triplet containing the learning indices, the validation indices and the test indices for multiclass classification\n", + "* `LABELS_DICTIONARY` is a dictionary with each label as a key and its name as the value\n", + "* `views` is the list of the view names used by the classifier\n", + "* `nbCores` is an `int` fixing the number of threads used by the platform \n", + "* `times` is a tuple containing the extraction time and the classification time\n", + "* `name` is the name of the database on which the platform is running\n", + "* `KFolds` is an `sklearn` k-fold object used for the cross-validation\n", + "* `hyperParamSearch` is the type of hyper-parameter optimization method used\n", + "* `nIter` is the number of iterations of the hyper-parameter search\n", + "* `metrics` is the list of the metrics and their arguments\n", + "* `viewsIndices` is a 1d-array of the indices of the views used for classification\n", + "* `randomState` is a `numpy` RandomState object\n", + "* `labels` are the ground truth labels of the dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The basic function analyzing results for all the classifiers looks like: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from ... 
import Metrics\n", + "from ...utils.MultiviewResultAnalysis import printMetricScore, getMetricsScores\n", + "\n", + "def execute(classifier, trainLabels,\n", + " testLabels, DATASET,\n", + " classificationKWARGS, classificationIndices,\n", + " LABELS_DICTIONARY, views, nbCores, times,\n", + " name, KFolds,\n", + " hyperParamSearch, nIter, metrics,\n", + " viewsIndices, randomState, labels):\n", + " CLASS_LABELS = labels\n", + " learningIndices, validationIndices, testIndicesMulticlass = classificationIndices\n", + "\n", + " metricModule = getattr(Metrics, metrics[0][0])\n", + " if metrics[0][1] is not None:\n", + " metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metrics[0][1]))\n", + " else:\n", + " metricKWARGS = {}\n", + " # Compare the ground truth with the predicted labels on each set\n", + " scoreOnTrain = metricModule.score(CLASS_LABELS[learningIndices], trainLabels, **metricKWARGS)\n", + " scoreOnTest = metricModule.score(CLASS_LABELS[validationIndices], testLabels, **metricKWARGS)\n", + "\n", + " # To be modified to fit your classifier \n", + " classifierConfigurationString = \"with weights : \"+ \", \".join(map(str, list(classifier.weights))) + \", and integer : \"+str(classifier.integer)\n", + " # Modify the name of the classifier in these strings\n", + " stringAnalysis = \"\\t\\tResult for Multiview classification with NMC \"+ \\\n", + " \"\\n\\n\" + metrics[0][0] + \" :\\n\\t-On Train : \" + str(scoreOnTrain) + \"\\n\\t-On Test : \" + str(\n", + " scoreOnTest) + \\\n", + " \"\\n\\nDataset info :\\n\\t-Database name : \" + name + \"\\n\\t-Labels : \" + \\\n", + " ', '.join(LABELS_DICTIONARY.values()) + \"\\n\\t-Views : \" + ', '.join(views) + \"\\n\\t-\" + str(\n", + " KFolds.n_splits) + \\\n", + " \" folds\\n\\nClassification configuration : \\n\\t-Algorithm used : NMC \" + classifierConfigurationString\n", + "\n", + " metricsScores = getMetricsScores(metrics, trainLabels, testLabels,\n", + " validationIndices, learningIndices, labels)\n", + " stringAnalysis += printMetricScore(metricsScores, metrics)\n", + "\n", + " imagesAnalysis = {}\n", + " return stringAnalysis, imagesAnalysis, metricsScores" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you have done this, your classifier is ready to be used by the platform, but you can add a more detailed description of your classifier in the `analyzeResults.py` file. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Adding arguments to avoid hyper-parameter optimization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to be able to test a specific set of arguments on this platform, you need to add some lines to the argument parser located in the file `Code/MonoMultiViewClassifiers/utils/execution.py`, in the `parseTheArgs` function. 
What you need to do is add a group of arguments, allowing you to pass the hyper-parameters on the command line:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "groupNMC = parser.add_argument_group('New Multiview Classifier arguments')\n", + "groupNMC.add_argument('--NMC_weights', metavar='FLOAT', action='store', nargs=\"+\",\n", + " help='Determine the weights of NMC', type=float,\n", + " default=[])\n", + "groupNMC.add_argument('--NMC_integer', metavar='INT', action='store',\n", + " help='Determine the integer of NMC', type=int,\n", + " default=42)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order for the platform to use these arguments, you need to modify the `getArgs` function of the file `NMCModule.py`. \n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices):\n", + " argumentsList = []\n", + " nbViews = len(views)\n", + " arguments = {\"CL_type\": \"NMC\",\n", + " \"views\": views,\n", + " \"NB_VIEW\": len(views),\n", + " \"viewsIndices\": viewsIndices,\n", + " \"NB_CLASS\": len(args.CL_classes),\n", + " \"LABELS_NAMES\": args.CL_classes,\n", + " \"NMCKWARGS\": {\"weights\":args.NMC_weights, # Modified to take the args into account\n", + " \"integer\":args.NMC_integer, # Modified to take the args into account\n", + " \"nbViews\":5}}\n", + " argumentsList.append(arguments)\n", + " return argumentsList" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2.0 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/docs/source/monomulti/randomized_cv.ipynb b/docs/source/monomulti/randomized_cv.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..94594928b9e5b22abac6f2bd88f358cd93ed8253 --- /dev/null +++ b/docs/source/monomulti/randomized_cv.ipynb @@ -0,0 +1,140 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Randomized example selection for classification\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train/test split generation " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The train/test splits are generated in the `execution.genSplits` function. Its task is to generate a train/test split for each statistical iteration. In order to do that, it is fed the following inputs: \n", + "* `labels` are the labels of the whole dataset, \n", + "* `splitRatio` is a real number giving the ratio |test|/|all|,\n", + "* `statsIterRandomStates` is a list of `numpy` random states used to generate reproducible pseudo-random numbers.\n", + "\n", + "The main operation in this function is done by the `sklearn.model_selection.StratifiedShuffleSplit` class, which returns folds that are made by preserving the percentage of samples for each class.\n", + "In this case, we ask it to split the dataset into two subsets with the requested test size. 
It then returns a shuffled train/test split while preserving the percentage of samples for each class.\n", + "We store the example indices in two `np.array`s called `trainIndices` and `testIndices`.\n", + "All the algorithms will then train (hyper-parameter cross-validation & learning) on `trainIndices`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np  # imports needed to run this excerpt standalone\n", + "import sklearn.model_selection\n", + "\n", + "def genSplits(labels, splitRatio, statsIterRandomStates):\n", + " \"\"\"Used to generate the train/test splits using one or multiple random states;\n", + " classificationIndices is a list of train/test splits\"\"\"\n", + " indices = np.arange(len(labels))\n", + " splits = []\n", + " for randomState in statsIterRandomStates:\n", + " foldsObj = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1,\n", + " random_state=randomState,\n", + " test_size=splitRatio)\n", + " folds = foldsObj.split(indices, labels)\n", + " for fold in folds:\n", + " train_fold, test_fold = fold\n", + " trainIndices = indices[train_fold]\n", + " testIndices = indices[test_fold]\n", + " splits.append([trainIndices, testIndices])\n", + "\n", + " return splits" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multiclass problems" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To be able to use the platform on multiclass problems, a one-versus-one method is implemented. \n", + "In order to use one-versus-one, we need to modify the train/test splits generated by the previously described function. \n", + "If the problem the platform is asked to solve is multiclass, then it will generate all the possible two-class combinations to divide the main problem into multiple biclass ones. \n", + "In order to adapt each split, the `genMulticlassLabels` function will create new train/test splits by: \n", + "* Generating an `oldIndices` list containing all the example indices whose label is in the combination,\n", + "* Generating a new train split by selecting only the indices of `trainIndices` whose labels are in the combination,\n", + "* Doing the same thing for the test indices,\n", + "* Copying the old `testIndices` variable into a new one called `testIndicesMulticlass`, which will be used to predict on the entire dataset once the algorithm has learned to distinguish two classes,\n", + "* Generating a new `labels` array by replacing all the labels that are not in the combination by -100, to flag them as unseen labels during the training phase. \n", + "\n", + "A minimal sketch of this label remapping is given in the cell below. The function then returns a triplet: \n", + "* `multiclassLabels` is a list containing, for each combination, the newly generated labels with ones and zeros for the labels in the combination and -100 for the others,\n", + "* `labelsIndices` is a list containing all the combinations,\n", + "* `indicesMulticlass` is a list containing triplets for each statistical iteration:\n", + " * `trainIndices` are the indices used for training, picked only in the two classes of the combination (at the second step of the previous list),\n", + " * `testIndices` are the indices used for testing the biclass-generalization capacity of each biclass classifier learned on `trainIndices`, picked only in the two classes of the combination (at the third step of the previous list),\n", + " * `testIndicesMulticlass` are the indices described at the fourth step of the previous list. " + ] + },
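+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell is a minimal sketch of the remapping step for one combination (the `remapLabels` helper and its signature are hypothetical, for illustration only; the platform's `genMulticlassLabels` also builds the adapted splits described above):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "def remapLabels(labels, combination):\n", + "    # Flag every example outside the two-class combination with -100,\n", + "    # and relabel the combination's two classes as 0 and 1\n", + "    newLabels = np.full(len(labels), -100, dtype=int)\n", + "    newLabels[labels == combination[0]] = 0\n", + "    newLabels[labels == combination[1]] = 1\n", + "    return newLabels" + ] + },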
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cross-validation folds " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "The cross validation folds are generated using a `StratifiedKFold` object, for the `sklearn.model_selection` library. \n", + "* For all the **monoview** algorithms, these objects (one for each statistical iteration) are then fed in a `sklearn.model_selection` `RandomisedSearchCV` object. So we don't have any custom stuff about cross-vaildation folds in the monoview case\n", + "* In the **multiview** case, they are used in the `utils.HyperParametersSearch` module, in the `randomizedSearch` function. In this case, they are used to split the learning set with `multiviewFolds = KFolds.split(learningIndices, labels[learningIndices])` and then used in `for trainIndices, testIndices in multiviewFolds:`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2.0 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/docs/source/monomulti/result_analysis.rst b/docs/source/monomulti/result_analysis.rst new file mode 100644 index 0000000000000000000000000000000000000000..723adb1ae25c471dc5bcb49fd79736000d6d5ff4 --- /dev/null +++ b/docs/source/monomulti/result_analysis.rst @@ -0,0 +1,6 @@ +Result alanysis module +====================== + +.. 
automodule:: multiview_platform.MonoMultiViewClassifiers.ResultAnalysis + :members: + :inherited-members: \ No newline at end of file diff --git a/docs/source/monomultidoc.rst b/docs/source/monomultidoc.rst index f044acf413dc9d90f8809b1bd1b7c917d287f15a..183b75b0b00e299a19ef011f141ed1cd3b57e859 100644 --- a/docs/source/monomultidoc.rst +++ b/docs/source/monomultidoc.rst @@ -5,9 +5,7 @@ Mono and mutliview classification :maxdepth: 1 :caption: Contents: - monomutli/metrics - monomutli/monoexec - monomutli/multiexec - monomutli/monoclf - monomutli/multiclf - monomutli/utils \ No newline at end of file + monomulti/metrics + monomulti/exec_classif + monomulti/result_analysis + monomulti/multiview_classifier \ No newline at end of file diff --git a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py index e7e6ca9d4d40272a181d1cfcf6d9a0aa3a5ae48d..642acfd1baa3ffdea31a26bec42bef6b5aadf448 100644 --- a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py +++ b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py @@ -38,7 +38,7 @@ def initBenchmark(args): allMonoviewAlgos = [name for _, name, isPackage in pkgutil.iter_modules(['./MonoMultiViewClassifiers/MonoviewClassifiers']) - if (not isPackage)] + if (not isPackage) and name not in ["framework"]] benchmark["Monoview"] = allMonoviewAlgos benchmark["Multiview"] = dict((multiviewPackageName, "_") for multiviewPackageName in allMultiviewPackages) for multiviewPackageName in allMultiviewPackages: @@ -389,7 +389,7 @@ def execClassif(arguments): metrics = [metric.split(":") for metric in args.CL_metrics] if metrics == [[""]]: metricsNames = [name for _, name, isPackage - in pkgutil.iter_modules(['./MonoMultiViewClassifiers/Metrics']) if not isPackage and name not in ["log_loss", "matthews_corrcoef", "roc_auc_score"]] + in pkgutil.iter_modules(['./MonoMultiViewClassifiers/Metrics']) if not isPackage and name not in ["framework", "log_loss", "matthews_corrcoef", "roc_auc_score"]] metrics = [[metricName] for metricName in metricsNames] metrics = arangeMetrics(metrics, args.CL_metric_princ) for metricIndex, metric in enumerate(metrics): diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/__init__.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/__init__.py index bf6146bc68e6da28eebb3bd71164d0624dfee022..e954a8f8d3cdc6598b4db7649a29d52c15e0b103 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Metrics/__init__.py +++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/__init__.py @@ -24,11 +24,9 @@ Define a getConfig function """ import os -modules = [] for module in os.listdir(os.path.dirname(os.path.realpath(__file__))): - if module in ['__init__.py', 'framework.py'] or module[-3:] != '.py': + if module in ['__init__.py'] or module[-3:] != '.py': continue __import__(module[:-3], locals(), globals(), [], 1) pass -del module del os \ No newline at end of file diff --git a/multiview_platform/MonoMultiViewClassifiers/Metrics/framework.py b/multiview_platform/MonoMultiViewClassifiers/Metrics/framework.py new file mode 100644 index 0000000000000000000000000000000000000000..202ac1a01ac39c7089be65891ee35a33f275a63f --- /dev/null +++ b/multiview_platform/MonoMultiViewClassifiers/Metrics/framework.py @@ -0,0 +1,75 @@ +"""In this file, we explain how to add a metric to the platform. + +In order to do that, one needs to add a file with the following functions, +which are mandatory for the metric to work with the platform. 
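+ +For instance, the platform uses a metric module as follows (see the ``execute`` +function of an ``analyzeResults`` module; ``accuracy_score`` is given here as an +example of module name):: + + metricModule = getattr(Metrics, "accuracy_score") + scoreOnTest = metricModule.score(y_true, y_pred)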
+""" + +# Author-Info +__author__ = "Baptiste Bauvin" +__status__ = "Prototype" # Production, Development, Prototype + + +def score(y_true, y_pred, multiclass=False, **kwargs): + """Get the metric's score from the ground truth (``y_true``) and predictions (``y_pred``). + + Parameters + ---------- + y_true : array-like, shape = (n_samples,) + Target values (class labels). + + y_pred : array-like, shape = (n_samples,) + Predicted target values (class labels). + + multiclass : boolean (default=False) + Parameter specifying whether the target values are multiclass or not. + + kwargs : dict + The arguments stored in this dictionary must be keyed by string of + integers as "0", .., etc and decrypted in the function + + Returns + ------- + score : float + Returns the score of the prediction. + """ + score = 0.0 + return score + + +def get_scorer(**kwargs): + """Get the metric's scorer as in the sklearn.metrics package. + + Parameters + ---------- + kwargs : dict + The arguments stored in this dictionary must be keyed by string of + integers as "0", .., etc and decrypted in the function. These arguments + are a configuration of the metric. + + Returns + ------- + scorer : object + Callable object that returns a scalar score; greater is better. (cf sklearn.metrics.make_scorer) + """ + scorer = None + return scorer + + +def getConfig(**kwargs): + """Get the metric's configuration as a string. + + Parameters + ---------- + kwargs : dict + The arguments stored in this dictionary must be keyed by string of + integers as "0", .., etc and decrypted in the function. These arguments + are a configuration of the metric. + + Returns + ------- + configString : string + The string describing the metric's configuration. + """ + + configString = "This is a framework" + return configString