diff --git a/Code/MonoMultiViewClassifiers/utils/execution.py b/Code/MonoMultiViewClassifiers/utils/execution.py
index e8d2d0e184f2340b1ba74a91eaaa5ba1bc9122a0..fbe1ad676d2c34ed581b1167b81fd6f9c8b88bed 100644
--- a/Code/MonoMultiViewClassifiers/utils/execution.py
+++ b/Code/MonoMultiViewClassifiers/utils/execution.py
@@ -194,7 +194,7 @@ def parseTheArgs(arguments):
     groupFatLateFusion = parser.add_argument_group('Fat Late Fusion arguments')
     groupFatLateFusion.add_argument('--FLF_weights', metavar='FLOAT', action='store', nargs="+",
-                                    help='Determine which late fusion method of fusion to use', type=float,
+                                    help='Determine the weights of each monoview decision for FLF', type=float,
                                     default=[])
 
     args = parser.parse_args(arguments)
diff --git a/ipynb/Adding a multiview classifier.ipynb b/ipynb/Adding a multiview classifier.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..14cabb5b5e48bf43597fbd2e4edb263a44eea7c9
--- /dev/null
+++ b/ipynb/Adding a multiview classifier.ipynb
@@ -0,0 +1,551 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# How to add a multiview classifier to the platform"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## File addition"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* In the `Code/MonoMultiViewClassifiers/MultiviewClassifiers` package, add a new package named after your multiview classifier (let's call it NMC, for New Multiview Classifier).\n",
+    "\n",
+    "* In this package (`Code/MonoMultiViewClassifiers/MultiviewClassifiers/NMC`), add a file called `NMCModule.py` and another one called `analyzeResults.py`. These will be the two files used by the platform to communicate with your implementation.\n",
+    "\n",
+    "* You can now either add a package named after your classifier (`NMCPackage`) and paste your implementation files in it, or just add a single file with the same name if that is enough."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## `NMCModule.py`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here we list all the functions this module must provide so that the platform can use NMC"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### The functions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### `getArgs`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This function is used to generate multiple arguments dictionaries from one benchmark entry. It must return the `argumentsList`, to which it must have appended at least one dictionary containing all the necessary information to run NMC.\n",
+    "You must add all the general fields about the type of classifier, plus a field called `NMCKWARGS` (`<classifier_name>KWARGS`) containing another dictionary with the classifier-specific arguments (we assume here that NMC has two hyper-parameters: a set of weights and an integer)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "arguments = {\"CL_type\": \"NMC\",\n",
+    "             \"views\": [\"all\", \"the\", \"views\", \"names\"],\n",
+    "             \"NB_VIEW\": len([\"all\", \"the\", \"views\", \"names\"]),\n",
+    "             \"viewsIndices\": [\"the indices\", \"of the\", \"views in\", \"the hdf5 file\"],\n",
+    "             \"NB_CLASS\": \"the number of labels of the dataset\",\n",
+    "             \"LABELS_NAMES\": [\"the names of\", \"the labels used\"],\n",
+    "             \"NMCKWARGS\": {\"weights\": [],\n",
+    "                           \"integer\": 42,\n",
+    "                           \"nbViews\": 4}\n",
+    "             }"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To fill these fields, you can use the default values passed as arguments to the function:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices):\n",
+    "    argumentsList = []\n",
+    "    nbViews = len(views)\n",
+    "    arguments = {\"CL_type\": \"NMC\",\n",
+    "                 \"views\": views,\n",
+    "                 \"NB_VIEW\": len(views),\n",
+    "                 \"viewsIndices\": viewsIndices,\n",
+    "                 \"NB_CLASS\": len(args.CL_classes),\n",
+    "                 \"LABELS_NAMES\": args.CL_classes,\n",
+    "                 \"NMCKWARGS\": {\"weights\": [],\n",
+    "                               \"integer\": 42,\n",
+    "                               \"nbViews\": nbViews}}\n",
+    "    argumentsList.append(arguments)\n",
+    "    return argumentsList"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This function is also used to add the user-defined configuration for the classifier, but we will discuss that later"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### `genName`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This function is used to generate a short string describing the classifier from its configuration."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def genName(config):\n",
+    "    return \"NMC\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Some classifiers, like some late fusion classifiers, will have more complicated `genName` functions that need to summarize which monoview classifiers they use in a short string, using the `config` argument, which is exactly the dictionary called `\"NMCKWARGS\"` in the `getArgs` function (a sketch is given below)."
+   ]
+  },
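+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For instance, a late fusion `genName` could look like the following sketch. This is only an illustration: the `classifiersNames` key is hypothetical, so use whatever key actually stores the monoview classifiers' names in your classifier's `config`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def genName(config):\n",
+    "    # Hypothetical example: summarize the used monoview classifiers\n",
+    "    # in a short string like \"NMC-DecisionTree-SVMLinear\"\n",
+    "    return \"NMC-\" + \"-\".join(config[\"classifiersNames\"])"
+   ]
+  },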
" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def getBenchmark(benchmark, args=None):\n", + " benchmark[\"Multiview\"][\"NMC\"] = [\"Some NMC cnfigurations\"]\n", + " return benchmark" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `benchmark` argument is pre-generated with an entry for all the multiview classifiers so you just need to fill it with the different configurations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### `genParamsSets`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This function is used to generate random hyper-parameters sets to allow a randomized search to estimate the best one. It works in pair with the `setParams` method implemented in the classifier's class so you need to keep in mind the order of the hyper-paramters you used here.\n", + "\n", + "The `classificationKWARGS` argument is the `\"NMCKWARGS\"` entry seen earlier, and it is highly recommended to use the `randomState` object (which is described [here](https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.random.RandomState.html)) to generate random numbers in order for the results to be reproductible\n", + "\n", + "Assuming our NMC classifier has 2 HP, one weight vector for each view and one integer that can be between 1 and 100, the `genParamsSets` function will look like :" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def genParamsSets(classificationKWARGS, randomState, nIter=1):\n", + " weightsVector = [randomState.random_sample(classificationKWARGS[\"nbViews\"]) for _ in range(nIter)]\n", + " nomralizedWeights = [weights/np.sum(weights) for weights in weightsVector]\n", + " intsVector = list(randomState.randint(1,100,nIter))\n", + " paramsSets = [[normalizedWeight, integer] for normalizedWeight, interger in zip(normalizedWeights, intsVector)]\n", + " return paramsSets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The `NMC` class" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It has to be named after the classifier adding `Class` at the end of its name. " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "class NMCClass:\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### `init` method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There is nothing specific to define in the `__init__` method, you just need to initialize the attributes of your classifier. The `kwargs` argument is the `NMCKWARGS` dictionary seen earlier. In our example, NMC uses two hyper parameters : weights and an int." 
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### `fit_hdf5` method"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This method is generally the same as `sklearn`'s `fit` method, but it takes an HDF5 dataset as input in order to lower the memory usage of the whole platform.\n",
+    "* The `DATASET` object is an HDF5 dataset file containing all the views and labels.\n",
+    "* The `usedIndices` object is a `numpy` 1d-array containing the indices of the examples we want to learn from.\n",
+    "* The `viewsIndices` object is a `numpy` 1d-array containing the indices of the views we want to learn from."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def fit_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):\n",
+    "    # Call the fit function of your own module\n",
+    "    pass"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### `predict_hdf5` method"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This method is used as an HDF5-compatible method similar to `sklearn`'s `predict` method. It has the same inputs as the `fit_hdf5` method, but returns a 1d-array containing the labels of the asked examples (ordered as in `usedIndices`). A sketch of how both methods can read the views from the HDF5 file is given after the example below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):\n",
+    "    # Call the predict function of your own module\n",
+    "    predictedLabels = None  # Just to avoid any ipynb running error\n",
+    "    return predictedLabels"
+   ]
+  },
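+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a starting point, here is a minimal sketch of how the views and labels can be extracted from the HDF5 file inside these methods. It assumes the views are stored as `h5py` datasets named `View0`, `View1`, ... plus a `Labels` dataset, and that `usedIndices` is sorted in increasing order; check your dataset's actual layout before reusing it:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def fit_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):\n",
+    "    # Assumed HDF5 layout: one dataset per view named \"View<index>\"\n",
+    "    # and one \"Labels\" dataset; adapt the names to your own file.\n",
+    "    views = [DATASET.get(\"View\" + str(viewIndex))[usedIndices, :]\n",
+    "             for viewIndex in viewsIndices]\n",
+    "    labels = DATASET.get(\"Labels\")[usedIndices]\n",
+    "    # ... learn self.weights and self.integer from views and labels here"
+   ]
+  },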
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Once you've added everything to the `NMCModule.py` file, you are almost able to run your algorithm on the platform; you just need to fill the `analyzeResults.py` file."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## `analyzeResults.py`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `analyzeResults.py` file is a module used to get a result analysis specific to your classifier. In order to run the platform, you have to add a unique function called `execute` that will run the analysis and return three different variables:\n",
+    "* `stringAnalysis` is a string that will be saved in a file to describe the classifier and its performance, and may give some insight into how it classifies.\n",
+    "* `imagesAnalysis` is a dictionary where you can store images (as values) to describe the classifier; the keys will be the image names.\n",
+    "* `metricsScores` is a dictionary where the values are lists containing the train and test scores, and the keys are the metric names (`metricsScores = {\"accuracy_score\": [0.99, 0.10]}`).\n",
+    "\n",
+    "The `execute` function has as inputs:\n",
+    "* `classifier` is a classifier object from your classifier's class\n",
+    "* `trainLabels` are the labels predicted for the train set by the classifier\n",
+    "* `testLabels` are the labels predicted for the test set by the classifier\n",
+    "* `DATASET` is the HDF5 dataset object\n",
+    "* `classificationKWARGS` is the dictionary named `NMCKWARGS` earlier\n",
+    "* `classificationIndices` is a triplet containing the learning indices, the validation indices and the test indices for multiclass classification\n",
+    "* `LABELS_DICTIONARY` is a dictionary containing a label as a key and its name as a value\n",
+    "* `views` is the list of the names of the views used by the classifier\n",
+    "* `nbCores` is an `int` fixing the number of threads used by the platform\n",
+    "* `times` is a tuple containing the extraction time and the classification time\n",
+    "* `name` is the name of the database on which the platform is running\n",
+    "* `KFolds` is an `sklearn` k-fold object used for the cross-validation\n",
+    "* `hyperParamSearch` is the type of the hyper-parameter optimization method\n",
+    "* `nIter` is the number of iterations of the hyper-parameter search\n",
+    "* `metrics` is the list of the metrics and their arguments\n",
+    "* `viewsIndices` is a 1d-array of the indices of the views used for classification\n",
+    "* `randomState` is a `numpy` RandomState object\n",
+    "* `labels` are the ground truth labels of the dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The basic function analyzing results for all the classifiers looks like:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from ... import Metrics\n",
+    "from ...utils.MultiviewResultAnalysis import printMetricScore, getMetricsScores\n",
+    "\n",
+    "def execute(classifier, trainLabels,\n",
+    "            testLabels, DATASET,\n",
+    "            classificationKWARGS, classificationIndices,\n",
+    "            LABELS_DICTIONARY, views, nbCores, times,\n",
+    "            name, KFolds,\n",
+    "            hyperParamSearch, nIter, metrics,\n",
+    "            viewsIndices, randomState, labels):\n",
+    "    CLASS_LABELS = labels\n",
+    "    learningIndices, validationIndices, testIndicesMulticlass = classificationIndices\n",
+    "\n",
+    "    metricModule = getattr(Metrics, metrics[0][0])\n",
+    "    if metrics[0][1] is not None:\n",
+    "        metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metrics[0][1]))\n",
+    "    else:\n",
+    "        metricKWARGS = {}\n",
+    "    scoreOnTrain = metricModule.score(CLASS_LABELS[learningIndices], trainLabels, **metricKWARGS)\n",
+    "    scoreOnTest = metricModule.score(CLASS_LABELS[validationIndices], testLabels, **metricKWARGS)\n",
+    "\n",
+    "    # To be modified to fit your classifier\n",
+    "    classifierConfigurationString = \"with weights : \" + \", \".join(map(str, list(classifier.weights))) + \", and integer : \" + str(classifier.integer)\n",
+    "    # Modify the name of the classifier in these strings\n",
+    "    stringAnalysis = \"\\t\\tResult for Multiview classification with NMC \" + \\\n",
+    "                     \"\\n\\n\" + metrics[0][0] + \" :\\n\\t-On Train : \" + str(scoreOnTrain) + \"\\n\\t-On Test : \" + str(\n",
+    "        scoreOnTest) + \\\n",
+    "                     \"\\n\\nDataset info :\\n\\t-Database name : \" + name + \"\\n\\t-Labels : \" + \\\n",
+    "                     ', '.join(LABELS_DICTIONARY.values()) + \"\\n\\t-Views : \" + ', '.join(views) + \"\\n\\t-\" + str(\n",
+    "        KFolds.n_splits) + \\\n",
+    "                     \" folds\\n\\nClassification configuration : \\n\\t-Algorithm used : NMC \" + classifierConfigurationString\n",
+    "\n",
+    "    metricsScores = getMetricsScores(metrics, trainLabels, testLabels,\n",
+    "                                     validationIndices, learningIndices, labels)\n",
+    "    stringAnalysis += printMetricScore(metricsScores, metrics)\n",
+    "\n",
+    "    imagesAnalysis = {}\n",
+    "    return stringAnalysis, imagesAnalysis, metricsScores"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Once you have done this, your classifier is ready to be used by the platform, but you can add some description or visualization of your classifier in the `analyzeResults.py` file."
+   ]
+  },
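+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For example, to save an image describing the classifier, you can store a figure in `imagesAnalysis` before returning it. A minimal sketch, assuming the platform accepts `matplotlib` figures as values (check how your version of the platform saves them):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Sketch, to be placed inside execute before returning: plot the learned\n",
+    "# view weights; the keys of imagesAnalysis are used as image names.\n",
+    "fig = plt.figure()\n",
+    "plt.bar(range(len(classifier.weights)), list(classifier.weights))\n",
+    "plt.title(\"NMC view weights\")\n",
+    "imagesAnalysis = {\"NMC-weights\": fig}"
+   ]
+  },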
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Adding arguments to avoid hyper-parameter optimization"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In order to be able to test a specific set of arguments on this platform, you need to add some lines to the argument parser located in the `parseTheArgs` function of the file `Code/MonoMultiViewClassifiers/utils/execution.py`. What you need to do is add a group of arguments, allowing you to pass the hyper-parameters on the command line:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "groupNMC = parser.add_argument_group('New Multiview Classifier arguments')\n",
+    "groupNMC.add_argument('--NMC_weights', metavar='FLOAT', action='store', nargs=\"+\",\n",
+    "                      help='Determine the weights of NMC', type=float,\n",
+    "                      default=[])\n",
+    "groupNMC.add_argument('--NMC_integer', metavar='INT', action='store',\n",
+    "                      help='Determine the integer of NMC', type=int,\n",
+    "                      default=42)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In order for the platform to use these arguments, you need to modify the `getArgs` function of the file `NMCModule.py`:\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices):\n",
+    "    argumentsList = []\n",
+    "    nbViews = len(views)\n",
+    "    arguments = {\"CL_type\": \"NMC\",\n",
+    "                 \"views\": views,\n",
+    "                 \"NB_VIEW\": len(views),\n",
+    "                 \"viewsIndices\": viewsIndices,\n",
+    "                 \"NB_CLASS\": len(args.CL_classes),\n",
+    "                 \"LABELS_NAMES\": args.CL_classes,\n",
+    "                 \"NMCKWARGS\": {\"weights\": args.NMC_weights,  # Modified to take the args into account\n",
+    "                               \"integer\": args.NMC_integer,  # Modified to take the args into account\n",
+    "                               \"nbViews\": nbViews}}\n",
+    "    argumentsList.append(arguments)\n",
+    "    return argumentsList"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}