Commit 44557ada authored by Charly Lamothe

Merge branch 'master' into 'farah_notation_and_related_work'

# Conflicts:
#   .gitignore
parents 7d764f9c feaaeb22
1 merge request: !7 Farah notation and related work
Showing 311 additions and 14 deletions
models/*
results/*
*/.kile/*
*.kilepr
# Byte-compiled / optimized / DLL files
@@ -368,3 +371,6 @@ TSWLatexianTemp*
*.lpz
reports/*.pdf
# Image
*.png
@@ -49,5 +49,16 @@ Project Organization
Install project
--------------
First install the project requirements:
pip install -r requirements.txt
Then create a file `.env` by copying the file `.env.example`:
cp .env.example .env
Then set the project directory in the `.env` file:
project_dir = "path/to/your/project/directory"
This directory will be used for storing the model parameters.
\ No newline at end of file
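The code that reads this setting is not part of this diff; as a rough sketch, assuming a python-dotenv-style setup (the package name and the variable handling are assumptions, not something shown on this page), the value could be read like this:

# Hypothetical sketch: read project_dir from the .env file with python-dotenv.
# The actual project may load this value differently.
import os
from dotenv import load_dotenv

load_dotenv()  # loads key=value pairs from .env into the process environment
project_dir = os.getenv("project_dir")
if project_dir is None:
    raise RuntimeError("project_dir is not set; copy .env.example to .env and edit it")
models_directory = os.path.join(project_dir, "models")  # e.g. where model parameters would be stored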
Old TODO list (translated from French):
* Find relevant datasets
* Train and test forests of different sizes
* Train and test on regression and classification tasks
* Train and test on different modalities (not only image datasets)
* Train with different hyperparameters (d, depth, epsilon)
* Apply OMP with different values of k (in particular a small k)
* Produce figures
* Implement and compare competing systems

New TODO list:
* Fix pickle loading of ModelRawResults, because saving the model_object leads to import issues.
* Fix the ModelFactory.load function.
* Fix model results loading in compute_results.py.
* Check that the OMP multiclass classifier works as expected.
* In the bayesian search computation, output a different file name depending on the task of the trained model.
* Check the best-params scores of the regressors (neg_mean_squared_error leads to huge negative values).
* Prepare the json experiment files to run.
from bolsonaro.utils import root_directory
print(str(root_directory))
\ No newline at end of file
from pathlib import Path
root_directory = Path(__file__).parent.parent.absolute()
import os
LOG_PATH = os.path.abspath(os.path.dirname(__file__) + os.sep + '..' + os.sep + '..' + os.sep + 'log')
File moved
import os
LOG_PATH = os.path.abspath(os.path.dirname(__file__) + os.sep + '..' + os.sep + '..' + os.sep + 'log')
class Dataset(object):
    def __init__(self, task, X_train, X_dev, X_test, y_train,
                 y_dev, y_test, dataset_parameters=None):
        self._task = task
        # Stored so the dataset_parameters property below has a value to return;
        # the attribute was otherwise never initialized.
        self._dataset_parameters = dataset_parameters
self._X_train = X_train
self._X_dev = X_dev
self._X_test = X_test
self._y_train = y_train
self._y_dev = y_dev
self._y_test = y_test
@property
def task(self):
return self._task
@property
def dataset_parameters(self):
return self._dataset_parameters
@property
def X_train(self):
return self._X_train
@property
def X_dev(self):
return self._X_dev
@property
def X_test(self):
return self._X_test
@property
def y_train(self):
return self._y_train
@property
def y_dev(self):
return self._y_dev
@property
def y_test(self):
return self._y_test
from bolsonaro.data.dataset import Dataset
from bolsonaro.data.task import Task
from bolsonaro.utils import change_binary_func_load
from sklearn.datasets import load_boston, load_iris, load_diabetes, \
load_digits, load_linnerud, load_wine, load_breast_cancer
from sklearn.datasets import fetch_olivetti_faces, fetch_20newsgroups, \
fetch_20newsgroups_vectorized, fetch_lfw_people, fetch_lfw_pairs, \
fetch_covtype, fetch_rcv1, fetch_kddcup99, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
class DatasetLoader(object):
@staticmethod
def load(dataset_parameters):
name = dataset_parameters.name
if name == 'boston':
dataset_loading_func = load_boston
task = Task.REGRESSION
elif name == 'iris':
dataset_loading_func = load_iris
task = Task.MULTICLASSIFICATION
elif name == 'diabetes':
dataset_loading_func = load_diabetes
task = Task.REGRESSION
elif name == 'digits':
dataset_loading_func = load_digits
task = Task.MULTICLASSIFICATION
elif name == 'linnerud':
dataset_loading_func = load_linnerud
task = Task.REGRESSION
elif name == 'wine':
dataset_loading_func = load_wine
task = Task.MULTICLASSIFICATION
elif name == 'breast_cancer':
dataset_loading_func = change_binary_func_load(load_breast_cancer)
task = Task.BINARYCLASSIFICATION
elif name == 'olivetti_faces': # bug (no return X_y)
dataset_loading_func = fetch_olivetti_faces
task = Task.MULTICLASSIFICATION
elif name == '20newsgroups': # bug (no return X_y)
dataset_loading_func = fetch_20newsgroups
task = Task.MULTICLASSIFICATION
elif name == '20newsgroups_vectorized':
dataset_loading_func = fetch_20newsgroups_vectorized
task = Task.MULTICLASSIFICATION
elif name == 'lfw_people': # needs PIL (image dataset)
dataset_loading_func = fetch_lfw_people
task = Task.MULTICLASSIFICATION
elif name == 'lfw_pairs':
dataset_loading_func = fetch_lfw_pairs
task = Task.MULTICLASSIFICATION
elif name == 'covtype':
dataset_loading_func = fetch_covtype
task = Task.MULTICLASSIFICATION
elif name == 'rcv1':
dataset_loading_func = fetch_rcv1
task = Task.MULTICLASSIFICATION
elif name == 'kddcup99':
dataset_loading_func = fetch_kddcup99
task = Task.MULTICLASSIFICATION
elif name == 'california_housing':
dataset_loading_func = fetch_california_housing
task = Task.REGRESSION
else:
raise ValueError("Unsupported dataset '{}'".format(name))
X, y = dataset_loading_func(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=dataset_parameters.test_size,
random_state=dataset_parameters.random_state)
X_train, X_dev, y_train, y_dev = train_test_split(X_train, y_train,
test_size=dataset_parameters.dev_size,
random_state=dataset_parameters.random_state)
if dataset_parameters.dataset_normalizer is not None:
if dataset_parameters.dataset_normalizer == 'standard':
scaler = preprocessing.StandardScaler()
elif dataset_parameters.dataset_normalizer == 'minmax':
scaler = preprocessing.MinMaxScaler()
elif dataset_parameters.dataset_normalizer == 'robust':
scaler = preprocessing.RobustScaler()
elif dataset_parameters.dataset_normalizer == 'normalizer':
scaler = preprocessing.Normalizer()
else:
raise ValueError("Unsupported normalizer '{}'".format(dataset_parameters.dataset_normalizer))
X_train = scaler.fit_transform(X_train)
X_dev = scaler.transform(X_dev)
X_test = scaler.transform(X_test)
return Dataset(task, X_train,
X_dev, X_test, y_train, y_dev, y_test)
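As a usage illustration, the loader above might be combined with the DatasetParameters class shown next in this commit as follows; the module file names and all parameter values are assumptions for the sketch, not values taken from the project's experiment files:

# Hypothetical usage of DatasetParameters and DatasetLoader (illustrative values only;
# the exact module file names are assumed).
from bolsonaro.data.dataset_loader import DatasetLoader
from bolsonaro.data.dataset_parameters import DatasetParameters

dataset_parameters = DatasetParameters(
    name='boston',                  # any name handled in DatasetLoader.load
    test_size=0.2,                  # fraction of the data held out for the test split
    dev_size=0.2,                   # fraction of the remaining train data held out for the dev split
    random_state=42,                # seed shared by both train_test_split calls
    dataset_normalizer='standard'   # 'standard', 'minmax', 'robust', 'normalizer' or None
)
dataset = DatasetLoader.load(dataset_parameters)
print(dataset.task, dataset.X_train.shape, dataset.X_dev.shape, dataset.X_test.shape)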
from bolsonaro.utils import save_obj_to_json, load_obj_from_json
import os
class DatasetParameters(object):
def __init__(self, name, test_size, dev_size, random_state, dataset_normalizer):
self._name = name
self._test_size = test_size
self._dev_size = dev_size
self._random_state = random_state
self._dataset_normalizer = dataset_normalizer
@property
def name(self):
return self._name
@property
def test_size(self):
return self._test_size
@property
def dev_size(self):
return self._dev_size
@property
def random_state(self):
return self._random_state
@property
def dataset_normalizer(self):
return self._dataset_normalizer
def save(self, directory_path, experiment_id):
save_obj_to_json(directory_path + os.sep + 'dataset_parameters_{}.json'.format(experiment_id),
self.__dict__)
@staticmethod
def load(directory_path, experiment_id):
return load_obj_from_json(directory_path + os.sep + 'dataset_parameters_{}.json'.format(experiment_id),
DatasetParameters)
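The save and load methods above rely on save_obj_to_json and load_obj_from_json from bolsonaro.utils, which are not part of this diff. A minimal sketch consistent with the call sites, assuming the helpers simply round-trip the attribute dictionary through JSON and strip the leading underscores when rebuilding the object, could be:

# Hypothetical sketch of the JSON helpers used by DatasetParameters.save/load.
# The real implementations in bolsonaro.utils may differ.
import json

def save_obj_to_json(file_path, attributes_dict):
    # Dump the object's attribute dictionary (its __dict__) as JSON.
    with open(file_path, 'w') as output_file:
        json.dump(attributes_dict, output_file, indent=4)

def load_obj_from_json(file_path, constructor):
    # Read the JSON back and rebuild the object; attribute names were saved with a
    # leading underscore, so it is stripped before calling the constructor (assumption).
    with open(file_path, 'r') as input_file:
        parameters = json.load(input_file)
    return constructor(**{key.lstrip('_'): value for key, value in parameters.items()})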
from enum import Enum
class Task(Enum):
BINARYCLASSIFICATION = 1
REGRESSION = 2
MULTICLASSIFICATION = 3
#####################################################################################
# MIT License #
# #
# Copyright (C) 2019 Charly Lamothe #
# #
# This file is part of VQ-VAE-Speech. #
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy #
# of this software and associated documentation files (the "Software"), to deal #
# in the Software without restriction, including without limitation the rights #
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell #
# copies of the Software, and to permit persons to whom the Software is #
# furnished to do so, subject to the following conditions: #
# #
# The above copyright notice and this permission notice shall be included in all #
# copies or substantial portions of the Software. #
# #
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, #
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #
# SOFTWARE. #
#####################################################################################
import os
LOG_PATH = os.path.abspath(os.path.dirname(__file__) + os.sep + '..' + os.sep + '..' + os.sep + 'log')
#####################################################################################
# MIT License #
# #
# Copyright (C) 2019 Charly Lamothe #
# #
# This file is part of VQ-VAE-Speech. #
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy #
# of this software and associated documentation files (the "Software"), to deal #
# in the Software without restriction, including without limitation the rights #
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell #
# copies of the Software, and to permit persons to whom the Software is #
# furnished to do so, subject to the following conditions: #
# #
# The above copyright notice and this permission notice shall be included in all #
# copies or substantial portions of the Software. #
# #
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, #
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #
# SOFTWARE. #
#####################################################################################
import logging
from logging.handlers import RotatingFileHandler
import os
import errno
class LoggerFactory(object):
@staticmethod
def create(path, module_name):
# Create logger
logger = logging.getLogger(module_name)
logger.setLevel(logging.DEBUG)
try:
os.makedirs(path)
except OSError as e:
if e.errno != errno.EEXIST:
raise
# Create file handler
fh = RotatingFileHandler(path + os.sep + module_name + '.log', maxBytes=1000000, backupCount=5)
fh.setLevel(logging.DEBUG)
# Create console handler
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
# Create formatter
formatter = logging.Formatter('%(asctime)s - %(filename)s:%(lineno)s - %(name)s - %(levelname)s - %(message)s')
# Add formatter to handlers
fh.setFormatter(formatter)
ch.setFormatter(formatter) # TODO: add another formatter to the console logger?
# Add fh and ch to logger
logger.addHandler(fh)
logger.addHandler(ch)
return logger
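As a brief usage illustration of the factory above; both import paths below are assumptions, since the file names are not visible on this page:

# Hypothetical usage of LoggerFactory; the import paths are assumed.
from bolsonaro.error_handling.logger_factory import LoggerFactory
from bolsonaro.error_handling import LOG_PATH

logger = LoggerFactory.create(LOG_PATH, __name__)
logger.info('Experiment started')          # INFO and above also reaches the console handler
logger.debug('Dataset parameters loaded')  # DEBUG and above goes to the rotating log file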