Merge branch 'wip_clean_scripts' into 'master'

clean scripts See merge request !3

Merge branch 'wip_clean_scripts' into 'master'
feaaeb22 · Charly Lamothe · 69dd8959 · ce8de502 · feaaeb22 · feaaeb22
Commit feaaeb22 authored Nov 29, 2019 by Charly Lamothe
--- a/.gitignore
+++ b/.gitignore
+models/*
+results/*
+*/.kile/*
+*.kilepr
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -87,3 +92,285 @@ target/
 # Mypy cache
 .mypy_cache/
+# latex 
+## Core latex/pdflatex auxiliary files:
+*.aux
+*.lof
+*.log
+*.lot
+*.fls
+*.out
+*.toc
+*.fmt
+*.fot
+*.cb
+*.cb2
+.*.lb
+## Intermediate documents:
+*.dvi
+*.xdv
+*-converted-to.*
+# these rules might exclude image files for figures etc.
+# *.ps
+# *.eps
+# *.pdf
+## Generated if empty string is given at "Please type another file name for output:"
+.pdf
+## Bibliography auxiliary files (bibtex/biblatex/biber):
+*.bbl
+*.bcf
+*.blg
+*-blx.aux
+*-blx.bib
+*.run.xml
+## Build tool auxiliary files:
+*.fdb_latexmk
+*.synctex
+*.synctex(busy)
+*.synctex.gz
+*.synctex.gz(busy)
+*.pdfsync
+## Build tool directories for auxiliary files
+# latexrun
+latex.out/
+## Auxiliary and intermediate files from other packages:
+# algorithms
+*.alg
+*.loa
+# achemso
+acs-*.bib
+# amsthm
+*.thm
+# beamer
+*.nav
+*.pre
+*.snm
+*.vrb
+# changes
+*.soc
+# comment
+*.cut
+# cprotect
+*.cpt
+# elsarticle (documentclass of Elsevier journals)
+*.spl
+# endnotes
+*.ent
+# fixme
+*.lox
+# feynmf/feynmp
+*.mf
+*.mp
+*.t[1-9]
+*.t[1-9][0-9]
+*.tfm
+#(r)(e)ledmac/(r)(e)ledpar
+*.end
+*.?end
+*.[1-9]
+*.[1-9][0-9]
+*.[1-9][0-9][0-9]
+*.[1-9]R
+*.[1-9][0-9]R
+*.[1-9][0-9][0-9]R
+*.eledsec[1-9]
+*.eledsec[1-9]R
+*.eledsec[1-9][0-9]
+*.eledsec[1-9][0-9]R
+*.eledsec[1-9][0-9][0-9]
+*.eledsec[1-9][0-9][0-9]R
+# glossaries
+*.acn
+*.acr
+*.glg
+*.glo
+*.gls
+*.glsdefs
+*.lzo
+*.lzs
+# uncomment this for glossaries-extra (will ignore makeindex's style files!)
+# *.ist
+# gnuplottex
+*-gnuplottex-*
+# gregoriotex
+*.gaux
+*.gtex
+# htlatex
+*.4ct
+*.4tc
+*.idv
+*.lg
+*.trc
+*.xref
+# hyperref
+*.brf
+# knitr
+*-concordance.tex
+# TODO Comment the next line if you want to keep your tikz graphics files
+*.tikz
+*-tikzDictionary
+# listings
+*.lol
+# luatexja-ruby
+*.ltjruby
+# makeidx
+*.idx
+*.ilg
+*.ind
+# minitoc
+*.maf
+*.mlf
+*.mlt
+*.mtc[0-9]*
+*.slf[0-9]*
+*.slt[0-9]*
+*.stc[0-9]*
+# minted
+_minted*
+*.pyg
+# morewrites
+*.mw
+# nomencl
+*.nlg
+*.nlo
+*.nls
+# pax
+*.pax
+# pdfpcnotes
+*.pdfpc
+# sagetex
+*.sagetex.sage
+*.sagetex.py
+*.sagetex.scmd
+# scrwfile
+*.wrt
+# sympy
+*.sout
+*.sympy
+sympy-plots-for-*.tex/
+# pdfcomment
+*.upa
+*.upb
+# pythontex
+*.pytxcode
+pythontex-files-*/
+# tcolorbox
+*.listing
+# thmtools
+*.loe
+# TikZ & PGF
+*.dpth
+*.md5
+*.auxlock
+# todonotes
+*.tdo
+# vhistory
+*.hst
+*.ver
+# easy-todo
+*.lod
+# xcolor
+*.xcp
+# xmpincl
+*.xmpi
+# xindy
+*.xdy
+# xypic precompiled matrices and outlines
+*.xyc
+*.xyd
+# endfloat
+*.ttt
+*.fff
+# Latexian
+TSWLatexianTemp*
+## Editors:
+# WinEdt
+*.bak
+*.sav
+# Texpad
+.texpadtmp
+# LyX
+*.lyx~
+# Kile
+*.backup
+# gummi
+.*.swp
+# KBibTeX
+*~[0-9]*
+# auto folder when using emacs and auctex
+./auto/*
+*.el
+# expex forward references with \gathertags
+*-tags.tex
+# standalone packages
+*.sta
+# Makeindex log files
+*.lpz
+reports/*.pdf
+# Image
+*.png
--- a/README.md
+++ b/README.md
@@ -49,5 +49,16 @@ Project Organization
 Instal project
 --------------
+First install the project's required packages:
 	pip install -r requirements.txt
+Then create a file `.env` by copying the file `.env.example`:
+	cp .env.example .env
+Then set the project directory in the `.env` file:
+	project_dir = "path/to/your/project/directory"	
+This directory will be used for storing the model parameters.
\ No newline at end of file
--- a/TODO.md
+++ b/TODO.md
-* Trouver des jeux de données pertinents
+* Fix pickle loading of ModelRawResults, because saving the model_object leads to import issues.
-* Entraîner et tester des forêts de différentes tailles
+* Fix ModelFactory.load function.
-* Entraîner et tester en regression et classification
+* Fix model results loading in compute_results.py.
-* Entraîner et tester sur différentes modalités (pas seulement des datasets d'images)
+* Check that the OMP multiclass classifier is working as expected.
-* Entraîner avec différents hyperparamètres (d, profondeur, epsilon)
+* In the bayesian search computation, output a different file name depending on the task of the trained model.
-* Appliquer OMP avec différentes valeurs de k (notamment un petit k)
+* Check the best params scores of the regressors (neg_mean_squared_error leads to huge negative values).
-* Faire des figures
+* Prepare the json experiment files to run.
-* Implémenter et comparer les systèmes concurrents
\ No newline at end of file
\ No newline at end of file
--- a/bolsonaro/data/__init__.py
+++ b/bolsonaro/data/__init__.py
--- a/bolsonaro/data/make_dataset.py
+++ b/bolsonaro/data/make_dataset.py
--- a/bolsonaro/example.py
+++ b/bolsonaro/example.py
-from bolsonaro.utils import root_directory
-print(str(root_directory))
\ No newline at end of file
--- a/bolsonaro/models/__init__.py
+++ b/bolsonaro/models/__init__.py
--- a/bolsonaro/models/create_model.py
+++ b/bolsonaro/models/create_model.py
--- a/bolsonaro/utils.py
+++ b/bolsonaro/utils.py
-from pathlib import Path
-root_directory = Path(__file__).parent.parent.absolute()
--- a/bolsonaro/visualization/__init__.py
+++ b/bolsonaro/visualization/__init__.py
--- a/bolsonaro/visualization/visualize.py
+++ b/bolsonaro/visualization/visualize.py
--- a/code/bolsonaro/__init__.py
+++ b/code/bolsonaro/__init__.py
+import os
+# Absolute path of the 'log' directory located two levels above the directory containing this file.
+LOG_PATH = os.path.abspath(os.path.dirname(__file__) + os.sep + '..' + os.sep + '..' + os.sep + 'log')
--- a/bolsonaro/data/.gitkeep
+++ b/bolsonaro/data/.gitkeep
--- a/code/bolsonaro/data/__init__.py
+++ b/code/bolsonaro/data/__init__.py
+import os
+# Absolute path of the 'log' directory located two levels above the directory containing this file.
+LOG_PATH = os.path.abspath(os.path.dirname(__file__) + os.sep + '..' + os.sep + '..' + os.sep + 'log')
--- a/code/bolsonaro/data/dataset.py
+++ b/code/bolsonaro/data/dataset.py
+class Dataset(object):
+    """Hold the train/dev/test splits of one dataset together with its task.
+
+    Every constructor argument is stored unchanged and exposed through a
+    read-only property of the same name.
+    """
+    def __init__(self, task, X_train, X_dev, X_test, y_train,
+        y_dev, y_test, dataset_parameters=None):
+        """Store the task, the six data splits and the optional parameters.
+
+        :param task: task associated with the data (DatasetLoader passes a
+            Task member).
+        :param X_train: inputs of the training split.
+        :param X_dev: inputs of the dev split.
+        :param X_test: inputs of the test split.
+        :param y_train: targets of the training split.
+        :param y_dev: targets of the dev split.
+        :param y_test: targets of the test split.
+        :param dataset_parameters: optional parameters the splits were built
+            from. Defaults to None so that existing seven-argument calls
+            keep working.
+        """
+        self._task = task
+        # Always assigned: the dataset_parameters property used to read an
+        # attribute that was never set, which raised AttributeError.
+        self._dataset_parameters = dataset_parameters
+        self._X_train = X_train
+        self._X_dev = X_dev
+        self._X_test = X_test
+        self._y_train = y_train
+        self._y_dev = y_dev
+        self._y_test = y_test
+    @property
+    def task(self):
+        """Task given at construction."""
+        return self._task
+    @property
+    def dataset_parameters(self):
+        """Parameters given at construction, or None when none were given."""
+        return self._dataset_parameters
+    @property
+    def X_train(self):
+        """Inputs of the training split."""
+        return self._X_train
+    @property
+    def X_dev(self):
+        """Inputs of the dev split."""
+        return self._X_dev
+    @property
+    def X_test(self):
+        """Inputs of the test split."""
+        return self._X_test
+    @property
+    def y_train(self):
+        """Targets of the training split."""
+        return self._y_train
+    @property
+    def y_dev(self):
+        """Targets of the dev split."""
+        return self._y_dev
+    @property
+    def y_test(self):
+        """Targets of the test split."""
+        return self._y_test
--- a/code/bolsonaro/data/dataset_loader.py
+++ b/code/bolsonaro/data/dataset_loader.py
+from bolsonaro.data.dataset import Dataset
+from bolsonaro.data.task import Task
+from bolsonaro.utils import change_binary_func_load
+from sklearn.datasets import load_boston, load_iris, load_diabetes, \
+    load_digits, load_linnerud, load_wine, load_breast_cancer
+from sklearn.datasets import fetch_olivetti_faces, fetch_20newsgroups, \
+    fetch_20newsgroups_vectorized, fetch_lfw_people, fetch_lfw_pairs, \
+    fetch_covtype, fetch_rcv1, fetch_kddcup99, fetch_california_housing
+from sklearn.model_selection import train_test_split
+from sklearn import preprocessing
+class DatasetLoader(object):
+    """Build Dataset objects from the scikit-learn dataset loaders."""
+    @staticmethod
+    def load(dataset_parameters):
+        """Load, split and optionally scale the dataset named in the parameters.
+
+        :param dataset_parameters: object providing `name`, `test_size`,
+            `dev_size`, `random_state` and `dataset_normalizer`.
+        :return: a Dataset holding the task and the train/dev/test splits.
+        :raises ValueError: if the dataset name or the normalizer name is
+            not one of the supported values.
+        """
+        # Dataset name -> (scikit-learn loading function, task).
+        known_datasets = {
+            'boston': (load_boston, Task.REGRESSION),
+            'iris': (load_iris, Task.MULTICLASSIFICATION),
+            'diabetes': (load_diabetes, Task.REGRESSION),
+            'digits': (load_digits, Task.MULTICLASSIFICATION),
+            'linnerud': (load_linnerud, Task.REGRESSION),
+            'wine': (load_wine, Task.MULTICLASSIFICATION),
+            'breast_cancer': (load_breast_cancer, Task.BINARYCLASSIFICATION),
+            # Author's note: bug (no return X_y)
+            'olivetti_faces': (fetch_olivetti_faces, Task.MULTICLASSIFICATION),
+            # Author's note: bug (no return X_y)
+            '20newsgroups': (fetch_20newsgroups, Task.MULTICLASSIFICATION),
+            '20newsgroups_vectorized': (fetch_20newsgroups_vectorized, Task.MULTICLASSIFICATION),
+            # Author's note: needs PIL (image dataset)
+            'lfw_people': (fetch_lfw_people, Task.MULTICLASSIFICATION),
+            'lfw_pairs': (fetch_lfw_pairs, Task.MULTICLASSIFICATION),
+            'covtype': (fetch_covtype, Task.MULTICLASSIFICATION),
+            'rcv1': (fetch_rcv1, Task.MULTICLASSIFICATION),
+            'kddcup99': (fetch_kddcup99, Task.MULTICLASSIFICATION),
+            'california_housing': (fetch_california_housing, Task.REGRESSION),
+        }
+        dataset_name = dataset_parameters.name
+        if dataset_name not in known_datasets:
+            raise ValueError("Unsupported dataset '{}'".format(dataset_name))
+        loading_func, task = known_datasets[dataset_name]
+        if dataset_name == 'breast_cancer':
+            # Only this loader is wrapped with change_binary_func_load before being called.
+            loading_func = change_binary_func_load(loading_func)
+        features, targets = loading_func(return_X_y=True)
+        # Hold out the test split first, then take the dev split from what remains.
+        X_remaining, X_test, y_remaining, y_test = train_test_split(
+            features, targets,
+            test_size=dataset_parameters.test_size,
+            random_state=dataset_parameters.random_state)
+        X_train, X_dev, y_train, y_dev = train_test_split(
+            X_remaining, y_remaining,
+            test_size=dataset_parameters.dev_size,
+            random_state=dataset_parameters.random_state)
+        normalizer_name = dataset_parameters.dataset_normalizer
+        if normalizer_name is not None:
+            # Normalizer name -> scikit-learn preprocessing class.
+            scaler_classes = {
+                'standard': preprocessing.StandardScaler,
+                'minmax': preprocessing.MinMaxScaler,
+                'robust': preprocessing.RobustScaler,
+                'normalizer': preprocessing.Normalizer,
+            }
+            if normalizer_name not in scaler_classes:
+                raise ValueError("Unsupported normalizer '{}'".format(normalizer_name))
+            scaler = scaler_classes[normalizer_name]()
+            # Fit on the training split only; dev and test reuse the fitted scaler.
+            X_train = scaler.fit_transform(X_train)
+            X_dev = scaler.transform(X_dev)
+            X_test = scaler.transform(X_test)
+        return Dataset(task, X_train, X_dev, X_test, y_train, y_dev, y_test)
--- a/code/bolsonaro/data/dataset_parameters.py
+++ b/code/bolsonaro/data/dataset_parameters.py
+from bolsonaro.utils import save_obj_to_json, load_obj_from_json
+import os
+class DatasetParameters(object):
+    """Read-only settings naming a dataset and how to split and scale it."""
+    def __init__(self, name, test_size, dev_size, random_state, dataset_normalizer):
+        """Store the five settings; assignment order fixes the saved key order."""
+        self._name, self._test_size, self._dev_size = name, test_size, dev_size
+        self._random_state, self._dataset_normalizer = random_state, dataset_normalizer
+    @staticmethod
+    def _json_path(directory_path, experiment_id):
+        """Return the path of the parameters file of an experiment."""
+        file_name = 'dataset_parameters_{}.json'.format(experiment_id)
+        return directory_path + os.sep + file_name
+    @property
+    def name(self):
+        """Name of the dataset."""
+        return self._name
+    @property
+    def test_size(self):
+        """Value given as `test_size` at construction."""
+        return self._test_size
+    @property
+    def dev_size(self):
+        """Value given as `dev_size` at construction."""
+        return self._dev_size
+    @property
+    def random_state(self):
+        """Value given as `random_state` at construction."""
+        return self._random_state
+    @property
+    def dataset_normalizer(self):
+        """Name of the normalizer, or None when none was given."""
+        return self._dataset_normalizer
+    def save(self, directory_path, experiment_id):
+        """Pass this object's attribute dictionary to save_obj_to_json.
+
+        :param directory_path: directory part of the target file path.
+        :param experiment_id: identifier inserted in the file name.
+        """
+        target_path = DatasetParameters._json_path(directory_path, experiment_id)
+        save_obj_to_json(target_path, vars(self))
+    @staticmethod
+    def load(directory_path, experiment_id):
+        """Return what load_obj_from_json builds for the experiment's file.
+
+        :param directory_path: directory part of the source file path.
+        :param experiment_id: identifier inserted in the file name.
+        """
+        source_path = DatasetParameters._json_path(directory_path, experiment_id)
+        return load_obj_from_json(source_path, DatasetParameters)
--- a/code/bolsonaro/data/task.py
+++ b/code/bolsonaro/data/task.py
+from enum import Enum
+class Task(Enum):
+    # Kind of learning problem a dataset is used for; DatasetLoader assigns
+    # one member to each dataset it supports.
+    BINARYCLASSIFICATION = 1
+    REGRESSION = 2
+    MULTICLASSIFICATION = 3
--- a/code/bolsonaro/error_handling/__init__.py
+++ b/code/bolsonaro/error_handling/__init__.py
+ #####################################################################################
+ # MIT License                                                                       #
+ #                                                                                   #
+ # Copyright (C) 2019 Charly Lamothe                                                 #
+ #                                                                                   #
+ # This file is part of VQ-VAE-Speech.                                               #
+ #                                                                                   #
+ #   Permission is hereby granted, free of charge, to any person obtaining a copy    #
+ #   of this software and associated documentation files (the "Software"), to deal   #
+ #   in the Software without restriction, including without limitation the rights    #
+ #   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell       #
+ #   copies of the Software, and to permit persons to whom the Software is           #
+ #   furnished to do so, subject to the following conditions:                        #
+ #                                                                                   #
+ #   The above copyright notice and this permission notice shall be included in all  #
+ #   copies or substantial portions of the Software.                                 #
+ #                                                                                   #
+ #   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR      #
+ #   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,        #
+ #   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE     #
+ #   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER          #
+ #   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,   #
+ #   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE   #
+ #   SOFTWARE.                                                                       #
+ #####################################################################################
+import os
+# Absolute path of the 'log' directory located two levels above the directory containing this file.
+LOG_PATH = os.path.abspath(os.path.dirname(__file__) + os.sep + '..' + os.sep + '..' + os.sep + 'log')
--- a/code/bolsonaro/error_handling/logger_factory.py
+++ b/code/bolsonaro/error_handling/logger_factory.py
+ #####################################################################################
+ # MIT License                                                                       #
+ #                                                                                   #
+ # Copyright (C) 2019 Charly Lamothe                                                 #
+ #                                                                                   #
+ # This file is part of VQ-VAE-Speech.                                               #
+ #                                                                                   #
+ #   Permission is hereby granted, free of charge, to any person obtaining a copy    #
+ #   of this software and associated documentation files (the "Software"), to deal   #
+ #   in the Software without restriction, including without limitation the rights    #
+ #   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell       #
+ #   copies of the Software, and to permit persons to whom the Software is           #
+ #   furnished to do so, subject to the following conditions:                        #
+ #                                                                                   #
+ #   The above copyright notice and this permission notice shall be included in all  #
+ #   copies or substantial portions of the Software.                                 #
+ #                                                                                   #
+ #   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR      #
+ #   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,        #
+ #   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE     #
+ #   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER          #
+ #   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,   #
+ #   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE   #
+ #   SOFTWARE.                                                                       #
+ #####################################################################################
+import logging
+from logging.handlers import RotatingFileHandler
+import os
+import errno
+class LoggerFactory(object):
+    """Create loggers that write to a rotating log file and to the console."""
+    @staticmethod
+    def create(path, module_name):
+        """Return the logger named `module_name`, configuring it on first use.
+
+        The logger level is set to DEBUG. When the logger has no handler yet,
+        a rotating file handler (DEBUG) and a console handler (INFO) sharing
+        one formatter are attached. A logger that already has handlers is
+        returned as is, so repeated calls neither duplicate records nor open
+        the log file again.
+
+        :param path: directory receiving `<module_name>.log`; created if missing.
+        :param module_name: logger name, also used as the log file base name.
+        :return: the logging.Logger registered under `module_name`.
+        :raises OSError: if the directory or the log file cannot be created.
+        """
+        logger = logging.getLogger(module_name)
+        logger.setLevel(logging.DEBUG)
+        if logger.handlers:
+            # logging.getLogger returns the same object for the same name, so
+            # attaching handlers again would emit every record several times.
+            return logger
+        os.makedirs(path, exist_ok=True)
+        # Create file handler
+        file_handler = RotatingFileHandler(os.path.join(path, module_name + '.log'),
+            maxBytes=1000000, backupCount=5)
+        file_handler.setLevel(logging.DEBUG)
+        # Create console handler
+        console_handler = logging.StreamHandler()
+        console_handler.setLevel(logging.INFO)
+        # Both handlers share the same record layout
+        formatter = logging.Formatter('%(asctime)s - %(filename)s:%(lineno)s - %(name)s - %(levelname)s - %(message)s')
+        file_handler.setFormatter(formatter)
+        console_handler.setFormatter(formatter) # TODO: add another formatter to the console logger?
+        logger.addHandler(file_handler)
+        logger.addHandler(console_handler)
+        return logger