Commit 44557ada authored by Charly Lamothe

Merge branch 'master' into 'farah_notation_and_related_work'

# Conflicts:
#   .gitignore
parents 7d764f9c feaaeb22
1 merge request: !7 Farah notation and related work
Showing 311 additions and 14 deletions
models/*
results/*
*/.kile/*
*.kilepr
# Byte-compiled / optimized / DLL files
@@ -368,3 +371,6 @@ TSWLatexianTemp*
*.lpz
reports/*.pdf
# Image
*.png
@@ -49,5 +49,16 @@ Project Organization
Install project
--------------
First install the project requirements:
pip install -r requirements.txt
Then create a file `.env` by copying the file `.env.example`:
cp .env.example .env
Then set the project directory in the `.env` file:
project_dir = "path/to/your/project/directory"
This directory will be used for storing the model parameters.
\ No newline at end of file
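The code that reads this setting is not part of this diff; as a rough sketch, assuming a python-dotenv-style setup (the package name and the variable handling are assumptions, not something shown on this page), the value could be read like this:

# Hypothetical sketch: read project_dir from the .env file with python-dotenv.
# The actual project may load this value differently.
import os
from dotenv import load_dotenv

load_dotenv()  # loads key=value pairs from .env into the process environment
project_dir = os.getenv("project_dir")
if project_dir is None:
    raise RuntimeError("project_dir is not set; copy .env.example to .env and edit it")
models_directory = os.path.join(project_dir, "models")  # e.g. where model parameters would be stored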
Old TODO list (translated from French):
* Find relevant datasets
* Train and test forests of different sizes
* Train and test on regression and classification tasks
* Train and test on different modalities (not only image datasets)
* Train with different hyperparameters (d, depth, epsilon)
* Apply OMP with different values of k (in particular a small k)
* Produce figures
* Implement and compare competing systems

New TODO list:
* Fix pickle loading of ModelRawResults, because saving the model_object leads to import issues.
* Fix the ModelFactory.load function.
* Fix model results loading in compute_results.py.
* Check that the OMP multiclass classifier works as expected.
* In the bayesian search computation, output a different file name depending on the task of the trained model.
* Check the best-params scores of the regressors (neg_mean_squared_error leads to huge negative values).
* Prepare the json experiment files to run.
from bolsonaro.utils import root_directory
print(str(root_directory))
\ No newline at end of file
from pathlib import Path
root_directory = Path(__file__).parent.parent.absolute()
import os
LOG_PATH = os.path.abspath(os.path.dirname(__file__) + os.sep + '..' + os.sep + '..' + os.sep + 'log')
File moved
import os
LOG_PATH = os.path.abspath(os.path.dirname(__file__) + os.sep + '..' + os.sep + '..' + os.sep + 'log')
class Dataset(object):
    def __init__(self, task, X_train, X_dev, X_test, y_train,
                 y_dev, y_test, dataset_parameters=None):
        self._task = task
        # Stored so the dataset_parameters property below has a value to return;
        # the attribute was otherwise never initialized.
        self._dataset_parameters = dataset_parameters
self._X_train = X_train
self._X_dev = X_dev
self._X_test = X_test
self._y_train = y_train
self._y_dev = y_dev
self._y_test = y_test
@property
def task(self):
return self._task
@property
def dataset_parameters(self):
return self._dataset_parameters
@property
def X_train(self):
return self._X_train
@property
def X_dev(self):
return self._X_dev
@property
def X_test(self):
return self._X_test
@property
def y_train(self):
return self._y_train
@property
def y_dev(self):
return self._y_dev
@property
def y_test(self):
return self._y_test
from bolsonaro.data.dataset import Dataset
from bolsonaro.data.task import Task
from bolsonaro.utils import change_binary_func_load
from sklearn.datasets import load_boston, load_iris, load_diabetes, \
load_digits, load_linnerud, load_wine, load_breast_cancer
from sklearn.datasets import fetch_olivetti_faces, fetch_20newsgroups, \
fetch_20newsgroups_vectorized, fetch_lfw_people, fetch_lfw_pairs, \
fetch_covtype, fetch_rcv1, fetch_kddcup99, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
class DatasetLoader(object):
@staticmethod
def load(dataset_parameters):
name = dataset_parameters.name
if name == 'boston':
dataset_loading_func = load_boston
task = Task.REGRESSION
elif name == 'iris':
dataset_loading_func = load_iris
task = Task.MULTICLASSIFICATION
elif name == 'diabetes':
dataset_loading_func = load_diabetes
task = Task.REGRESSION
elif name == 'digits':
dataset_loading_func = load_digits
task = Task.MULTICLASSIFICATION
elif name == 'linnerud':
dataset_loading_func = load_linnerud
task = Task.REGRESSION
elif name == 'wine':
dataset_loading_func = load_wine
task = Task.MULTICLASSIFICATION
elif name == 'breast_cancer':
dataset_loading_func = change_binary_func_load(load_breast_cancer)
task = Task.BINARYCLASSIFICATION
elif name == 'olivetti_faces': # bug (no return X_y)
dataset_loading_func = fetch_olivetti_faces
task = Task.MULTICLASSIFICATION
elif name == '20newsgroups': # bug (no return X_y)
dataset_loading_func = fetch_20newsgroups
task = Task.MULTICLASSIFICATION
elif name == '20newsgroups_vectorized':
dataset_loading_func = fetch_20newsgroups_vectorized
task = Task.MULTICLASSIFICATION
elif name == 'lfw_people': # needs PIL (image dataset)
dataset_loading_func = fetch_lfw_people
task = Task.MULTICLASSIFICATION
elif name == 'lfw_pairs':
dataset_loading_func = fetch_lfw_pairs
task = Task.MULTICLASSIFICATION
elif name == 'covtype':
dataset_loading_func = fetch_covtype
task = Task.MULTICLASSIFICATION
elif name == 'rcv1':
dataset_loading_func = fetch_rcv1
task = Task.MULTICLASSIFICATION
elif name == 'kddcup99':
dataset_loading_func = fetch_kddcup99
task = Task.MULTICLASSIFICATION
elif name == 'california_housing':
dataset_loading_func = fetch_california_housing
task = Task.REGRESSION
else:
raise ValueError("Unsupported dataset '{}'".format(name))
X, y = dataset_loading_func(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=dataset_parameters.test_size,
random_state=dataset_parameters.random_state)
X_train, X_dev, y_train, y_dev = train_test_split(X_train, y_train,
test_size=dataset_parameters.dev_size,
random_state=dataset_parameters.random_state)
if dataset_parameters.dataset_normalizer is not None:
if dataset_parameters.dataset_normalizer == 'standard':
scaler = preprocessing.StandardScaler()
elif dataset_parameters.dataset_normalizer == 'minmax':
scaler = preprocessing.MinMaxScaler()
elif dataset_parameters.dataset_normalizer == 'robust':
scaler = preprocessing.RobustScaler()
elif dataset_parameters.dataset_normalizer == 'normalizer':
scaler = preprocessing.Normalizer()
else:
raise ValueError("Unsupported normalizer '{}'".format(dataset_parameters.dataset_normalizer))
X_train = scaler.fit_transform(X_train)
X_dev = scaler.transform(X_dev)
X_test = scaler.transform(X_test)
return Dataset(task, X_train,
X_dev, X_test, y_train, y_dev, y_test)
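As a usage illustration, the loader above might be combined with the DatasetParameters class shown next in this commit as follows; the module file names and all parameter values are assumptions for the sketch, not values taken from the project's experiment files:

# Hypothetical usage of DatasetParameters and DatasetLoader (illustrative values only;
# the exact module file names are assumed).
from bolsonaro.data.dataset_loader import DatasetLoader
from bolsonaro.data.dataset_parameters import DatasetParameters

dataset_parameters = DatasetParameters(
    name='boston',                  # any name handled in DatasetLoader.load
    test_size=0.2,                  # fraction of the data held out for the test split
    dev_size=0.2,                   # fraction of the remaining train data held out for the dev split
    random_state=42,                # seed shared by both train_test_split calls
    dataset_normalizer='standard'   # 'standard', 'minmax', 'robust', 'normalizer' or None
)
dataset = DatasetLoader.load(dataset_parameters)
print(dataset.task, dataset.X_train.shape, dataset.X_dev.shape, dataset.X_test.shape)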
from bolsonaro.utils import save_obj_to_json, load_obj_from_json
import os
class DatasetParameters(object):
def __init__(self, name, test_size, dev_size, random_state, dataset_normalizer):
self._name = name
self._test_size = test_size
self._dev_size = dev_size
self._random_state = random_state
self._dataset_normalizer = dataset_normalizer
@property
def name(self):
return self._name
@property
def test_size(self):
return self._test_size
@property
def dev_size(self):
return self._dev_size
@property
def random_state(self):
return self._random_state
@property
def dataset_normalizer(self):
return self._dataset_normalizer
def save(self, directory_path, experiment_id):
save_obj_to_json(directory_path + os.sep + 'dataset_parameters_{}.json'.format(experiment_id),
self.__dict__)
@staticmethod
def load(directory_path, experiment_id):
return load_obj_from_json(directory_path + os.sep + 'dataset_parameters_{}.json'.format(experiment_id),
DatasetParameters)
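The save and load methods above rely on save_obj_to_json and load_obj_from_json from bolsonaro.utils, which are not part of this diff. A minimal sketch consistent with the call sites, assuming the helpers simply round-trip the attribute dictionary through JSON and strip the leading underscores when rebuilding the object, could be:

# Hypothetical sketch of the JSON helpers used by DatasetParameters.save/load.
# The real implementations in bolsonaro.utils may differ.
import json

def save_obj_to_json(file_path, attributes_dict):
    # Dump the object's attribute dictionary (its __dict__) as JSON.
    with open(file_path, 'w') as output_file:
        json.dump(attributes_dict, output_file, indent=4)

def load_obj_from_json(file_path, constructor):
    # Read the JSON back and rebuild the object; attribute names were saved with a
    # leading underscore, so it is stripped before calling the constructor (assumption).
    with open(file_path, 'r') as input_file:
        parameters = json.load(input_file)
    return constructor(**{key.lstrip('_'): value for key, value in parameters.items()})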
from enum import Enum
class Task(Enum):
BINARYCLASSIFICATION = 1
REGRESSION = 2
MULTICLASSIFICATION = 3
#####################################################################################
# MIT License #
# #
# Copyright (C) 2019 Charly Lamothe #
# #
# This file is part of VQ-VAE-Speech. #
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy #
# of this software and associated documentation files (the "Software"), to deal #
# in the Software without restriction, including without limitation the rights #
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell #
# copies of the Software, and to permit persons to whom the Software is #
# furnished to do so, subject to the following conditions: #
# #
# The above copyright notice and this permission notice shall be included in all #
# copies or substantial portions of the Software. #
# #
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, #
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #
# SOFTWARE. #
#####################################################################################
import os
LOG_PATH = os.path.abspath(os.path.dirname(__file__) + os.sep + '..' + os.sep + '..' + os.sep + 'log')
#####################################################################################
# MIT License #
# #
# Copyright (C) 2019 Charly Lamothe #
# #
# This file is part of VQ-VAE-Speech. #
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy #
# of this software and associated documentation files (the "Software"), to deal #
# in the Software without restriction, including without limitation the rights #
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell #
# copies of the Software, and to permit persons to whom the Software is #
# furnished to do so, subject to the following conditions: #
# #
# The above copyright notice and this permission notice shall be included in all #
# copies or substantial portions of the Software. #
# #
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, #
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #
# SOFTWARE. #
#####################################################################################
import logging
from logging.handlers import RotatingFileHandler
import os
import errno
class LoggerFactory(object):
@staticmethod
def create(path, module_name):
# Create logger
logger = logging.getLogger(module_name)
logger.setLevel(logging.DEBUG)
try:
os.makedirs(path)
except OSError as e:
if e.errno != errno.EEXIST:
raise
# Create file handler
fh = RotatingFileHandler(path + os.sep + module_name + '.log', maxBytes=1000000, backupCount=5)
fh.setLevel(logging.DEBUG)
# Create console handler
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
# Create formatter
formatter = logging.Formatter('%(asctime)s - %(filename)s:%(lineno)s - %(name)s - %(levelname)s - %(message)s')
# Add formatter to handlers
fh.setFormatter(formatter)
ch.setFormatter(formatter) # TODO: add another formatter to the console logger?
# Add fh and ch to logger
logger.addHandler(fh)
logger.addHandler(ch)
return logger
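As a brief usage illustration of the factory above; both import paths below are assumptions, since the file names are not visible on this page:

# Hypothetical usage of LoggerFactory; the import paths are assumed.
from bolsonaro.error_handling.logger_factory import LoggerFactory
from bolsonaro.error_handling import LOG_PATH

logger = LoggerFactory.create(LOG_PATH, __name__)
logger.info('Experiment started')          # INFO and above also reaches the console handler
logger.debug('Dataset parameters loaded')  # DEBUG and above goes to the rotating log file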