Skip to content
Snippets Groups Projects
Commit 44557ada authored by Charly Lamothe's avatar Charly Lamothe
Browse files

Merge branch 'master' into 'farah_notation_and_related_work'

# Conflicts:
#   .gitignore
parents 7d764f9c feaaeb22
No related branches found
No related tags found
1 merge request!7Farah notation and related work
This commit is part of merge request !7. Comments created here will be created in the context of that merge request.
Showing
with 311 additions and 14 deletions
models/*
results/*
*/.kile/*
*.kilepr
# Byte-compiled / optimized / DLL files
......@@ -368,3 +371,6 @@ TSWLatexianTemp*
*.lpz
reports/*.pdf
# Image
*.png
......@@ -49,5 +49,16 @@ Project Organization
Install project
---------------
First install the project package:
pip install -r requirements.txt
Then create a file `.env` by copying the file `.env.example`:
cp .env.example .env
Then you must set the project directory in the `.env` file :
project_dir = "path/to/your/project/directory"
This directory will be used for storing the model parameters.
\ No newline at end of file
* Trouver des jeux de données pertinents
* Entraîner et tester des forêts de différentes tailles
* Entraîner et tester en régression et classification
* Entraîner et tester sur différentes modalités (pas seulement des datasets d'images)
* Entraîner avec différents hyperparamètres (d, profondeur, epsilon)
* Appliquer OMP avec différentes valeurs de k (notamment un petit k)
* Faire des figures
* Implémenter et comparer les systèmes concurrents
\ No newline at end of file
* Fix pickle loading of ModelRawResults, because saving the model_object leads to import issues.
* Fix ModelFactory.load function.
* Fix model results loading in compute_results.py.
* Check that the OMP multiclass classifier is working as expected.
* In the bayesian search computation, output a different file name depending on the task of the trained model.
* Check the best params scores of the regressors (neg_mean_squared_error leads to huge negative values).
* Prepare the json experiment files to run.
\ No newline at end of file
from bolsonaro.utils import root_directory
# Display the project root directory; print() applies str() to its argument itself.
print(root_directory)
\ No newline at end of file
from pathlib import Path
# Directory two levels above this file.
# The path is made absolute *before* walking up: on a bare relative
# `__file__` (e.g. 'utils.py'), `.parent.parent` would otherwise stay at '.'
# and the result would wrongly be the current working directory.
root_directory = Path(__file__).absolute().parent.parent
import os
# Absolute path of the 'log' directory located two levels above the directory
# containing this file. os.path.join is used instead of manual os.sep
# concatenation so that an empty dirname (relative `__file__`) does not turn
# the path into '/../../log', which would resolve to '/log'.
LOG_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, 'log'))
File moved
import os
# Absolute path of the 'log' directory located two levels above the directory
# containing this file. os.path.join is used instead of manual os.sep
# concatenation so that an empty dirname (relative `__file__`) does not turn
# the path into '/../../log', which would resolve to '/log'.
LOG_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, 'log'))
class Dataset(object):
    """Read-only container for a task and its train/dev/test splits.

    The values are stored exactly as given: nothing is copied, converted or
    validated.
    """

    def __init__(self, task, X_train, X_dev, X_test, y_train,
                 y_dev, y_test, dataset_parameters=None):
        """Store the task and the six split arrays.

        Args:
            task: Task identifier associated with the data.
            X_train: Inputs of the train split.
            X_dev: Inputs of the dev split.
            X_test: Inputs of the test split.
            y_train: Targets of the train split.
            y_dev: Targets of the dev split.
            y_test: Targets of the test split.
            dataset_parameters: Optional parameters the dataset was built
                from. Defaults to None so that existing callers, which pass
                seven arguments, keep working.
        """
        self._task = task
        self._X_train = X_train
        self._X_dev = X_dev
        self._X_test = X_test
        self._y_train = y_train
        self._y_dev = y_dev
        self._y_test = y_test
        # Previously never assigned, which made the `dataset_parameters`
        # property raise AttributeError on every access.
        self._dataset_parameters = dataset_parameters

    @property
    def task(self):
        """Task given at construction."""
        return self._task

    @property
    def dataset_parameters(self):
        """Dataset parameters given at construction, or None if omitted."""
        return self._dataset_parameters

    @property
    def X_train(self):
        """Inputs of the train split."""
        return self._X_train

    @property
    def X_dev(self):
        """Inputs of the dev split."""
        return self._X_dev

    @property
    def X_test(self):
        """Inputs of the test split."""
        return self._X_test

    @property
    def y_train(self):
        """Targets of the train split."""
        return self._y_train

    @property
    def y_dev(self):
        """Targets of the dev split."""
        return self._y_dev

    @property
    def y_test(self):
        """Targets of the test split."""
        return self._y_test
from bolsonaro.data.dataset import Dataset
from bolsonaro.data.task import Task
from bolsonaro.utils import change_binary_func_load
from sklearn.datasets import load_boston, load_iris, load_diabetes, \
load_digits, load_linnerud, load_wine, load_breast_cancer
from sklearn.datasets import fetch_olivetti_faces, fetch_20newsgroups, \
fetch_20newsgroups_vectorized, fetch_lfw_people, fetch_lfw_pairs, \
fetch_covtype, fetch_rcv1, fetch_kddcup99, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
class DatasetLoader(object):
    """Load a scikit-learn dataset by name and split it into train/dev/test."""

    @staticmethod
    def load(dataset_parameters):
        """Load, split and optionally normalize the requested dataset.

        Args:
            dataset_parameters: Object exposing `name`, `test_size`,
                `dev_size`, `random_state` and `dataset_normalizer`.

        Returns:
            A `Dataset` holding the task and the train/dev/test splits.

        Raises:
            ValueError: If the dataset name or the normalizer name is not
                supported.
        """
        # Built inside the method so that the loader and task names are only
        # resolved when `load` is actually called, as in the original chain.
        loaders = {
            'boston': (load_boston, Task.REGRESSION),
            'iris': (load_iris, Task.MULTICLASSIFICATION),
            'diabetes': (load_diabetes, Task.REGRESSION),
            'digits': (load_digits, Task.MULTICLASSIFICATION),
            'linnerud': (load_linnerud, Task.REGRESSION),
            'wine': (load_wine, Task.MULTICLASSIFICATION),
            'breast_cancer': (load_breast_cancer, Task.BINARYCLASSIFICATION),
            'olivetti_faces': (fetch_olivetti_faces, Task.MULTICLASSIFICATION),
            '20newsgroups': (fetch_20newsgroups, Task.MULTICLASSIFICATION),
            '20newsgroups_vectorized': (fetch_20newsgroups_vectorized, Task.MULTICLASSIFICATION),
            # needs PIL (image dataset)
            'lfw_people': (fetch_lfw_people, Task.MULTICLASSIFICATION),
            'lfw_pairs': (fetch_lfw_pairs, Task.MULTICLASSIFICATION),
            'covtype': (fetch_covtype, Task.MULTICLASSIFICATION),
            'rcv1': (fetch_rcv1, Task.MULTICLASSIFICATION),
            'kddcup99': (fetch_kddcup99, Task.MULTICLASSIFICATION),
            'california_housing': (fetch_california_housing, Task.REGRESSION),
        }

        name = dataset_parameters.name
        if name not in loaders:
            raise ValueError("Unsupported dataset '{}'".format(name))
        dataset_loading_func, task = loaders[name]
        if name == 'breast_cancer':
            # Wrapped only when selected, so the helper is not invoked for
            # the other datasets.
            dataset_loading_func = change_binary_func_load(dataset_loading_func)

        X, y = DatasetLoader._load_X_y(dataset_loading_func)

        # First carve out the test split, then take the dev split out of the
        # remaining train data; both splits share the same random state.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y,
            test_size=dataset_parameters.test_size,
            random_state=dataset_parameters.random_state)
        X_train, X_dev, y_train, y_dev = train_test_split(
            X_train, y_train,
            test_size=dataset_parameters.dev_size,
            random_state=dataset_parameters.random_state)

        X_train, X_dev, X_test = DatasetLoader._normalize(
            dataset_parameters.dataset_normalizer, X_train, X_dev, X_test)

        return Dataset(task, X_train,
            X_dev, X_test, y_train, y_dev, y_test)

    @staticmethod
    def _load_X_y(dataset_loading_func):
        """Return `(X, y)` from a loader, with or without `return_X_y` support."""
        try:
            X, y = dataset_loading_func(return_X_y=True)
        except TypeError:
            # Some loaders (depending on the installed scikit-learn version)
            # do not accept `return_X_y`; fall back to the returned bunch.
            # NOTE(review): assumes the bunch exposes `data` and `target`.
            bunch = dataset_loading_func()
            X, y = bunch.data, bunch.target
        return X, y

    @staticmethod
    def _normalize(normalizer_name, X_train, X_dev, X_test):
        """Scale the three input splits with the scaler named `normalizer_name`.

        The scaler is fitted on the train split only and then applied to the
        dev and test splits. When `normalizer_name` is None the splits are
        returned unchanged.
        """
        if normalizer_name is None:
            return X_train, X_dev, X_test

        scalers = {
            'standard': preprocessing.StandardScaler,
            'minmax': preprocessing.MinMaxScaler,
            'robust': preprocessing.RobustScaler,
            'normalizer': preprocessing.Normalizer,
        }
        if normalizer_name not in scalers:
            raise ValueError("Unsupported normalizer '{}'".format(normalizer_name))

        scaler = scalers[normalizer_name]()
        X_train = scaler.fit_transform(X_train)
        X_dev = scaler.transform(X_dev)
        X_test = scaler.transform(X_test)
        return X_train, X_dev, X_test
from bolsonaro.utils import save_obj_to_json, load_obj_from_json
import os
class DatasetParameters(object):
    """Immutable-style holder for the settings used to build a dataset.

    The instance `__dict__` (keys `_name`, `_test_size`, `_dev_size`,
    `_random_state`, `_dataset_normalizer`, in that order) is what `save`
    passes to the JSON helper.
    """

    def __init__(self, name, test_size, dev_size, random_state, dataset_normalizer):
        """Store the five settings as given, without validation."""
        # Attribute order is kept stable because `save` passes `__dict__`.
        self._name = name
        self._test_size = test_size
        self._dev_size = dev_size
        self._random_state = random_state
        self._dataset_normalizer = dataset_normalizer

    name = property(
        lambda self: self._name,
        doc="Dataset name given at construction.")

    test_size = property(
        lambda self: self._test_size,
        doc="Test split size given at construction.")

    dev_size = property(
        lambda self: self._dev_size,
        doc="Dev split size given at construction.")

    random_state = property(
        lambda self: self._random_state,
        doc="Random state given at construction.")

    dataset_normalizer = property(
        lambda self: self._dataset_normalizer,
        doc="Normalizer name given at construction (may be None).")

    @staticmethod
    def _json_path(directory_path, experiment_id):
        """Build the path of the JSON file for `experiment_id` in `directory_path`."""
        file_name = 'dataset_parameters_{}.json'.format(experiment_id)
        return directory_path + os.sep + file_name

    def save(self, directory_path, experiment_id):
        """Write this instance's attributes to the experiment's JSON file."""
        target = DatasetParameters._json_path(directory_path, experiment_id)
        save_obj_to_json(target, self.__dict__)

    @staticmethod
    def load(directory_path, experiment_id):
        """Return what `load_obj_from_json` builds from the experiment's JSON file."""
        source = DatasetParameters._json_path(directory_path, experiment_id)
        return load_obj_from_json(source, DatasetParameters)
from enum import Enum
class Task(Enum):
    """Closed set of learning task identifiers."""

    # Two-class classification.
    BINARYCLASSIFICATION = 1

    # Regression.
    REGRESSION = 2

    # Classification with more than two classes.
    MULTICLASSIFICATION = 3
#####################################################################################
# MIT License #
# #
# Copyright (C) 2019 Charly Lamothe #
# #
# This file is part of VQ-VAE-Speech. #
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy #
# of this software and associated documentation files (the "Software"), to deal #
# in the Software without restriction, including without limitation the rights #
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell #
# copies of the Software, and to permit persons to whom the Software is #
# furnished to do so, subject to the following conditions: #
# #
# The above copyright notice and this permission notice shall be included in all #
# copies or substantial portions of the Software. #
# #
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, #
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #
# SOFTWARE. #
#####################################################################################
import os
# Absolute path of the 'log' directory located two levels above the directory
# containing this file. os.path.join is used instead of manual os.sep
# concatenation so that an empty dirname (relative `__file__`) does not turn
# the path into '/../../log', which would resolve to '/log'.
LOG_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, 'log'))
#####################################################################################
# MIT License #
# #
# Copyright (C) 2019 Charly Lamothe #
# #
# This file is part of VQ-VAE-Speech. #
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy #
# of this software and associated documentation files (the "Software"), to deal #
# in the Software without restriction, including without limitation the rights #
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell #
# copies of the Software, and to permit persons to whom the Software is #
# furnished to do so, subject to the following conditions: #
# #
# The above copyright notice and this permission notice shall be included in all #
# copies or substantial portions of the Software. #
# #
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, #
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #
# SOFTWARE. #
#####################################################################################
import logging
from logging.handlers import RotatingFileHandler
import os
import errno
class LoggerFactory(object):
    """Factory for loggers that write to a rotating file and to the console."""

    @staticmethod
    def create(path, module_name):
        """Return the logger named `module_name`, configured with two handlers.

        The logger gets a rotating file handler writing to
        `<path>/<module_name>.log` (DEBUG level) and a console handler
        (INFO level). Calling this method again for the same logger and log
        file does not add the handlers a second time.

        Args:
            path: Directory of the log file; created if it does not exist.
            module_name: Logger name, also used as the log file base name.

        Returns:
            The configured `logging.Logger`.

        Raises:
            OSError: If the log directory cannot be created.
        """
        # Create logger
        logger = logging.getLogger(module_name)
        logger.setLevel(logging.DEBUG)

        # An already existing directory is fine; other failures propagate.
        os.makedirs(path, exist_ok=True)

        log_file = path + os.sep + module_name + '.log'
        formatter = logging.Formatter('%(asctime)s - %(filename)s:%(lineno)s - %(name)s - %(levelname)s - %(message)s')

        # logging.getLogger returns the same object for a given name, so a
        # second call would otherwise stack duplicate handlers and emit every
        # record several times. File handlers store their target as an
        # absolute path in `baseFilename`.
        absolute_log_file = os.path.abspath(log_file)
        has_file_handler = any(
            isinstance(handler, RotatingFileHandler)
            and handler.baseFilename == absolute_log_file
            for handler in logger.handlers)
        if not has_file_handler:
            # Create file handler
            fh = RotatingFileHandler(log_file, maxBytes=1000000, backupCount=5)
            fh.setLevel(logging.DEBUG)
            fh.setFormatter(formatter)
            logger.addHandler(fh)

        # FileHandler subclasses StreamHandler, hence the explicit exclusion
        # when looking for an existing console handler.
        has_console_handler = any(
            isinstance(handler, logging.StreamHandler)
            and not isinstance(handler, logging.FileHandler)
            for handler in logger.handlers)
        if not has_console_handler:
            # Create console handler
            ch = logging.StreamHandler()
            ch.setLevel(logging.INFO)
            ch.setFormatter(formatter)  # TODO: add another formatter to the console logger?
            logger.addHandler(ch)

        return logger
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment