Skip to content
Snippets Groups Projects
Commit feaaeb22 authored by Charly Lamothe's avatar Charly Lamothe
Browse files

Merge branch 'wip_clean_scripts' into 'master'

clean scripts

See merge request !3
parents 69dd8959 ce8de502
No related branches found
No related tags found
1 merge request!3clean scripts
Showing
with 592 additions and 14 deletions
models/*
results/*
*/.kile/*
*.kilepr
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/
*.py[cod] *.py[cod]
...@@ -87,3 +92,285 @@ target/ ...@@ -87,3 +92,285 @@ target/
# Mypy cache # Mypy cache
.mypy_cache/ .mypy_cache/
# latex
## Core latex/pdflatex auxiliary files:
*.aux
*.lof
*.log
*.lot
*.fls
*.out
*.toc
*.fmt
*.fot
*.cb
*.cb2
.*.lb
## Intermediate documents:
*.dvi
*.xdv
*-converted-to.*
# these rules might exclude image files for figures etc.
# *.ps
# *.eps
# *.pdf
## Generated if empty string is given at "Please type another file name for output:"
.pdf
## Bibliography auxiliary files (bibtex/biblatex/biber):
*.bbl
*.bcf
*.blg
*-blx.aux
*-blx.bib
*.run.xml
## Build tool auxiliary files:
*.fdb_latexmk
*.synctex
*.synctex(busy)
*.synctex.gz
*.synctex.gz(busy)
*.pdfsync
## Build tool directories for auxiliary files
# latexrun
latex.out/
## Auxiliary and intermediate files from other packages:
# algorithms
*.alg
*.loa
# achemso
acs-*.bib
# amsthm
*.thm
# beamer
*.nav
*.pre
*.snm
*.vrb
# changes
*.soc
# comment
*.cut
# cprotect
*.cpt
# elsarticle (documentclass of Elsevier journals)
*.spl
# endnotes
*.ent
# fixme
*.lox
# feynmf/feynmp
*.mf
*.mp
*.t[1-9]
*.t[1-9][0-9]
*.tfm
#(r)(e)ledmac/(r)(e)ledpar
*.end
*.?end
*.[1-9]
*.[1-9][0-9]
*.[1-9][0-9][0-9]
*.[1-9]R
*.[1-9][0-9]R
*.[1-9][0-9][0-9]R
*.eledsec[1-9]
*.eledsec[1-9]R
*.eledsec[1-9][0-9]
*.eledsec[1-9][0-9]R
*.eledsec[1-9][0-9][0-9]
*.eledsec[1-9][0-9][0-9]R
# glossaries
*.acn
*.acr
*.glg
*.glo
*.gls
*.glsdefs
*.lzo
*.lzs
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
# *.ist
# gnuplottex
*-gnuplottex-*
# gregoriotex
*.gaux
*.gtex
# htlatex
*.4ct
*.4tc
*.idv
*.lg
*.trc
*.xref
# hyperref
*.brf
# knitr
*-concordance.tex
# TODO Comment the next line if you want to keep your tikz graphics files
*.tikz
*-tikzDictionary
# listings
*.lol
# luatexja-ruby
*.ltjruby
# makeidx
*.idx
*.ilg
*.ind
# minitoc
*.maf
*.mlf
*.mlt
*.mtc[0-9]*
*.slf[0-9]*
*.slt[0-9]*
*.stc[0-9]*
# minted
_minted*
*.pyg
# morewrites
*.mw
# nomencl
*.nlg
*.nlo
*.nls
# pax
*.pax
# pdfpcnotes
*.pdfpc
# sagetex
*.sagetex.sage
*.sagetex.py
*.sagetex.scmd
# scrwfile
*.wrt
# sympy
*.sout
*.sympy
sympy-plots-for-*.tex/
# pdfcomment
*.upa
*.upb
# pythontex
*.pytxcode
pythontex-files-*/
# tcolorbox
*.listing
# thmtools
*.loe
# TikZ & PGF
*.dpth
*.md5
*.auxlock
# todonotes
*.tdo
# vhistory
*.hst
*.ver
# easy-todo
*.lod
# xcolor
*.xcp
# xmpincl
*.xmpi
# xindy
*.xdy
# xypic precompiled matrices and outlines
*.xyc
*.xyd
# endfloat
*.ttt
*.fff
# Latexian
TSWLatexianTemp*
## Editors:
# WinEdt
*.bak
*.sav
# Texpad
.texpadtmp
# LyX
*.lyx~
# Kile
*.backup
# gummi
.*.swp
# KBibTeX
*~[0-9]*
# auto folder when using emacs and auctex
./auto/*
*.el
# expex forward references with \gathertags
*-tags.tex
# standalone packages
*.sta
# Makeindex log files
*.lpz
reports/*.pdf
# Image
*.png
...@@ -49,5 +49,16 @@ Project Organization ...@@ -49,5 +49,16 @@ Project Organization
Install project Install project
-------------- --------------
First install the project package:
pip install -r requirements.txt pip install -r requirements.txt
Then create a file `.env` by copying the file `.env.example`:
cp .env.example .env
Then you must set the project directory in the `.env` file:
project_dir = "path/to/your/project/directory"
This directory will be used for storing the model parameters.
\ No newline at end of file
* Trouver des jeux de données pertinents * Fix pickle loading of ModelRawResults, because saving the model_object leads to import issues.
* Entraîner et tester des forêts de différentes tailles * Fix ModelFactory.load function.
* Entraîner et tester en regression et classification * Fix model results loading in compute_results.py.
* Entraîner et tester sur différentes modalités (pas seulement des datasets d'images) * Check that omp multiclasses classifier is working as expected.
* Entraîner avec différents hyperparamètres (d, profondeur, epsilon) * In the bayesian search computation, output a different file name depending on the task of the trained model.
* Appliquer OMP avec différentes valeurs de k (notamment un petit k) * Check the best params scores of the regressors (neg_mean_squared_error leads to huge negative values).
* Faire des figures * Prepare the json experiment files to run.
* Implémenter et comparer les systèmes concurrents \ No newline at end of file
\ No newline at end of file
from bolsonaro.utils import root_directory
# Show the project root exposed by bolsonaro.utils; print() applies str() itself.
print(root_directory)
\ No newline at end of file
from pathlib import Path
# Absolute path of the directory two levels above this file.
# The path is made absolute *before* walking up: with a relative __file__ that
# has no directory component, Path(...).parent.parent collapses to '.' and
# would wrongly yield the current working directory itself.
root_directory = Path(__file__).absolute().parent.parent
import os
# Absolute path of the 'log' directory located two levels above the directory
# containing this file. os.path.join is used instead of string concatenation so
# that an empty dirname (relative __file__) does not produce the root-anchored
# path '/../../log'.
LOG_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, 'log'))
File moved
import os
# Absolute path of the 'log' directory located two levels above the directory
# containing this file. os.path.join is used instead of string concatenation so
# that an empty dirname (relative __file__) does not produce the root-anchored
# path '/../../log'.
LOG_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, 'log'))
class Dataset(object):
    """Read-only container for a task identifier and its train/dev/test splits.

    Every constructor argument is stored as-is and exposed through a property
    of the same name; no copy or validation is performed.
    """

    def __init__(self, task, X_train, X_dev, X_test, y_train,
                 y_dev, y_test, dataset_parameters=None):
        """Store the task, the six split arrays and, optionally, the parameters.

        :param task: identifier of the learning task (DatasetLoader passes a
            ``Task`` member).
        :param X_train: training features.
        :param X_dev: development (validation) features.
        :param X_test: test features.
        :param y_train: training targets.
        :param y_dev: development (validation) targets.
        :param y_test: test targets.
        :param dataset_parameters: optional object describing how the dataset
            was built. Defaults to ``None`` so that existing seven-argument
            callers keep working.
        """
        self._task = task
        self._X_train = X_train
        self._X_dev = X_dev
        self._X_test = X_test
        self._y_train = y_train
        self._y_dev = y_dev
        self._y_test = y_test
        # Previously never assigned, which made the `dataset_parameters`
        # property raise AttributeError on every access.
        self._dataset_parameters = dataset_parameters

    @property
    def task(self):
        return self._task

    @property
    def dataset_parameters(self):
        """Parameters given at construction time, or ``None`` if omitted."""
        return self._dataset_parameters

    @property
    def X_train(self):
        return self._X_train

    @property
    def X_dev(self):
        return self._X_dev

    @property
    def X_test(self):
        return self._X_test

    @property
    def y_train(self):
        return self._y_train

    @property
    def y_dev(self):
        return self._y_dev

    @property
    def y_test(self):
        return self._y_test
from bolsonaro.data.dataset import Dataset
from bolsonaro.data.task import Task
from bolsonaro.utils import change_binary_func_load
from sklearn.datasets import load_boston, load_iris, load_diabetes, \
load_digits, load_linnerud, load_wine, load_breast_cancer
from sklearn.datasets import fetch_olivetti_faces, fetch_20newsgroups, \
fetch_20newsgroups_vectorized, fetch_lfw_people, fetch_lfw_pairs, \
fetch_covtype, fetch_rcv1, fetch_kddcup99, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
class DatasetLoader(object):
    """Loads a named scikit-learn dataset and splits it into train/dev/test."""

    @staticmethod
    def load(dataset_parameters):
        """Build a ``Dataset`` from the given parameters.

        Reads ``name``, ``test_size``, ``dev_size``, ``random_state`` and
        ``dataset_normalizer`` from ``dataset_parameters``.

        :param dataset_parameters: object exposing the attributes listed above.
        :return: a ``Dataset`` holding the task and the six split arrays.
        :raises ValueError: if the dataset name or the normalizer name is not
            one of the supported values.
        """
        # Loaders usable without any wrapping, paired with their task.
        direct_loaders = {
            'boston': (load_boston, Task.REGRESSION),
            'iris': (load_iris, Task.MULTICLASSIFICATION),
            'diabetes': (load_diabetes, Task.REGRESSION),
            'digits': (load_digits, Task.MULTICLASSIFICATION),
            'linnerud': (load_linnerud, Task.REGRESSION),
            'wine': (load_wine, Task.MULTICLASSIFICATION),
            # Author's note: bug (no return_X_y)
            'olivetti_faces': (fetch_olivetti_faces, Task.MULTICLASSIFICATION),
            # Author's note: bug (no return_X_y)
            '20newsgroups': (fetch_20newsgroups, Task.MULTICLASSIFICATION),
            '20newsgroups_vectorized': (fetch_20newsgroups_vectorized,
                                        Task.MULTICLASSIFICATION),
            # Author's note: needs PIL (image dataset)
            'lfw_people': (fetch_lfw_people, Task.MULTICLASSIFICATION),
            'lfw_pairs': (fetch_lfw_pairs, Task.MULTICLASSIFICATION),
            'covtype': (fetch_covtype, Task.MULTICLASSIFICATION),
            'rcv1': (fetch_rcv1, Task.MULTICLASSIFICATION),
            'kddcup99': (fetch_kddcup99, Task.MULTICLASSIFICATION),
            'california_housing': (fetch_california_housing, Task.REGRESSION),
        }

        name = dataset_parameters.name
        if name == 'breast_cancer':
            # The wrapper is only created when this dataset is requested.
            fetch = change_binary_func_load(load_breast_cancer)
            task = Task.BINARYCLASSIFICATION
        elif name in direct_loaders:
            fetch, task = direct_loaders[name]
        else:
            raise ValueError("Unsupported dataset '{}'".format(name))

        X, y = fetch(return_X_y=True)

        # First carve out the test set, then split the remainder into
        # train and dev using the same random state.
        X_rest, X_test, y_rest, y_test = train_test_split(
            X, y,
            test_size=dataset_parameters.test_size,
            random_state=dataset_parameters.random_state)
        X_train, X_dev, y_train, y_dev = train_test_split(
            X_rest, y_rest,
            test_size=dataset_parameters.dev_size,
            random_state=dataset_parameters.random_state)

        normalizer_name = dataset_parameters.dataset_normalizer
        if normalizer_name is not None:
            scaler_classes = {
                'standard': preprocessing.StandardScaler,
                'minmax': preprocessing.MinMaxScaler,
                'robust': preprocessing.RobustScaler,
                'normalizer': preprocessing.Normalizer,
            }
            if normalizer_name not in scaler_classes:
                raise ValueError("Unsupported normalizer '{}'".format(normalizer_name))
            scaler = scaler_classes[normalizer_name]()
            # The scaler is fitted on the training split only and then
            # applied unchanged to the dev and test splits.
            X_train = scaler.fit_transform(X_train)
            X_dev = scaler.transform(X_dev)
            X_test = scaler.transform(X_test)

        return Dataset(task, X_train, X_dev, X_test, y_train, y_dev, y_test)
from bolsonaro.utils import save_obj_to_json, load_obj_from_json
import os
class DatasetParameters(object):
    """Settings describing which dataset to load and how to split/normalize it.

    The values are kept in underscore-prefixed attributes; `save` serialises
    the instance ``__dict__`` directly, so those attribute names are part of
    the stored JSON.
    """

    def __init__(self, name, test_size, dev_size, random_state, dataset_normalizer):
        # Assignment order is kept so that __dict__ (and the saved JSON)
        # lists the keys in the same order.
        self._name, self._test_size, self._dev_size = name, test_size, dev_size
        self._random_state, self._dataset_normalizer = random_state, dataset_normalizer

    @property
    def name(self):
        return self._name

    @property
    def test_size(self):
        return self._test_size

    @property
    def dev_size(self):
        return self._dev_size

    @property
    def random_state(self):
        return self._random_state

    @property
    def dataset_normalizer(self):
        return self._dataset_normalizer

    def save(self, directory_path, experiment_id):
        """Write the instance attributes to
        ``<directory_path><os.sep>dataset_parameters_<experiment_id>.json``.
        """
        file_name = 'dataset_parameters_{}.json'.format(experiment_id)
        save_obj_to_json(os.sep.join((directory_path, file_name)), self.__dict__)

    @staticmethod
    def load(directory_path, experiment_id):
        """Return what ``load_obj_from_json`` builds from the JSON file that
        `save` writes for ``experiment_id`` in ``directory_path``.
        """
        file_name = 'dataset_parameters_{}.json'.format(experiment_id)
        return load_obj_from_json(os.sep.join((directory_path, file_name)),
                                  DatasetParameters)
from enum import Enum
class Task(Enum):
    """Kind of learning task attached to a dataset.

    DatasetLoader picks one member per dataset name and stores it on the
    returned Dataset.
    """
    # Two-class classification (used for 'breast_cancer').
    BINARYCLASSIFICATION = 1
    # Regression (used e.g. for 'boston', 'diabetes', 'california_housing').
    REGRESSION = 2
    # Classification with more than two classes (used e.g. for 'iris', 'digits').
    MULTICLASSIFICATION = 3
#####################################################################################
# MIT License #
# #
# Copyright (C) 2019 Charly Lamothe #
# #
# This file is part of VQ-VAE-Speech. #
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy #
# of this software and associated documentation files (the "Software"), to deal #
# in the Software without restriction, including without limitation the rights #
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell #
# copies of the Software, and to permit persons to whom the Software is #
# furnished to do so, subject to the following conditions: #
# #
# The above copyright notice and this permission notice shall be included in all #
# copies or substantial portions of the Software. #
# #
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, #
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #
# SOFTWARE. #
#####################################################################################
import os
# Absolute path of the 'log' directory located two levels above the directory
# containing this file. os.path.join is used instead of string concatenation so
# that an empty dirname (relative __file__) does not produce the root-anchored
# path '/../../log'.
LOG_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, 'log'))
#####################################################################################
# MIT License #
# #
# Copyright (C) 2019 Charly Lamothe #
# #
# This file is part of VQ-VAE-Speech. #
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy #
# of this software and associated documentation files (the "Software"), to deal #
# in the Software without restriction, including without limitation the rights #
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell #
# copies of the Software, and to permit persons to whom the Software is #
# furnished to do so, subject to the following conditions: #
# #
# The above copyright notice and this permission notice shall be included in all #
# copies or substantial portions of the Software. #
# #
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, #
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #
# SOFTWARE. #
#####################################################################################
import logging
from logging.handlers import RotatingFileHandler
import os
import errno
class LoggerFactory(object):
    """Creates loggers that write to a rotating file and to the console."""

    @staticmethod
    def create(path, module_name):
        """Return the logger named ``module_name``, configuring it on first use.

        On the first call for a given name, the directory ``path`` is created
        if needed and two handlers are attached: a ``RotatingFileHandler``
        writing ``<path>/<module_name>.log`` at DEBUG level (1,000,000 bytes
        per file, 5 backups) and a ``StreamHandler`` at INFO level.

        If the logger already has handlers, it is returned unchanged, so
        calling this method repeatedly does not duplicate log output. As a
        consequence, a later call with a different ``path`` for the same name
        does not add a second log file.

        :param path: directory in which the log file is stored.
        :param module_name: logger name, also used as the log file base name.
        :return: the configured ``logging.Logger``.
        :raises OSError: if the directory or the log file cannot be created.
        """
        # Create logger
        logger = logging.getLogger(module_name)
        logger.setLevel(logging.DEBUG)

        # logging.getLogger returns the same object for the same name;
        # attaching handlers again would emit every record several times.
        if logger.handlers:
            return logger

        os.makedirs(path, exist_ok=True)

        # Create file handler
        fh = RotatingFileHandler(os.path.join(path, module_name + '.log'),
                                 maxBytes=1000000, backupCount=5)
        fh.setLevel(logging.DEBUG)

        # Create console handler
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)

        # Create formatter
        formatter = logging.Formatter('%(asctime)s - %(filename)s:%(lineno)s - %(name)s - %(levelname)s - %(message)s')

        # Add formatter to handlers
        fh.setFormatter(formatter)
        ch.setFormatter(formatter)  # TODO: add another formatter to the console logger?

        # Add fh and ch to logger
        logger.addHandler(fh)
        logger.addHandler(ch)

        return logger
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment