diff --git a/.gitignore b/.gitignore index d7c9832ff3c60eeb0b764a3b8d40fa4dad2cfee5..9cc2e27f5cc3bd830b9038f8562b9edc51eb3f19 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +*/.kile/* +*.kilepr # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -87,3 +89,282 @@ target/ # Mypy cache .mypy_cache/ + + +# latex + +## Core latex/pdflatex auxiliary files: +*.aux +*.lof +*.log +*.lot +*.fls +*.out +*.toc +*.fmt +*.fot +*.cb +*.cb2 +.*.lb + +## Intermediate documents: +*.dvi +*.xdv +*-converted-to.* +# these rules might exclude image files for figures etc. +# *.ps +# *.eps +# *.pdf + +## Generated if empty string is given at "Please type another file name for output:" +.pdf + +## Bibliography auxiliary files (bibtex/biblatex/biber): +*.bbl +*.bcf +*.blg +*-blx.aux +*-blx.bib +*.run.xml + +## Build tool auxiliary files: +*.fdb_latexmk +*.synctex +*.synctex(busy) +*.synctex.gz +*.synctex.gz(busy) +*.pdfsync + +## Build tool directories for auxiliary files +# latexrun +latex.out/ + +## Auxiliary and intermediate files from other packages: +# algorithms +*.alg +*.loa + +# achemso +acs-*.bib + +# amsthm +*.thm + +# beamer +*.nav +*.pre +*.snm +*.vrb + +# changes +*.soc + +# comment +*.cut + +# cprotect +*.cpt + +# elsarticle (documentclass of Elsevier journals) +*.spl + +# endnotes +*.ent + +# fixme +*.lox + +# feynmf/feynmp +*.mf +*.mp +*.t[1-9] +*.t[1-9][0-9] +*.tfm + +#(r)(e)ledmac/(r)(e)ledpar +*.end +*.?end +*.[1-9] +*.[1-9][0-9] +*.[1-9][0-9][0-9] +*.[1-9]R +*.[1-9][0-9]R +*.[1-9][0-9][0-9]R +*.eledsec[1-9] +*.eledsec[1-9]R +*.eledsec[1-9][0-9] +*.eledsec[1-9][0-9]R +*.eledsec[1-9][0-9][0-9] +*.eledsec[1-9][0-9][0-9]R + +# glossaries +*.acn +*.acr +*.glg +*.glo +*.gls +*.glsdefs +*.lzo +*.lzs + +# uncomment this for glossaries-extra (will ignore makeindex's style files!) +# *.ist + +# gnuplottex +*-gnuplottex-* + +# gregoriotex +*.gaux +*.gtex + +# htlatex +*.4ct +*.4tc +*.idv +*.lg +*.trc +*.xref + +# hyperref +*.brf + +# knitr +*-concordance.tex +# TODO Comment the next line if you want to keep your tikz graphics files +*.tikz +*-tikzDictionary + +# listings +*.lol + +# luatexja-ruby +*.ltjruby + +# makeidx +*.idx +*.ilg +*.ind + +# minitoc +*.maf +*.mlf +*.mlt +*.mtc[0-9]* +*.slf[0-9]* +*.slt[0-9]* +*.stc[0-9]* + +# minted +_minted* +*.pyg + +# morewrites +*.mw + +# nomencl +*.nlg +*.nlo +*.nls + +# pax +*.pax + +# pdfpcnotes +*.pdfpc + +# sagetex +*.sagetex.sage +*.sagetex.py +*.sagetex.scmd + +# scrwfile +*.wrt + +# sympy +*.sout +*.sympy +sympy-plots-for-*.tex/ + +# pdfcomment +*.upa +*.upb + +# pythontex +*.pytxcode +pythontex-files-*/ + +# tcolorbox +*.listing + +# thmtools +*.loe + +# TikZ & PGF +*.dpth +*.md5 +*.auxlock + +# todonotes +*.tdo + +# vhistory +*.hst +*.ver + +# easy-todo +*.lod + +# xcolor +*.xcp + +# xmpincl +*.xmpi + +# xindy +*.xdy + +# xypic precompiled matrices and outlines +*.xyc +*.xyd + +# endfloat +*.ttt +*.fff + +# Latexian +TSWLatexianTemp* + +## Editors: +# WinEdt +*.bak +*.sav + +# Texpad +.texpadtmp + +# LyX +*.lyx~ + +# Kile +*.backup + +# gummi +.*.swp + +# KBibTeX +*~[0-9]* + +# auto folder when using emacs and auctex +./auto/* +*.el + +# expex forward references with \gathertags +*-tags.tex + +# standalone packages +*.sta + +# Makeindex log files +*.lpz + +reports/*.pdf diff --git a/bolsonaro/__init__.py b/code/bolsonaro/__init__.py similarity index 100% rename from bolsonaro/__init__.py rename to code/bolsonaro/__init__.py diff --git a/bolsonaro/data/.gitkeep b/code/bolsonaro/data/.gitkeep similarity index 100% rename from bolsonaro/data/.gitkeep rename to code/bolsonaro/data/.gitkeep diff --git a/bolsonaro/data/__init__.py b/code/bolsonaro/data/__init__.py similarity index 100% rename from bolsonaro/data/__init__.py rename to code/bolsonaro/data/__init__.py diff --git a/bolsonaro/data/dataset.py b/code/bolsonaro/data/dataset.py similarity index 100% rename from bolsonaro/data/dataset.py rename to code/bolsonaro/data/dataset.py diff --git a/bolsonaro/data/dataset_loader.py b/code/bolsonaro/data/dataset_loader.py similarity index 97% rename from bolsonaro/data/dataset_loader.py rename to code/bolsonaro/data/dataset_loader.py index c510a90392e597ac0c54ffb26d537bcac9ccf37e..6ad4b1f769d35b67b9ebcec6dae6b03ed68607e7 100644 --- a/bolsonaro/data/dataset_loader.py +++ b/code/bolsonaro/data/dataset_loader.py @@ -1,79 +1,79 @@ -from bolsonaro.data.dataset import Dataset -from bolsonaro.data.task import Task - -from sklearn.datasets import load_boston, load_iris, load_diabetes, load_digits, load_linnerud, load_wine, load_breast_cancer -from sklearn.datasets import fetch_olivetti_faces, fetch_20newsgroups, \ - fetch_20newsgroups_vectorized, fetch_lfw_people, fetch_lfw_pairs, \ - fetch_covtype, fetch_rcv1, fetch_kddcup99, fetch_california_housing -from sklearn.model_selection import train_test_split - - -class DatasetLoader(object): - - @staticmethod - def load_from_name(dataset_parameters): - name = dataset_parameters.name - if name == 'boston': - dataset_loading_func = load_boston - task = Task.REGRESSION - elif name == 'iris': - dataset_loading_func = load_iris - task = Task.CLASSIFICATION - elif name == 'diabetes': - dataset_loading_func = load_diabetes - task = Task.REGRESSION - elif name == 'digits': - dataset_loading_func = load_digits - task = Task.CLASSIFICATION - elif name == 'linnerud': - dataset_loading_func = load_linnerud - task = Task.REGRESSION - elif name == 'wine': - dataset_loading_func = load_wine - task = Task.CLASSIFICATION - elif name == 'breast_cancer': - dataset_loading_func = load_breast_cancer - task = Task.CLASSIFICATION - elif name == 'olivetti_faces': - dataset_loading_func = fetch_olivetti_faces - task = Task.CLASSIFICATION - elif name == '20newsgroups': - dataset_loading_func = fetch_20newsgroups - task = Task.CLASSIFICATION - elif name == '20newsgroups_vectorized': - dataset_loading_func = fetch_20newsgroups_vectorized - task = Task.CLASSIFICATION - elif name == 'lfw_people': - dataset_loading_func = fetch_lfw_people - task = Task.CLASSIFICATION - elif name == 'lfw_pairs': - dataset_loading_func = fetch_lfw_pairs - elif name == 'covtype': - dataset_loading_func = fetch_covtype - task = Task.CLASSIFICATION - elif name == 'rcv1': - dataset_loading_func = fetch_rcv1 - task = Task.CLASSIFICATION - elif name == 'kddcup99': - dataset_loading_func = fetch_kddcup99 - task = Task.CLASSIFICATION - elif name == 'california_housing': - dataset_loading_func = fetch_california_housing - task = Task.REGRESSION - else: - raise ValueError("Unsupported dataset '{}'".format(name)) - - X, y = dataset_loading_func(return_X_y=True) - X_train, X_test, y_train, y_test = train_test_split(X, y, - test_size=dataset_parameters.test_size, - random_state=dataset_parameters.random_state) - X_train, X_dev, y_train, y_dev = train_test_split(X_train, y_train, - test_size=dataset_parameters.dev_size, - random_state=dataset_parameters.random_state) - - # TODO - if dataset_parameters.normalize: - pass - - return Dataset(task, dataset_parameters, X_train, - X_dev, X_test, y_train, y_dev, y_test) +from bolsonaro.data.dataset import Dataset +from bolsonaro.data.task import Task + +from sklearn.datasets import load_boston, load_iris, load_diabetes, load_digits, load_linnerud, load_wine, load_breast_cancer +from sklearn.datasets import fetch_olivetti_faces, fetch_20newsgroups, \ + fetch_20newsgroups_vectorized, fetch_lfw_people, fetch_lfw_pairs, \ + fetch_covtype, fetch_rcv1, fetch_kddcup99, fetch_california_housing +from sklearn.model_selection import train_test_split + + +class DatasetLoader(object): + + @staticmethod + def load_from_name(dataset_parameters): + name = dataset_parameters.name + if name == 'boston': + dataset_loading_func = load_boston + task = Task.REGRESSION + elif name == 'iris': + dataset_loading_func = load_iris + task = Task.CLASSIFICATION + elif name == 'diabetes': + dataset_loading_func = load_diabetes + task = Task.REGRESSION + elif name == 'digits': + dataset_loading_func = load_digits + task = Task.CLASSIFICATION + elif name == 'linnerud': + dataset_loading_func = load_linnerud + task = Task.REGRESSION + elif name == 'wine': + dataset_loading_func = load_wine + task = Task.CLASSIFICATION + elif name == 'breast_cancer': + dataset_loading_func = load_breast_cancer + task = Task.CLASSIFICATION + elif name == 'olivetti_faces': + dataset_loading_func = fetch_olivetti_faces + task = Task.CLASSIFICATION + elif name == '20newsgroups': + dataset_loading_func = fetch_20newsgroups + task = Task.CLASSIFICATION + elif name == '20newsgroups_vectorized': + dataset_loading_func = fetch_20newsgroups_vectorized + task = Task.CLASSIFICATION + elif name == 'lfw_people': + dataset_loading_func = fetch_lfw_people + task = Task.CLASSIFICATION + elif name == 'lfw_pairs': + dataset_loading_func = fetch_lfw_pairs + elif name == 'covtype': + dataset_loading_func = fetch_covtype + task = Task.CLASSIFICATION + elif name == 'rcv1': + dataset_loading_func = fetch_rcv1 + task = Task.CLASSIFICATION + elif name == 'kddcup99': + dataset_loading_func = fetch_kddcup99 + task = Task.CLASSIFICATION + elif name == 'california_housing': + dataset_loading_func = fetch_california_housing + task = Task.REGRESSION + else: + raise ValueError("Unsupported dataset '{}'".format(name)) + + X, y = dataset_loading_func(return_X_y=True) + X_train, X_test, y_train, y_test = train_test_split(X, y, + test_size=dataset_parameters.test_size, + random_state=dataset_parameters.random_state) + X_train, X_dev, y_train, y_dev = train_test_split(X_train, y_train, + test_size=dataset_parameters.dev_size, + random_state=dataset_parameters.random_state) + + # TODO + if dataset_parameters.normalize: + pass + + return Dataset(task, dataset_parameters, X_train, + X_dev, X_test, y_train, y_dev, y_test) diff --git a/bolsonaro/data/dataset_parameters.py b/code/bolsonaro/data/dataset_parameters.py similarity index 100% rename from bolsonaro/data/dataset_parameters.py rename to code/bolsonaro/data/dataset_parameters.py diff --git a/bolsonaro/data/task.py b/code/bolsonaro/data/task.py similarity index 100% rename from bolsonaro/data/task.py rename to code/bolsonaro/data/task.py diff --git a/bolsonaro/error_handling/__init__.py b/code/bolsonaro/error_handling/__init__.py similarity index 100% rename from bolsonaro/error_handling/__init__.py rename to code/bolsonaro/error_handling/__init__.py diff --git a/bolsonaro/error_handling/color_print.py b/code/bolsonaro/error_handling/color_print.py similarity index 100% rename from bolsonaro/error_handling/color_print.py rename to code/bolsonaro/error_handling/color_print.py diff --git a/bolsonaro/error_handling/console_logger.py b/code/bolsonaro/error_handling/console_logger.py similarity index 98% rename from bolsonaro/error_handling/console_logger.py rename to code/bolsonaro/error_handling/console_logger.py index 7014b4c922c41f108f39331a3061d29703aa7f90..93d34d67c8720be6ffccadd5fb37704c9e40a78e 100644 --- a/bolsonaro/error_handling/console_logger.py +++ b/code/bolsonaro/error_handling/console_logger.py @@ -24,7 +24,7 @@ # SOFTWARE. # ##################################################################################### -from error_handling.color_print import ColorPrint +from bolsonaro.error_handling.color_print import ColorPrint import sys import traceback diff --git a/bolsonaro/error_handling/exception_decorators.py b/code/bolsonaro/error_handling/exception_decorators.py similarity index 100% rename from bolsonaro/error_handling/exception_decorators.py rename to code/bolsonaro/error_handling/exception_decorators.py diff --git a/bolsonaro/error_handling/logger_factory.py b/code/bolsonaro/error_handling/logger_factory.py similarity index 100% rename from bolsonaro/error_handling/logger_factory.py rename to code/bolsonaro/error_handling/logger_factory.py diff --git a/bolsonaro/models/.gitkeep b/code/bolsonaro/models/.gitkeep similarity index 100% rename from bolsonaro/models/.gitkeep rename to code/bolsonaro/models/.gitkeep diff --git a/bolsonaro/models/__init__.py b/code/bolsonaro/models/__init__.py similarity index 100% rename from bolsonaro/models/__init__.py rename to code/bolsonaro/models/__init__.py diff --git a/bolsonaro/models/model_factory.py b/code/bolsonaro/models/model_factory.py similarity index 100% rename from bolsonaro/models/model_factory.py rename to code/bolsonaro/models/model_factory.py diff --git a/bolsonaro/models/model_parameters.py b/code/bolsonaro/models/model_parameters.py similarity index 100% rename from bolsonaro/models/model_parameters.py rename to code/bolsonaro/models/model_parameters.py diff --git a/bolsonaro/models/omp_forest_classifier.py b/code/bolsonaro/models/omp_forest_classifier.py similarity index 100% rename from bolsonaro/models/omp_forest_classifier.py rename to code/bolsonaro/models/omp_forest_classifier.py diff --git a/bolsonaro/models/omp_forest_regressor.py b/code/bolsonaro/models/omp_forest_regressor.py similarity index 100% rename from bolsonaro/models/omp_forest_regressor.py rename to code/bolsonaro/models/omp_forest_regressor.py diff --git a/bolsonaro/trainer.py b/code/bolsonaro/trainer.py similarity index 100% rename from bolsonaro/trainer.py rename to code/bolsonaro/trainer.py diff --git a/bolsonaro/utils.py b/code/bolsonaro/utils.py similarity index 100% rename from bolsonaro/utils.py rename to code/bolsonaro/utils.py diff --git a/bolsonaro/visualization/.gitkeep b/code/bolsonaro/visualization/.gitkeep similarity index 100% rename from bolsonaro/visualization/.gitkeep rename to code/bolsonaro/visualization/.gitkeep diff --git a/bolsonaro/visualization/__init__.py b/code/bolsonaro/visualization/__init__.py similarity index 100% rename from bolsonaro/visualization/__init__.py rename to code/bolsonaro/visualization/__init__.py diff --git a/bolsonaro/visualization/plotter.py b/code/bolsonaro/visualization/plotter.py similarity index 100% rename from bolsonaro/visualization/plotter.py rename to code/bolsonaro/visualization/plotter.py diff --git a/bolsonaro/visualization/visualize.py b/code/bolsonaro/visualization/visualize.py similarity index 100% rename from bolsonaro/visualization/visualize.py rename to code/bolsonaro/visualization/visualize.py diff --git a/compute_results.py b/code/compute_results.py similarity index 100% rename from compute_results.py rename to code/compute_results.py diff --git a/train.py b/code/train.py similarity index 100% rename from train.py rename to code/train.py diff --git a/visualize.py b/code/visualize.py similarity index 100% rename from visualize.py rename to code/visualize.py diff --git a/requirements.txt b/requirements.txt index d4f7d11c284ddfa9486b93a090035aaa37fca265..3b11a342f72cb6c2a1a437124bb81c48da9e3c3c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ coverage awscli flake8 python-dotenv>=0.5.1 +scikit-learn \ No newline at end of file diff --git a/setup.py b/setup.py index 08242fab42758dba87119ee01babd47d701b1864..592074efa2a4c95a89b8a287921811835a2cc35e 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,8 @@ from setuptools import find_packages, setup setup( name='bolsonaro', - packages=find_packages(), + packages=find_packages(where="code", exclude=['doc', 'dev']), + package_dir={'': "code"}, version='0.1.0', description='Bolsonaro project of QARMA non-permanents: deforesting random forest using OMP.', author='QARMA team',