Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • 12-experiment-pipeline
  • 13-visualization
  • 14-correction-of-multiclass-classif
  • 15-integration-sota
  • 17-adding-new-datasets
  • 19-add-some-tests
  • 20-coherence-des-arbres-de-predictions
  • 24-non-negative-omp
  • correlation
  • master
  • archive/10-gridsearching-of-the-base-forest
  • archive/4-implement-omp_forest_classifier
  • archive/5-add-plots-2
  • archive/Leo_Add_first_notebook
  • archive/farah_notation_and_related_work
  • archive/wip_clean_scripts
16 results

Target

Select target project
No results found
Select Git revision
  • 12-experiment-pipeline
  • 13-visualization
  • 14-correction-of-multiclass-classif
  • 15-integration-sota
  • 17-adding-new-datasets
  • 19-add-some-tests
  • 20-coherence-des-arbres-de-predictions
  • 24-non-negative-omp
  • correlation
  • master
  • archive/10-gridsearching-of-the-base-forest
  • archive/4-implement-omp_forest_classifier
  • archive/5-add-plots-2
  • archive/Leo_Add_first_notebook
  • archive/farah_notation_and_related_work
  • archive/wip_clean_scripts
16 results
Show changes
272 files
+ 63870
350
Compare changes
  • Side-by-side
  • Inline

Files

+1 −7
Original line number Original line Diff line number Diff line
models/*
models/*
results/*
experiments/unnamed/


*/.kile/*
*/.kile/*
*.kilepr
*.kilepr
@@ -80,9 +80,6 @@ target/
# Jupyter NB Checkpoints
# Jupyter NB Checkpoints
.ipynb_checkpoints/
.ipynb_checkpoints/


# exclude data from source control by default
/data/

# Mac OS-specific storage files
# Mac OS-specific storage files
.DS_Store
.DS_Store


@@ -371,6 +368,3 @@ TSWLatexianTemp*
*.lpz
*.lpz


reports/*.pdf
reports/*.pdf

# Image
*.png
+1 −5
Original line number Original line Diff line number Diff line
* Fix pickle loading of ModelRawResults, because saving the model_object leads to import issues.
* Fix ModelFactory.load function.
* Fix model results loading in compute_results.py.
* Fix model results loading in compute_results.py.
* Check that omp multiclasses classifier is working as expected.
* Check that omp multiclasses classifier is working as expected.
* In the bayesian search computation, output a different file name depending on the task of the trained model.
* Fix the dataset error of fetcher when job_number > 1.
* Check the best params scores of the regressors (neg_mean_squared_error leads to huge negative values).
 No newline at end of file
* Prepare the json experiment files to run.
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -14,10 +14,6 @@ class Dataset(object):
    def task(self):
    def task(self):
        return self._task
        return self._task


    @property
    def dataset_parameters(self):
        return self._dataset_parameters

    @property
    @property
    def X_train(self):
    def X_train(self):
        return self._X_train
        return self._X_train
Original line number Original line Diff line number Diff line
from bolsonaro.data.dataset import Dataset
from bolsonaro.data.dataset import Dataset
from bolsonaro.data.dataset_parameters import DatasetParameters
from bolsonaro.data.task import Task
from bolsonaro.data.task import Task
from bolsonaro.utils import change_binary_func_load
from bolsonaro.utils import change_binary_func_load


@@ -9,13 +10,38 @@ from sklearn.datasets import fetch_olivetti_faces, fetch_20newsgroups, \
    fetch_covtype, fetch_rcv1, fetch_kddcup99, fetch_california_housing
    fetch_covtype, fetch_rcv1, fetch_kddcup99, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn import preprocessing
import random
import pandas as pd




class DatasetLoader(object):
class DatasetLoader(object):


    DEFAULT_DATASET_NAME = 'boston'
    DEFAULT_NORMALIZE_D = False
    DEFAULT_DATASET_NORMALIZER = 'standard'
    DEFAULT_FOREST_SIZE = 100
    DEFAULT_EXTRACTED_FOREST_SIZE_SAMPLES = 5
    DEFAULT_EXTRACTED_FOREST_SIZE_STOP = 0.1
    DEFAULT_DEV_SIZE = 0.2
    DEFAULT_TEST_SIZE = 0.2
    DEFAULT_RANDOM_SEED_NUMBER = 1
    DEFAULT_SUBSETS_USED = 'train,dev'
    DEFAULT_NORMALIZE_WEIGHTS = False

    dataset_names = ['boston', 'iris', 'diabetes', 'digits', 'linnerud', 'wine',
        'breast_cancer', 'olivetti_faces', '20newsgroups_vectorized', 'lfw_people',
        'lfw_pairs', 'covtype', 'rcv1', 'california_housing', 'diamonds']

    dataset_seed_numbers = {'boston':15, 'iris':15, 'diabetes':15, 'digits':5,
        'linnerud':15, 'wine':15, 'breast_cancer':15, 'olivetti_faces':15,
        '20newsgroups_vectorized':3, 'lfw_people':3,
        'lfw_pairs':3, 'covtype':3, 'rcv1':3, 'california_housing':3,
        'diamonds': 15}

    @staticmethod
    @staticmethod
    def load(dataset_parameters):
    def load(dataset_parameters):
        name = dataset_parameters.name
        name = dataset_parameters.name
        X, y = None, None
        if name == 'boston':
        if name == 'boston':
            dataset_loading_func = load_boston
            dataset_loading_func = load_boston
            task = Task.REGRESSION
            task = Task.REGRESSION
@@ -37,37 +63,52 @@ class DatasetLoader(object):
        elif name == 'breast_cancer':
        elif name == 'breast_cancer':
            dataset_loading_func = change_binary_func_load(load_breast_cancer)
            dataset_loading_func = change_binary_func_load(load_breast_cancer)
            task = Task.BINARYCLASSIFICATION
            task = Task.BINARYCLASSIFICATION
        elif name == 'olivetti_faces':  # bug (no return X_y)
        elif name == 'olivetti_faces':
            dataset_loading_func = fetch_olivetti_faces
            dataset = fetch_olivetti_faces(random_state=dataset_parameters.random_state, shuffle=True)
            task = Task.MULTICLASSIFICATION
        elif name == '20newsgroups':  # bug (no return X_y)
            dataset_loading_func = fetch_20newsgroups
            task = Task.MULTICLASSIFICATION
            task = Task.MULTICLASSIFICATION
            X, y = dataset.data, dataset.target
        elif name == '20newsgroups_vectorized':
        elif name == '20newsgroups_vectorized':
            dataset_loading_func = fetch_20newsgroups_vectorized
            dataset = fetch_20newsgroups_vectorized()
            X, y = dataset.data, dataset.target
            task = Task.MULTICLASSIFICATION
            task = Task.MULTICLASSIFICATION
        elif name == 'lfw_people':  # needs PIL (image dataset)
        elif name == 'lfw_people':
            dataset_loading_func = fetch_lfw_people
            dataset = fetch_lfw_people()
            X, y = dataset.data, dataset.target
            task = Task.MULTICLASSIFICATION
            task = Task.MULTICLASSIFICATION
        elif name == 'lfw_pairs':
        elif name == 'lfw_pairs':
            dataset_loading_func = fetch_lfw_pairs
            dataset = fetch_lfw_pairs()
            X, y = dataset.data, dataset.target
            task = Task.MULTICLASSIFICATION
            task = Task.MULTICLASSIFICATION
        elif name == 'covtype':
        elif name == 'covtype':
            dataset_loading_func = fetch_covtype
            X, y = fetch_covtype(random_state=dataset_parameters.random_state, shuffle=True, return_X_y=True)
            task = Task.MULTICLASSIFICATION
            task = Task.MULTICLASSIFICATION
        elif name == 'rcv1':
        elif name == 'rcv1':
            dataset_loading_func = fetch_rcv1
            X, y = fetch_rcv1(random_state=dataset_parameters.random_state, shuffle=True, return_X_y=True)
            task = Task.MULTICLASSIFICATION
        elif name == 'kddcup99':
            dataset_loading_func = fetch_kddcup99
            task = Task.MULTICLASSIFICATION
            task = Task.MULTICLASSIFICATION
        elif name == 'california_housing':
        elif name == 'california_housing':
            dataset_loading_func = fetch_california_housing
            X, y = fetch_california_housing(return_X_y=True)
            task = Task.REGRESSION
        elif name == 'diamonds':
            # TODO: make a proper fetcher instead of the following code
            from sklearn.preprocessing import LabelEncoder
            df = pd.read_csv('data/diamonds.csv')
            df.drop(['Unnamed: 0'], axis=1 , inplace=True)
            df = df[(df[['x','y','z']] != 0).all(axis=1)]
            df.drop(['x','y','z'], axis=1, inplace= True)
            label_cut = LabelEncoder()
            label_color = LabelEncoder()
            label_clarity = LabelEncoder()
            df['cut'] = label_cut.fit_transform(df['cut'])
            df['color'] = label_color.fit_transform(df['color'])
            df['clarity'] = label_clarity.fit_transform(df['clarity'])
            X, y = df.drop(['price'], axis=1), df['price']
            task = Task.REGRESSION
            task = Task.REGRESSION
        else:
        else:
            raise ValueError("Unsupported dataset '{}'".format(name))
            raise ValueError("Unsupported dataset '{}'".format(name))


        if X is None:
            X, y = dataset_loading_func(return_X_y=True)
            X, y = dataset_loading_func(return_X_y=True)

        X_train, X_test, y_train, y_test = train_test_split(X, y,
        X_train, X_test, y_train, y_test = train_test_split(X, y,
            test_size=dataset_parameters.test_size,
            test_size=dataset_parameters.test_size,
            random_state=dataset_parameters.random_state)
            random_state=dataset_parameters.random_state)
@@ -92,3 +133,20 @@ class DatasetLoader(object):


        return Dataset(task, X_train,
        return Dataset(task, X_train,
            X_dev, X_test, y_train, y_dev, y_test)
            X_dev, X_test, y_train, y_dev, y_test)

    @staticmethod
    def load_default(dataset_name, seed):
        begin_random_seed_range = 1
        end_random_seed_range = 2000

        seed = seed if seed else random.randint(begin_random_seed_range, end_random_seed_range)

        dataset_parameters = DatasetParameters(
            name=dataset_name,
            test_size=DatasetLoader.DEFAULT_TEST_SIZE,
            dev_size=DatasetLoader.DEFAULT_DEV_SIZE,
            random_state=seed,
            dataset_normalizer=DatasetLoader.DEFAULT_DATASET_NORMALIZER
        )

        return DatasetLoader.load(dataset_parameters)
Original line number Original line Diff line number Diff line
from bolsonaro.models.omp_forest_classifier import OmpForestBinaryClassifier, OmpForestMulticlassClassifier
from bolsonaro.models.omp_forest_classifier import OmpForestBinaryClassifier, OmpForestMulticlassClassifier
from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
from bolsonaro.data.task import Task
from bolsonaro.models.model_parameters import ModelParameters
from bolsonaro.models.model_parameters import ModelParameters
from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor
from bolsonaro.data.task import Task


from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
import os
import os
import pickle
import pickle


@@ -11,22 +13,35 @@ class ModelFactory(object):


    @staticmethod
    @staticmethod
    def build(task, model_parameters):
    def build(task, model_parameters):
        if task not in [Task.BINARYCLASSIFICATION, Task.REGRESSION, Task.MULTICLASSIFICATION]:
            raise ValueError("Unsupported task '{}'".format(task))

        if task == Task.BINARYCLASSIFICATION:
        if task == Task.BINARYCLASSIFICATION:
            model_func = OmpForestBinaryClassifier
            if model_parameters.extraction_strategy == 'omp':
                return OmpForestBinaryClassifier(model_parameters)
            elif model_parameters.extraction_strategy == 'random':
                return RandomForestClassifier(n_estimators=model_parameters.extracted_forest_size,
                    random_state=model_parameters.seed)
            else:
                return RandomForestClassifier(n_estimators=model_parameters.hyperparameters['n_estimators'],
                    random_state=model_parameters.seed)
        elif task == Task.REGRESSION:
        elif task == Task.REGRESSION:
            model_func = OmpForestRegressor
            if model_parameters.extraction_strategy == 'omp':
                return OmpForestRegressor(model_parameters)
            elif model_parameters.extraction_strategy == 'random':
                return RandomForestRegressor(n_estimators=model_parameters.extracted_forest_size,
                    random_state=model_parameters.seed)
            elif model_parameters.extraction_strategy == 'similarity':
                return SimilarityForestRegressor(model_parameters)
            else:
                return RandomForestRegressor(n_estimators=model_parameters.hyperparameters['n_estimators'],
                    random_state=model_parameters.seed)
        elif task == Task.MULTICLASSIFICATION:
        elif task == Task.MULTICLASSIFICATION:
            model_func = OmpForestMulticlassClassifier
            if model_parameters.extraction_strategy == 'omp':
                return OmpForestMulticlassClassifier(model_parameters)
            elif model_parameters.extraction_strategy == 'random':
                return RandomForestClassifier(n_estimators=model_parameters.extracted_forest_size,
                    random_state=model_parameters.seed)
            else:
            else:
            raise ValueError("Unsupported task '{}'".format(task))
                return RandomForestClassifier(n_estimators=model_parameters.hyperparameters['n_estimators'],
        return model_func(model_parameters)
                    random_state=model_parameters.seed)

    @staticmethod
    def load(task, directory_path, experiment_id, model_raw_results):
        raise NotImplementedError
        model_parameters = ModelParameters.load(directory_path, experiment_id)
        model = ModelFactory.build(task, model_parameters)
        # todo faire ce qu'il faut ici pour rétablir correctement le modèle
        model.set_forest(model_raw_results.model_object.forest)
        model.set_weights(model_raw_results.model_object.weights)
        return model
Original line number Original line Diff line number Diff line
@@ -5,13 +5,15 @@ import os


class ModelParameters(object):
class ModelParameters(object):


    def __init__(self, extracted_forest_size, normalize_D, subsets_used, normalize_weights, seed, hyperparameters):
    def __init__(self, extracted_forest_size, normalize_D, subsets_used,
        normalize_weights, seed, hyperparameters, extraction_strategy):
        self._extracted_forest_size = extracted_forest_size
        self._extracted_forest_size = extracted_forest_size
        self._normalize_D = normalize_D
        self._normalize_D = normalize_D
        self._subsets_used = subsets_used
        self._subsets_used = subsets_used
        self._normalize_weights = normalize_weights
        self._normalize_weights = normalize_weights
        self._seed = seed
        self._seed = seed
        self._hyperparameters = hyperparameters
        self._hyperparameters = hyperparameters
        self._extraction_strategy = extraction_strategy


    @property
    @property
    def extracted_forest_size(self):
    def extracted_forest_size(self):
@@ -37,6 +39,10 @@ class ModelParameters(object):
    def hyperparameters(self):
    def hyperparameters(self):
        return self._hyperparameters
        return self._hyperparameters


    @property
    def extraction_strategy(self):
        return self._extraction_strategy

    def save(self, directory_path, experiment_id):
    def save(self, directory_path, experiment_id):
        save_obj_to_json(directory_path + os.sep + 'model_parameters_{}.json'.format(experiment_id),
        save_obj_to_json(directory_path + os.sep + 'model_parameters_{}.json'.format(experiment_id),
            self.__dict__)
            self.__dict__)
Original line number Original line Diff line number Diff line
@@ -8,8 +8,8 @@ class ModelRawResults(object):


    def __init__(self, model_object, training_time,
    def __init__(self, model_object, training_time,
        datetime, train_score, dev_score, test_score,
        datetime, train_score, dev_score, test_score,
        score_metric, train_score_regressor, dev_score_regressor,
        train_score_base, dev_score_base,
        test_score_regressor):
        test_score_base, score_metric, base_score_metric):


        self._model_object = model_object
        self._model_object = model_object
        self._training_time = training_time
        self._training_time = training_time
@@ -17,10 +17,11 @@ class ModelRawResults(object):
        self._train_score = train_score
        self._train_score = train_score
        self._dev_score = dev_score
        self._dev_score = dev_score
        self._test_score = test_score
        self._test_score = test_score
        self._train_score_base = train_score_base
        self._dev_score_base = dev_score_base
        self._test_score_base = test_score_base
        self._score_metric = score_metric
        self._score_metric = score_metric
        self._train_score_regressor = train_score_regressor
        self._base_score_metric = base_score_metric
        self._dev_score_regressor = dev_score_regressor
        self._test_score_regressor = test_score_regressor
    
    
    @property
    @property
    def model_object(self):
    def model_object(self):
@@ -47,20 +48,24 @@ class ModelRawResults(object):
        return self._test_score
        return self._test_score


    @property
    @property
    def score_metric(self):
    def train_score_base(self):
        return self._score_metric
        return self._train_score_base

    @property
    def dev_score_base(self):
        return self._dev_score_base


    @property
    @property
    def train_score_regressor(self):
    def test_score_base(self):
        return self._train_score_regressor
        return self._test_score_base


    @property
    @property
    def dev_score_regressor(self):
    def score_metric(self):
        return self._dev_score_regressor
        return self._score_metric


    @property
    @property
    def test_score_regressor(self):
    def base_score_metric(self):
        return self._test_score_regressor
        return self._base_score_metric


    def save(self, models_dir):
    def save(self, models_dir):
        save_obj_to_pickle(models_dir + os.sep + 'model_raw_results.pickle',
        save_obj_to_pickle(models_dir + os.sep + 'model_raw_results.pickle',
Original line number Original line Diff line number Diff line
@@ -17,10 +17,14 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):
    def models_parameters(self):
    def models_parameters(self):
        return self._models_parameters
        return self._models_parameters


    def predict_base_estimator(self, X):
        return self._base_forest_estimator.predict(X)

    def score_base_estimator(self, X, y):
    def score_base_estimator(self, X, y):
        return self._base_forest_estimator.score(X, y)
        return self._base_forest_estimator.score(X, y)


    def _base_estimator_predictions(self, X):
    def _base_estimator_predictions(self, X):
        # We need to use predict_proba to get the probabilities of each class
        return np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_]).T
        return np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_]).T


    @property
    @property
@@ -63,7 +67,7 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):
        if normalize_weights:
        if normalize_weights:
            # we can normalize weights (by their sum) so that they sum to 1
            # we can normalize weights (by their sum) so that they sum to 1
            # and they can be interpreted as impact percentages for interpretability.
            # and they can be interpreted as impact percentages for interpretability.
            # this necessits to remove the (-) in weights, e.g. move it to the predictions (use unsigned_coef)
            # this necessits to remove the (-) in weights, e.g. move it to the predictions (use unsigned_coef) --> I don't see why


            # question: je comprend pas le truc avec nonszero?
            # question: je comprend pas le truc avec nonszero?
            # predictions = self._omp.predict(forest_predictions) * (1 / (np.sum(self._omp.coef_) / len(np.nonzero(self._omp.coef_))))
            # predictions = self._omp.predict(forest_predictions) * (1 / (np.sum(self._omp.coef_) / len(np.nonzero(self._omp.coef_))))
Original line number Original line Diff line number Diff line
@@ -60,7 +60,7 @@ class OmpForestMulticlassClassifier(OmpForest):
        for class_label in possible_classes:
        for class_label in possible_classes:
            atoms_binary = binarize_class_data(atoms, class_label, inplace=False)
            atoms_binary = binarize_class_data(atoms, class_label, inplace=False)
            objective_binary = binarize_class_data(objective, class_label, inplace=False)
            objective_binary = binarize_class_data(objective, class_label, inplace=False)
            # todo peut etre considérer que la taille de forêt est globale et donc seulement une fraction est disponible pour chaque OMP...
            # TODO: peut etre considérer que la taille de forêt est globale et donc seulement une fraction est disponible pour chaque OMP...
            omp_class = OrthogonalMatchingPursuit(
            omp_class = OrthogonalMatchingPursuit(
                n_nonzero_coefs=self.models_parameters.extracted_forest_size,
                n_nonzero_coefs=self.models_parameters.extracted_forest_size,
                fit_intercept=True, normalize=False)
                fit_intercept=True, normalize=False)
@@ -69,7 +69,9 @@ class OmpForestMulticlassClassifier(OmpForest):
        return self._dct_class_omp
        return self._dct_class_omp


    def predict(self, X):
    def predict(self, X):
        forest_predictions = self._base_estimator_predictions(X)
        '''forest_predictions = self._base_estimator_predictions(X)

        print(forest_predictions.shape)


        if self._models_parameters.normalize_D:
        if self._models_parameters.normalize_D:
            forest_predictions /= self._forest_norms
            forest_predictions /= self._forest_norms
@@ -79,9 +81,26 @@ class OmpForestMulticlassClassifier(OmpForest):
        for class_label, omp_class in self._dct_class_omp.items():
        for class_label, omp_class in self._dct_class_omp.items():
            label_names.append(class_label)
            label_names.append(class_label)
            atoms_binary = binarize_class_data(forest_predictions, class_label, inplace=False)
            atoms_binary = binarize_class_data(forest_predictions, class_label, inplace=False)
            print(atoms_binary.shape)
            preds.append(self._make_omp_weighted_prediction(atoms_binary, omp_class, self._models_parameters.normalize_weights))
            preds.append(self._make_omp_weighted_prediction(atoms_binary, omp_class, self._models_parameters.normalize_weights))


        # todo verifier que ce n'est pas bugué ici
        # TODO: verifier que ce n'est pas bugué ici

        preds = np.array(preds).T'''

        forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]).T

        if self._models_parameters.normalize_D:
            forest_predictions /= self._forest_norms

        label_names = []
        preds = []
        num_class = 0
        for class_label, omp_class in self._dct_class_omp.items():
            label_names.append(class_label)
            atoms_binary = (forest_predictions[num_class] - 0.5) * 2 # centré réduit de 0/1 à -1/1
            preds.append(self._make_omp_weighted_prediction(atoms_binary, omp_class, self._models_parameters.normalize_weights))
            num_class += 1


        preds = np.array(preds).T
        preds = np.array(preds).T
        max_preds = np.argmax(preds, axis=1)
        max_preds = np.argmax(preds, axis=1)
@@ -97,6 +116,27 @@ class OmpForestMulticlassClassifier(OmpForest):


        return evaluation
        return evaluation


    @staticmethod
    def _make_omp_weighted_prediction(base_predictions, omp_obj, normalize_weights=False):
        if normalize_weights:
            # we can normalize weights (by their sum) so that they sum to 1
            # and they can be interpreted as impact percentages for interpretability.
            # this necessits to remove the (-) in weights, e.g. move it to the predictions (use unsigned_coef) --> I don't see why

            # question: je comprend pas le truc avec nonszero?
            # predictions = self._omp.predict(forest_predictions) * (1 / (np.sum(self._omp.coef_) / len(np.nonzero(self._omp.coef_))))
            coef_signs = np.sign(omp_obj.coef_)[np.newaxis, :]  # add axis to make sure it will be broadcasted line-wise (there might be a confusion when forest_prediction is square)
            unsigned_coef = (coef_signs * omp_obj.coef_).squeeze()
            intercept = omp_obj.intercept_

            adjusted_forest_predictions = base_predictions * coef_signs
            predictions = adjusted_forest_predictions.dot(unsigned_coef) + intercept

        else:
            predictions = omp_obj.predict(base_predictions)

        return predictions



if __name__ == "__main__":
if __name__ == "__main__":
    forest = RandomForestClassifier(n_estimators=10)
    forest = RandomForestClassifier(n_estimators=10)
Original line number Original line Diff line number Diff line
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.base import BaseEstimator
from abc import abstractmethod, ABCMeta
import numpy as np


class SimilarityForestRegressor(BaseEstimator, metaclass=ABCMeta):
    """
    Random-forest pruning by backward elimination: repeatedly remove the tree
    whose removal keeps the validation score closest to the full forest's
    score ("similarity" criterion).

    Reference: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2822360/

    NOTE(review): `metaclass=ABCMeta` has no effect here — no method is
    abstract and the class is instantiated directly.
    """

    def __init__(self, models_parameters):
        # models_parameters must expose hyperparameters['n_estimators'],
        # seed and extracted_forest_size (see ModelParameters).
        self._models_parameters = models_parameters
        self._regressor = RandomForestRegressor(n_estimators=self._models_parameters.hyperparameters['n_estimators'],
            random_state=models_parameters.seed)
        self._extracted_forest_size = self._models_parameters.extracted_forest_size

    @property
    def models_parameters(self):
        return self._models_parameters

    def fit(self, X_train, y_train, X_val, y_val, score_metric=mean_squared_error):
        """
        Fit the full forest on (X_train, y_train), then prune it using the
        validation set: at each step, drop the tree whose removal leaves the
        validation score closest to the full forest's score.

        NOTE(review): the loop runs `extracted_forest_size` times and the
        selected trees are *removed* at the end, so the surviving forest has
        n_estimators - extracted_forest_size trees. Elsewhere (ModelFactory
        'random'/'omp' strategies) extracted_forest_size is the number of
        trees *kept* — confirm which semantics is intended.

        NOTE(review): cost is O(extracted_forest_size * n_trees^2) calls to
        tree.predict(X_val); per-tree predictions could be cached, but the
        float-summation order would change, so it is left as-is here.
        """

        self._regressor.fit(X_train, y_train)

        # Baseline score of the complete forest on the validation set.
        y_val_pred = self._regressor.predict(X_val)
        forest_pred = score_metric(y_val, y_val_pred)
        forest = self._regressor.estimators_
        selected_trees = list()
        tree_list = list(self._regressor.estimators_)

        for _ in range(self._extracted_forest_size):
            # Sentinel "worst similarity"; any real |score difference| below
            # this replaces it. found_index defaults to the first tree.
            best_similarity = 100000
            found_index = 0
            for i in range(len(tree_list)):
                # Temporarily remove tree i, score the remaining ensemble,
                # then re-insert it at the same position (tree_list is
                # restored before the next iteration).
                lonely_tree = tree_list[i]
                del tree_list[i]
                val_list = list()
                for tree in tree_list:
                    val_pred = tree.predict(X_val)
                    val_list.append(val_pred)
                val_list = np.array(val_list)
                # Ensemble prediction = mean of the remaining trees' outputs.
                val_mean = np.mean(val_list, axis=0)
                val_score = score_metric(val_mean, y_val)
                # Smaller |baseline - score| means removing this tree changes
                # the forest's behavior the least.
                temp_similarity = abs(forest_pred - val_score)
                if (temp_similarity < best_similarity):
                    found_index = i
                    best_similarity = temp_similarity
                tree_list.insert(i, lonely_tree)
            # Permanently remove the least-impactful tree for this round.
            selected_trees.append(tree_list[found_index])
            del tree_list[found_index]

        # Keep every tree that was not selected for removal. Set difference
        # relies on tree object identity and does not preserve order.
        pruned_forest = list(set(forest) - set(selected_trees))
        self._regressor.estimators_ = pruned_forest

    def score(self, X, y):
        """
        Mean-squared error of the pruned ensemble on (X, y): predictions are
        the unweighted mean of the surviving trees' outputs.

        NOTE(review): argument order is score_metric(prediction, y) here and
        in fit(); MSE is symmetric so the result is unaffected, but other
        metrics would not be.
        """
        test_list = list()
        for mod in self._regressor.estimators_:
            test_pred = mod.predict(X)
            test_list.append(test_pred)
        test_list = np.array(test_list)
        test_mean = np.mean(test_list, axis=0)
        score = mean_squared_error(test_mean, y)
        return score
Original line number Original line Diff line number Diff line
from bolsonaro.models.model_raw_results import ModelRawResults
from bolsonaro.models.model_raw_results import ModelRawResults
from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
from bolsonaro.models.omp_forest_classifier import OmpForestBinaryClassifier, OmpForestMulticlassClassifier
from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor
from bolsonaro.error_handling.logger_factory import LoggerFactory
from bolsonaro.error_handling.logger_factory import LoggerFactory
from bolsonaro.data.task import Task
from . import LOG_PATH
from . import LOG_PATH


from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, accuracy_score
import time
import time
import datetime
import datetime
import numpy as np
import numpy as np
@@ -12,16 +18,41 @@ class Trainer(object):
    Class capable of fitting any model object to some prepared data then evaluate and save results through the `train` method.
    Class capable of fitting any model object to some prepared data then evaluate and save results through the `train` method.
    """
    """


    def __init__(self, dataset):
    def __init__(self, dataset, regression_score_metric=mean_squared_error, classification_score_metric=accuracy_score,
        base_regression_score_metric=mean_squared_error, base_classification_score_metric=accuracy_score):
        """
        """


        :param dataset: Object with X_train, y_train, X_dev, y_dev, X_test and Y_test attributes
        :param dataset: Object with X_train, y_train, X_dev, y_dev, X_test and Y_test attributes
        """
        """
        self._dataset = dataset
        self._dataset = dataset
        self._logger = LoggerFactory.create(LOG_PATH, __name__)
        self._logger = LoggerFactory.create(LOG_PATH, __name__)
        self._regression_score_metric = regression_score_metric
        self._classification_score_metric = classification_score_metric
        self._base_regression_score_metric = base_regression_score_metric
        self._base_classification_score_metric = base_classification_score_metric
        self._score_metric_name = regression_score_metric.__name__ if dataset.task == Task.REGRESSION \
            else classification_score_metric.__name__
        self._base_score_metric_name = base_regression_score_metric.__name__ if dataset.task == Task.REGRESSION \
            else base_classification_score_metric.__name__


    def init(self, model):
    @property
        if model.models_parameters.subsets_used == 'train,dev':
    def score_metric_name(self):
        return self._score_metric_name

    @property
    def base_score_metric_name(self):
        return self._base_score_metric_name

    def init(self, model, subsets_used='train,dev'):
        if type(model) in [RandomForestRegressor, RandomForestClassifier]:
            if subsets_used == 'train,dev':
                self._X_forest = self._dataset.X_train
                self._y_forest = self._dataset.y_train
            else:
                self._X_forest = np.concatenate([self._dataset.X_train, self._dataset.X_dev])
                self._y_forest = np.concatenate([self._dataset.y_train, self._dataset.y_dev])    
            self._logger.debug('Fitting the forest on train subset')
        elif model.models_parameters.subsets_used == 'train,dev':
            self._X_forest = self._dataset.X_train
            self._X_forest = self._dataset.X_train
            self._y_forest = self._dataset.y_train
            self._y_forest = self._dataset.y_train
            self._X_omp = self._dataset.X_dev
            self._X_omp = self._dataset.X_dev
@@ -43,43 +74,77 @@ class Trainer(object):


    def train(self, model):
        """
        Fit `model` on the subsets selected by `init` and record the wall-clock
        training time in `self._begin_time` / `self._end_time`.

        :param model: An instance of either RandomForestRegressor, RandomForestClassifier, OmpForestRegressor,
            OmpForestBinaryClassifier, OmpForestMulticlassClassifier.
        :return:
        """
        self._logger.debug('Training model using train set...')
        self._begin_time = time.time()
        # Plain sklearn forests are fitted on the forest subset only; the OMP
        # wrappers additionally receive the subset used to fit the weights.
        if type(model) in (RandomForestRegressor, RandomForestClassifier):
            model.fit(X=self._X_forest, y=self._y_forest)
        else:
            model.fit(self._X_forest, self._y_forest, self._X_omp, self._y_omp)
        self._end_time = time.time()


    def __score_func(self, model, X, y_true):
        """
        Score `model` predictions on (X, y_true) with the metric matching
        the model family: the regression metric for regressors, the
        classification metric for classifiers.

        :param model: A trained forest model (OMP wrapper or plain sklearn forest).
        :param X: Feature matrix to predict on.
        :param y_true: Ground-truth targets aligned with X.
        :return: The metric value.
        :raises ValueError: If the model type is not supported.
        """
        if type(model) in [OmpForestRegressor, RandomForestRegressor, SimilarityForestRegressor]:
            y_pred = model.predict(X)
            return self._regression_score_metric(y_true, y_pred)
        if type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier, RandomForestClassifier]:
            y_pred = model.predict(X)
            if type(model) is OmpForestBinaryClassifier:
                # The binary OMP forest outputs continuous scores; round to hard labels.
                y_pred = y_pred.round()
            return self._classification_score_metric(y_true, y_pred)
        # Previously an unsupported model fell through and raised an
        # UnboundLocalError on `result`; fail with an explicit message instead.
        raise ValueError('Unsupported model type: {}'.format(type(model)))

    def __score_func_base(self, model, X, y_true):
        """
        Score the *base* forest on (X, y_true): for OMP wrappers this uses
        `predict_base_estimator`, for plain sklearn forests the model itself
        is the base estimator so `predict` is used directly.

        :param model: A trained forest model (OMP wrapper or plain sklearn forest).
        :param X: Feature matrix to predict on.
        :param y_true: Ground-truth targets aligned with X.
        :return: The base metric value.
        :raises ValueError: If the model type is not supported.
        """
        if type(model) == OmpForestRegressor:
            y_pred = model.predict_base_estimator(X)
            return self._base_regression_score_metric(y_true, y_pred)
        if type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier]:
            y_pred = model.predict_base_estimator(X)
            return self._base_classification_score_metric(y_true, y_pred)
        if type(model) == RandomForestClassifier:
            y_pred = model.predict(X)
            return self._base_classification_score_metric(y_true, y_pred)
        if type(model) in [RandomForestRegressor, SimilarityForestRegressor]:
            y_pred = model.predict(X)
            return self._base_regression_score_metric(y_true, y_pred)
        # Previously an unsupported model fell through and raised an
        # UnboundLocalError on `result`; fail with an explicit message instead.
        raise ValueError('Unsupported model type: {}'.format(type(model)))

    def compute_results(self, model, models_dir):
        """
        Evaluate `model` on the train/dev/test splits (both the extracted
        model and its base forest), persist the scores and log a summary.

        :param model: Object with
        :param models_dir: Where the results will be saved
        """
        dataset = self._dataset
        results = ModelRawResults(
            # The model object itself is intentionally not stored: pickling it
            # caused import issues when loading ModelRawResults back.
            model_object='',
            training_time=self._end_time - self._begin_time,
            datetime=datetime.datetime.now(),
            train_score=self.__score_func(model, dataset.X_train, dataset.y_train),
            dev_score=self.__score_func(model, dataset.X_dev, dataset.y_dev),
            test_score=self.__score_func(model, dataset.X_test, dataset.y_test),
            train_score_base=self.__score_func_base(model, dataset.X_train, dataset.y_train),
            dev_score_base=self.__score_func_base(model, dataset.X_dev, dataset.y_dev),
            test_score_base=self.__score_func_base(model, dataset.X_test, dataset.y_test),
            score_metric=self._score_metric_name,
            base_score_metric=self._base_score_metric_name
        )
        results.save(models_dir)

        # Log base-forest and extracted-model scores side by side per subset.
        self._logger.info("Base performance on test: {}".format(results.test_score_base))
        self._logger.info("Performance on test: {}".format(results.test_score))

        self._logger.info("Base performance on train: {}".format(results.train_score_base))
        self._logger.info("Performance on train: {}".format(results.train_score))

        self._logger.info("Base performance on dev: {}".format(results.dev_score_base))
        self._logger.info("Performance on dev: {}".format(results.dev_score))
Original line number Original line Diff line number Diff line
@@ -2,6 +2,8 @@ import os
import json
import json
import pickle
import pickle
from copy import deepcopy
from copy import deepcopy
import contextlib
import joblib




def resolve_experiment_id(models_dir):
def resolve_experiment_id(models_dir):
@@ -58,7 +60,6 @@ def binarize_class_data(data, class_pos, inplace=True):
    """
    """
    if not inplace:
    if not inplace:
        data = deepcopy(data)
        data = deepcopy(data)

    position_class_labels = (data == class_pos)
    position_class_labels = (data == class_pos)
    data[~(position_class_labels)] = -1
    data[~(position_class_labels)] = -1
    data[(position_class_labels)] = +1
    data[(position_class_labels)] = +1
@@ -66,10 +67,48 @@ def binarize_class_data(data, class_pos, inplace=True):
    return data
    return data


def change_binary_func_load(base_load_function):
    """
    Wrap a dataset loading function so the returned labels are binarized to
    {-1, +1}: the greatest original class label becomes +1, the other -1.

    :param base_load_function: Callable with the sklearn loader signature
        (accepts `return_X_y` and optionally `random_state`).
    :return: A loader `func_load(return_X_y, random_state=None)` returning (X, y).
    """
    def func_load(return_X_y, random_state=None):
        # Compare against None explicitly: a seed of 0 is a valid random_state
        # and must not be silently dropped by a truthiness test.
        if random_state is not None:
            X, y = base_load_function(return_X_y=return_X_y, random_state=random_state)
        else:
            X, y = base_load_function(return_X_y=return_X_y)
        possible_classes = sorted(set(y))
        assert len(possible_classes) == 2, "Function change binary_func_load only work for binary classfication"
        y = binarize_class_data(y, possible_classes[-1])
        return X, y
    return func_load

@contextlib.contextmanager
def tqdm_joblib(tqdm_object):
    """Context manager to patch joblib to report into tqdm progress bar given as argument.

    The patched callback subclasses joblib's own BatchCompletionCallBack and
    delegates to it via super(), so joblib's internal completion bookkeeping
    (task counting, dispatching of pending batches) keeps working. The previous
    stand-alone replacement bypassed that bookkeeping and ticked the bar once
    per callback instead of once per completed task.
    """
    class TqdmBatchCompletionCallback(joblib.parallel.BatchCompletionCallBack):
        def __call__(self, *args, **kwargs):
            # Advance the bar by the number of tasks in the completed batch.
            tqdm_object.update(n=self.batch_size)
            return super().__call__(*args, **kwargs)

    old_batch_callback = joblib.parallel.BatchCompletionCallBack
    joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
    try:
        yield tqdm_object
    finally:
        # Always restore joblib's original callback, even on error.
        joblib.parallel.BatchCompletionCallBack = old_batch_callback
        tqdm_object.close()

def is_int(value):
    """
    Return True if `value` can be converted to an int, False otherwise.

    :param value: Any object; strings like "42" qualify, "4.2" does not.
    :return: bool
    """
    try:
        int(value)
        return True
    except (ValueError, TypeError):
        # TypeError covers non-numeric objects such as None or lists, which
        # previously crashed instead of reporting False.
        return False

def is_float(value):
    """
    Return True if `value` can be converted to a float, False otherwise.

    :param value: Any object; strings like "3.14" or "1e-3" qualify.
    :return: bool
    """
    try:
        float(value)
        return True
    except (ValueError, TypeError):
        # TypeError covers non-numeric objects such as None or lists, which
        # previously crashed instead of reporting False.
        return False
Original line number Original line Diff line number Diff line
@@ -57,7 +57,56 @@ class Plotter(object):
        ax.plot(x_value, mean, c=color_mean, label=label)
        ax.plot(x_value, mean, c=color_mean, label=label)


    @staticmethod
    def plot_stage1_losses(file_path, all_experiment_scores_with_params,
        all_experiment_scores_wo_params, x_value, xlabel, ylabel, all_labels, title):
        """
        Draw two side-by-side panels (with / without best hyperparameters),
        each showing, per experiment, the mean score curve across seeds with a
        +/- one-std confidence band, then save the figure to `file_path`.
        """
        fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True)

        n = len(all_experiment_scores_with_params)
        if n != len(all_experiment_scores_wo_params):
            raise ValueError('all_experiment_scores_with_params and all_experiment_scores_wo_params must have the same len to be compared.')

        # One distinct color per curve, drawn from the nipy_spectral cmap.
        colors = Plotter.get_colors_from_cmap(n)

        panels = [all_experiment_scores_with_params, all_experiment_scores_wo_params]
        for ax, panel_scores in zip(axes, panels):
            for curve_index in range(n):
                # Scores are stored per seed: stack them to aggregate across seeds.
                per_seed_scores = list(panel_scores[curve_index].values())
                curve_mean = np.average(per_seed_scores, axis=0)
                curve_std = np.std(per_seed_scores, axis=0)
                Plotter.plot_mean_and_CI(
                    ax=ax,
                    mean=curve_mean,
                    lb=curve_mean + curve_std,
                    ub=curve_mean - curve_std,
                    x_value=x_value,
                    color_mean=colors[curve_index],
                    facecolor=colors[curve_index],
                    label=all_labels[curve_index]
                )

        # Shared y label on the left panel only; title on the right panel.
        axes[0].set_xlabel(xlabel)
        axes[1].set_xlabel(xlabel)
        axes[0].set_ylabel(ylabel)
        axes[1].set_title(title)
        handles, labels = axes[0].get_legend_handles_labels()
        legend = axes[0].legend(handles, labels, loc='upper center', bbox_to_anchor=(1.1, -0.15))
        fig.savefig(file_path, dpi=fig.dpi, bbox_extra_artists=(legend,), bbox_inches='tight')
        plt.close(fig)
        plt.close(fig)

    @staticmethod
    def plot_stage2_losses(file_path, all_experiment_scores, x_value,
        xlabel, ylabel, all_labels, title):

        fig, ax = plt.subplots()
        fig, ax = plt.subplots()


        n = len(all_experiment_scores)
        n = len(all_experiment_scores)
@@ -91,7 +140,7 @@ class Plotter(object):
        plt.ylabel(ylabel)
        plt.ylabel(ylabel)
        plt.title(title)
        plt.title(title)
        plt.legend(loc='upper right')
        plt.legend(loc='upper right')
        fig.savefig(file_path, dpi=fig.dpi)
        fig.savefig(file_path, dpi=fig.dpi, bbox_inches='tight')
        plt.close(fig)
        plt.close(fig)


    @staticmethod
    @staticmethod
Original line number Original line Diff line number Diff line
@@ -4,7 +4,7 @@ from bolsonaro.data.dataset_parameters import DatasetParameters
from bolsonaro.data.task import Task
from bolsonaro.data.task import Task
from bolsonaro.error_handling.logger_factory import LoggerFactory
from bolsonaro.error_handling.logger_factory import LoggerFactory
from bolsonaro.hyperparameter_searcher import HyperparameterSearcher
from bolsonaro.hyperparameter_searcher import HyperparameterSearcher
from bolsonaro.utils import save_obj_to_json
from bolsonaro.utils import save_obj_to_json, tqdm_joblib, is_int, is_float


import argparse
import argparse
import os
import os
@@ -12,13 +12,20 @@ import pathlib
import pickle
import pickle
import random
import random
from dotenv import find_dotenv, load_dotenv
from dotenv import find_dotenv, load_dotenv
from joblib import Parallel, delayed
from tqdm import tqdm
import threading
import numpy as np
import math
from collections import Counter
from itertools import chain, combinations


"""
"""
I had to install skopt from this repository
I had to install skopt from this repository
https://github.com/darenr/scikit-optimize that handles
https://github.com/darenr/scikit-optimize that handles
the issue described here https://github.com/scikit-optimize/scikit-optimize/issues/762.
the issue described here https://github.com/scikit-optimize/scikit-optimize/issues/762.
"""
"""
from skopt.space import Categorical, Integer, Real
from skopt.space import Categorical, Integer




def clean_numpy_int_dict(dictionary):
def clean_numpy_int_dict(dictionary):
@@ -34,6 +41,89 @@ def clean_numpy_int_list(list_n):
            clean_numpy_int_list(elem) if type(elem) == list else elem
            clean_numpy_int_list(elem) if type(elem) == list else elem
            for elem in list_n]
            for elem in list_n]


def process_job(dataset_name, seed, param_space, args):
    """
    Run one Bayesian hyperparameter search on `dataset_name` with a fixed
    random `seed` and return the best configuration found.

    :param dataset_name: Name of the dataset to load via DatasetLoader.
    :param seed: Random seed used both for data loading and for the search.
    :param param_space: skopt search-space dict mapping hyperparameter names to dimensions.
    :param args: Parsed CLI arguments (uses `n_iter` and `cv`).
    :return: Dict with the scorer name, best train/test scores, best
        parameters (numpy ints converted to plain ints) and the seed.
    """
    # One logger per job; the thread id disambiguates parallel workers.
    logger = LoggerFactory.create(LOG_PATH, 'hyperparameter-searcher_seed{}_ti{}'.format(
        seed, threading.get_ident()))
    logger.info('seed={}'.format(seed))

    dataset = DatasetLoader.load_default(dataset_name, seed)

    # Pick the scorer according to the task type.
    if dataset.task == Task.REGRESSION:
        scorer = 'neg_mean_squared_error'
    else:
        scorer = 'accuracy'

    bayesian_searcher = HyperparameterSearcher()
    opt = bayesian_searcher.search(dataset, param_space, args.n_iter,
        args.cv, seed, scorer)

    return {
        '_scorer': scorer,
        '_best_score_train': opt.best_score_,
        '_best_score_test': opt.score(dataset.X_test, dataset.y_test),
        '_best_parameters': clean_numpy_int_dict(opt.best_params_),
        '_random_seed': seed
    }

def run_hyperparameter_search_jobs(seeds, dataset_name, param_space, args):
    """
    Launch one hyperparameter search job per seed, in parallel.

    :param seeds: Iterable of random seeds (one job per seed).
    :param dataset_name: Dataset to search on.
    :param param_space: skopt search-space dict forwarded to each job.
    :param args: Parsed CLI arguments (uses `verbose` and `job_number`).
    :return: List of per-seed result dicts, as returned by `process_job`.
    """
    # tqdm_joblib patches joblib so completed jobs tick the progress bar.
    with tqdm_joblib(tqdm(total=len(seeds), disable=not args.verbose)):
        opt_results = Parallel(n_jobs=args.job_number)(delayed(process_job)(
            dataset_name, seed, param_space, args) for seed in seeds)
    return opt_results

def compute_best_params_over_seeds(seeds, dataset_name, param_space, args):
    """
    Aggregate per-seed Bayesian search results into a single "best" parameter
    set by counting the most frequent hyperparameter-value pairs across seeds.

    :param seeds: Iterable of random seeds (one search per seed).
    :param dataset_name: Dataset to search on.
    :param param_space: skopt search-space dict forwarded to each job.
    :param args: Parsed CLI arguments forwarded to the jobs.
    :return: Dict with the scorer name, mean best train/test scores over
        seeds, the aggregated best parameters, and the list of seeds used.
    """
    opt_results = run_hyperparameter_search_jobs(seeds, dataset_name, param_space, args)

    # Move k best_parameters to a list of dict
    all_best_params = [opt_result['_best_parameters'] for opt_result in opt_results]

    """
    list of hyperparam dicts -> list of hyperparam list
    where each element of form 'key:value' becomes 'key_value'
    to afterwards count most common pairs.
    """
    stringify_best_params = list()
    for current_best_params in all_best_params:
        new_best_params = list()
        for key, value in current_best_params.items():
            new_best_params.append(key + '_' + str(value))
        stringify_best_params.append(new_best_params)

    # Compute pair combinations
    pair_combinations = chain.from_iterable(combinations(line, 2) for line in stringify_best_params)

    # Count most common pair combinations in ascent order
    most_common_pair_combinations = Counter(pair_combinations).most_common()

    """
    Select the most frequent hyperparameter values
    until all different hyperparameter variables are
    filled.
    """
    all_param_names = all_best_params[0].keys()
    best_params = dict()
    for pair, _ in most_common_pair_combinations:
        for element in pair:
            # NOTE(review): splitting on '_' and rejoining assumes the *value*
            # itself contains no underscore (true for ints and the
            # 'auto'/'sqrt'/'log2' categories used here) — confirm if the
            # search space ever grows values with underscores.
            split = element.split('_')
            param, value = '_'.join(split[:-1]), split[-1]
            if param not in best_params:
                # Restore the numeric type lost by the string encoding.
                if is_int(value):
                    value = int(value)
                elif is_float(value):
                    value = float(value)
                best_params[param] = value
        if len(best_params) == len(all_param_names):
            break

    return {
        '_scorer': opt_results[0]['_scorer'],
        '_best_score_train': np.mean([opt_result['_best_score_train'] for opt_result in opt_results]),
        '_best_score_test': np.mean([opt_result['_best_score_test'] for opt_result in opt_results]),
        '_best_parameters': best_params,
        '_random_seed': [opt_result['_random_seed'] for opt_result in opt_results]
    }



if __name__ == "__main__":
if __name__ == "__main__":
    # get environment variables in .env
    # get environment variables in .env
@@ -41,57 +131,54 @@ if __name__ == "__main__":


    DEFAULT_CV = 3
    DEFAULT_CV = 3
    DEFAULT_N_ITER = 50
    DEFAULT_N_ITER = 50
    DEFAULT_VERBOSE = False
    DEFAULT_JOB_NUMBER = -1
    DICT_PARAM_SPACE = {'n_estimators': Integer(10, 1000),
    DICT_PARAM_SPACE = {'n_estimators': Integer(10, 1000),
                        'min_samples_leaf': Integer(1, 1000),
                        'min_samples_leaf': Integer(1, 1000),
                        'max_depth': Integer(1, 20),
                        'max_depth': Integer(1, 20),
                        'max_features': Categorical(['auto', 'sqrt', 'log2'], [0.5, 0.25, 0.25])}
                        'max_features': Categorical(['auto', 'sqrt', 'log2'], [0.5, 0.25, 0.25])}
    DATASET_LIST = ['boston', 'iris', 'diabetes']
    begin_random_seed_range = 1
    # , 'digits', 'linnerud', 'wine']
    end_random_seed_range = 2000
    DEFAULT_USE_VARIABLE_SEED_NUMBER = False


    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--cv', nargs='?', type=int, default=DEFAULT_CV, help='Specify the size of the cross-validation.')
    parser.add_argument('--cv', nargs='?', type=int, default=DEFAULT_CV, help='Specify the size of the cross-validation.')
    parser.add_argument('--n_iter', nargs='?', type=int, default=DEFAULT_N_ITER, help='Specify the number of iterations for the bayesian search.')
    parser.add_argument('--n_iter', nargs='?', type=int, default=DEFAULT_N_ITER, help='Specify the number of iterations for the bayesian search.')
    parser.add_argument('--seed', nargs='?', type=int, default=None, help='Specify a seed instead of generate it randomly.')
    parser.add_argument('--random_seed_number', nargs='?', type=int, default=DatasetLoader.DEFAULT_RANDOM_SEED_NUMBER, help='Number of random seeds used.')
    parser.add_argument('--datasets', nargs='+', type=str, default=DATASET_LIST, help='Specify the dataset used by the estimator.')
    parser.add_argument('--seeds', nargs='+', type=int, default=None, help='Specific a list of seeds instead of generate them randomly')
    parser.add_argument('--verbose', action='store_true', default=False, help='Print information during the bayesian search.')
    parser.add_argument('--use_variable_seed_number', action='store_true', default=DEFAULT_USE_VARIABLE_SEED_NUMBER, help='Compute the amount of random seeds depending on the dataset.')

    parser.add_argument('--datasets', nargs='+', type=str, default=DatasetLoader.dataset_names, help='Specify the dataset used by the estimator.')
    parser.add_argument('--verbose', action='store_true', default=DEFAULT_VERBOSE, help='Print tqdm progress bar.')
    parser.add_argument('--job_number', nargs='?', type=int, default=DEFAULT_JOB_NUMBER, help='Specify the number of job used during the parallelisation across seeds.')
    args = parser.parse_args()
    args = parser.parse_args()


    logger = LoggerFactory.create(LOG_PATH, os.path.basename(__file__))
    logger = LoggerFactory.create(LOG_PATH, os.path.basename(__file__))


    begin_random_seed_range = 1
    if args.seeds != None and args.random_seed_number > 1:
    end_random_seed_range = 2000
        logger.warning('seeds and random_seed_number parameters are both specified. Seeds will be used.')    


    if args.seed is None:
    # Seeds are either provided as parameters or generated at random
        random_seed = random.randint(begin_random_seed_range, end_random_seed_range)
    if not args.use_variable_seed_number:
    else:
        seeds = args.seeds if args.seeds is not None \
        random_seed = args.seed
            else [random.randint(begin_random_seed_range, end_random_seed_range) \
            for i in range(args.random_seed_number)]


    for dataset_name in args.datasets:
    for dataset_name in args.datasets:

        dataset_dir = os.path.join('experiments', dataset_name, 'stage1')
        dataset_dir = os.path.join('experiments', dataset_name, 'stage1')

        pathlib.Path(dataset_dir).mkdir(parents=True, exist_ok=True)
        pathlib.Path(dataset_dir).mkdir(parents=True, exist_ok=True)


        logger.info('Bayesian search on dataset {}'.format(dataset_name))
        logger.info('Bayesian search on dataset {}'.format(dataset_name))
        
        
        dataset_parameters = DatasetParameters(dataset_name, test_size=0.2, dev_size=0.01, random_state=random_seed, dataset_normalizer=None)
        """
        dataset = DatasetLoader.load(dataset_parameters)
        Compute the amount of random seeds as specified in

        DatasetLoader.dataset_seed_numbers dictionary, depending on
        if dataset.task == Task.REGRESSION:
        the dataset.
            scorer = 'neg_mean_squared_error'
        """
        else:
        if args.use_variable_seed_number:
            scorer = 'accuracy'
            seeds = [random.randint(begin_random_seed_range, end_random_seed_range) \

                for i in range(DatasetLoader.dataset_seed_numbers[dataset_name])]
        bayesian_searcher = HyperparameterSearcher()
        opt = bayesian_searcher.search(dataset, DICT_PARAM_SPACE, args.n_iter,
            args.cv, random_seed, scorer, args.verbose)


        dict_results = {'_scorer': scorer,
        dict_results = compute_best_params_over_seeds(seeds, dataset_name,
                        '_best_score_train': opt.best_score_,
            DICT_PARAM_SPACE, args)
                        '_best_score_test': opt.score(dataset.X_test, dataset.y_test),
                        '_best_parameters': clean_numpy_int_dict(opt.best_params_),
                        '_random_seed': random_seed
                        }


        save_obj_to_json(os.path.join(dataset_dir, 'params.json'), dict_results)
        save_obj_to_json(os.path.join(dataset_dir, 'params.json'), dict_results)
+138 −48
Original line number Original line Diff line number Diff line
@@ -3,7 +3,7 @@ from bolsonaro.data.dataset_loader import DatasetLoader
from bolsonaro.models.model_factory import ModelFactory
from bolsonaro.models.model_factory import ModelFactory
from bolsonaro.models.model_parameters import ModelParameters
from bolsonaro.models.model_parameters import ModelParameters
from bolsonaro.trainer import Trainer
from bolsonaro.trainer import Trainer
from bolsonaro.utils import resolve_experiment_id
from bolsonaro.utils import resolve_experiment_id, tqdm_joblib
from bolsonaro import LOG_PATH
from bolsonaro import LOG_PATH
from bolsonaro.error_handling.logger_factory import LoggerFactory
from bolsonaro.error_handling.logger_factory import LoggerFactory


@@ -13,9 +13,12 @@ import json
import pathlib
import pathlib
import random
import random
import os
import os
from concurrent import futures
from joblib import Parallel, delayed
import threading
import threading
import json
import json
from tqdm import tqdm
import numpy as np
import shutil




def process_job(seed, parameters, experiment_id, hyperparameters):
def process_job(seed, parameters, experiment_id, hyperparameters):
@@ -51,10 +54,10 @@ def process_job(seed, parameters, experiment_id, hyperparameters):


    trainer = Trainer(dataset)
    trainer = Trainer(dataset)


    if parameters['extraction_strategy'] != 'none':
        for extracted_forest_size in parameters['extracted_forest_size']:
        for extracted_forest_size in parameters['extracted_forest_size']:
        # question if training is too long, one may also split experiments for different forest sizes into different workers
            logger.info('extracted_forest_size={}'.format(extracted_forest_size))
            logger.info('extracted_forest_size={}'.format(extracted_forest_size))
        sub_models_dir = models_dir + os.sep + 'extracted_forest_size' + os.sep + str(extracted_forest_size)
            sub_models_dir = models_dir + os.sep + 'extracted_forest_sizes' + os.sep + str(extracted_forest_size)
            pathlib.Path(sub_models_dir).mkdir(parents=True, exist_ok=True)
            pathlib.Path(sub_models_dir).mkdir(parents=True, exist_ok=True)


            model_parameters = ModelParameters(
            model_parameters = ModelParameters(
@@ -63,52 +66,104 @@ def process_job(seed, parameters, experiment_id, hyperparameters):
                subsets_used=parameters['subsets_used'],
                subsets_used=parameters['subsets_used'],
                normalize_weights=parameters['normalize_weights'],
                normalize_weights=parameters['normalize_weights'],
                seed=seed,
                seed=seed,
            hyperparameters=hyperparameters
                hyperparameters=hyperparameters,
                extraction_strategy=parameters['extraction_strategy']
            )
            )
            model_parameters.save(sub_models_dir, experiment_id)
            model_parameters.save(sub_models_dir, experiment_id)


            model = ModelFactory.build(dataset.task, model_parameters)
            model = ModelFactory.build(dataset.task, model_parameters)


        trainer.init(model)
            trainer.init(model, subsets_used=parameters['subsets_used'])
            trainer.train(model)
            trainer.compute_results(model, sub_models_dir)
    else:
        forest_size = hyperparameters['n_estimators']
        logger.info('Base forest training with fixed forest size of {}'.format(forest_size))
        sub_models_dir = models_dir + os.sep + 'forest_size' + os.sep + str(forest_size)
        pathlib.Path(sub_models_dir).mkdir(parents=True, exist_ok=True)

        model_parameters = ModelParameters(
            extracted_forest_size=forest_size,
            normalize_D=parameters['normalize_D'],
            subsets_used=parameters['subsets_used'],
            normalize_weights=parameters['normalize_weights'],
            seed=seed,
            hyperparameters=hyperparameters,
            extraction_strategy=parameters['extraction_strategy']
        )
        model_parameters.save(sub_models_dir, experiment_id)

        model = ModelFactory.build(dataset.task, model_parameters)

        trainer.init(model, subsets_used=parameters['subsets_used'])
        trainer.train(model)
        trainer.train(model)
        trainer.compute_results(model, sub_models_dir)
        trainer.compute_results(model, sub_models_dir)
    logger.info('Training done')
    logger.info('Training done')


"""
Command lines example for stage 1:
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=none --save_experiment_configuration 1 none_with_params --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=random --save_experiment_configuration 1 random_with_params --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 1 omp_with_params --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=none --skip_best_hyperparams --save_experiment_configuration 1 none_wo_params --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=random --skip_best_hyperparams --save_experiment_configuration 1 random_wo_params --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params --extracted_forest_size_stop=0.05
python code/compute_results.py --stage 1 --experiment_ids 1 2 3 4 5 6 --dataset_name=california_housing

Command lines example for stage 2:
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 no_normalization --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_D --normalize_D --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_weights --normalize_weights --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_D_and_weights --normalize_D --normalize_weights --extracted_forest_size_stop=0.05
python code/compute_results.py --stage 2 --experiment_ids 7 8 9 10 --dataset_name=california_housing

Command lines example for stage 3:
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 3 train-dev_subset --extracted_forest_size_stop=0.05 --subsets_used train,dev
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 3 train-dev_train-dev_subset --extracted_forest_size_stop=0.05 --subsets_used train+dev,train+dev
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 3 train-train-dev_subset --extracted_forest_size_stop=0.05 --subsets_used train,train+dev
python code/compute_results.py --stage 3 --experiment_ids 11 12 13 --dataset_name=california_housing

Command lines example for stage 4:
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=none --save_experiment_configuration 4 none_with_params --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=random --save_experiment_configuration 4 random_with_params --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 4 omp_with_params --extracted_forest_size_stop=0.05 --subsets_used train+dev,train+dev
python code/compute_results.py --stage 4 --experiment_ids 1 2 3 --dataset_name=california_housing
"""
if __name__ == "__main__":
if __name__ == "__main__":
    load_dotenv(find_dotenv('.env'))
    load_dotenv(find_dotenv('.env'))

    DEFAULT_EXPERIMENT_CONFIGURATION_PATH = 'experiments'
    DEFAULT_EXPERIMENT_CONFIGURATION_PATH = 'experiments'
    DEFAULT_DATASET_NAME = 'boston'
    # the models will be stored in a directory structure like: models/{experiment_id}/seeds/{seed_nb}/extracted_forest_sizes/{extracted_forest_size}
    DEFAULT_NORMALIZE_D = False
    DEFAULT_MODELS_DIR = os.environ['project_dir'] + os.sep + 'models'
    DEFAULT_DATASET_NORMALIZER = None
    DEFAULT_VERBOSE = False
    DEFAULT_FOREST_SIZE = 100
    DEFAULT_SKIP_BEST_HYPERPARAMS = False
    DEFAULT_EXTRACTED_FOREST_SIZE = 10
    DEFAULT_JOB_NUMBER = -1
    # the models will be stored in a directory structure like: models/{experiment_id}/seeds/{seed_nb}/extracted_forest_size/{nb_extracted_trees}
    DEFAULT_EXTRACTION_STRATEGY = 'omp'
    DEFAULT_MODELS_DIR = os.environ["project_dir"] + os.sep + 'models'
    DEFAULT_DEV_SIZE = 0.2
    DEFAULT_TEST_SIZE = 0.2
    DEFAULT_RANDOM_SEED_NUMBER = 1
    DEFAULT_SUBSETS_USED = 'train,dev'
    DEFAULT_NORMALIZE_WEIGHTS = False


    begin_random_seed_range = 1
    begin_random_seed_range = 1
    end_random_seed_range = 2000
    end_random_seed_range = 2000


    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--experiment_id', nargs='?', type=int, default=None, help='Specify an experiment id. Remove already existing model with this specified experiment id.')
    parser.add_argument('--experiment_configuration', nargs='?', type=str, default=None, help='Specify an experiment configuration file name. Overload all other parameters.')
    parser.add_argument('--experiment_configuration', nargs='?', type=str, default=None, help='Specify an experiment configuration file name. Overload all other parameters.')
    parser.add_argument('--experiment_configuration_path', nargs='?', type=str, default=DEFAULT_EXPERIMENT_CONFIGURATION_PATH, help='Specify the experiment configuration directory path.')
    parser.add_argument('--experiment_configuration_path', nargs='?', type=str, default=DEFAULT_EXPERIMENT_CONFIGURATION_PATH, help='Specify the experiment configuration directory path.')
    parser.add_argument('--dataset_name', nargs='?', type=str, default=DEFAULT_DATASET_NAME, help='Specify the dataset. Regression: boston, diabetes, linnerud, california_housing. Classification: iris, digits, wine, breast_cancer, olivetti_faces, 20newsgroups, 20newsgroups_vectorized, lfw_people, lfw_pairs, covtype, rcv1, kddcup99.')
    parser.add_argument('--dataset_name', nargs='?', type=str, default=DatasetLoader.DEFAULT_DATASET_NAME, help='Specify the dataset. Regression: boston, diabetes, linnerud, california_housing. Classification: iris, digits, wine, breast_cancer, olivetti_faces, 20newsgroups, 20newsgroups_vectorized, lfw_people, lfw_pairs, covtype, rcv1, kddcup99.')
    parser.add_argument('--normalize_D', action='store_true', default=DEFAULT_NORMALIZE_D, help='Specify if we want to normalize the prediction of the forest by doing the L2 division of the pred vectors.')
    parser.add_argument('--normalize_D', action='store_true', default=DatasetLoader.DEFAULT_NORMALIZE_D, help='Specify if we want to normalize the prediction of the forest by doing the L2 division of the pred vectors.')
    parser.add_argument('--dataset_normalizer', nargs='?', type=str, default=DEFAULT_DATASET_NORMALIZER, help='Specify which dataset normalizer use (either standard, minmax, robust or normalizer).')
    parser.add_argument('--dataset_normalizer', nargs='?', type=str, default=DatasetLoader.DEFAULT_DATASET_NORMALIZER, help='Specify which dataset normalizer use (either standard, minmax, robust or normalizer).')
    parser.add_argument('--forest_size', nargs='?', type=int, default=DEFAULT_FOREST_SIZE, help='The number of trees of the random forest.')
    parser.add_argument('--forest_size', nargs='?', type=int, default=None, help='The number of trees of the random forest.')
    parser.add_argument('--extracted_forest_size', nargs='+', type=int, default=DEFAULT_EXTRACTED_FOREST_SIZE, help='The number of trees selected by OMP.')
    parser.add_argument('--extracted_forest_size_samples', nargs='?', type=int, default=DatasetLoader.DEFAULT_EXTRACTED_FOREST_SIZE_SAMPLES, help='The number of extracted forest sizes (proportional to the forest size) selected by OMP.')
    parser.add_argument('--extracted_forest_size_stop', nargs='?', type=float, default=DatasetLoader.DEFAULT_EXTRACTED_FOREST_SIZE_STOP, help='Specify the upper bound of the extracted forest sizes linspace.')
    parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
    parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
    parser.add_argument('--dev_size', nargs='?', type=float, default=DEFAULT_DEV_SIZE, help='Dev subset ratio.')
    parser.add_argument('--dev_size', nargs='?', type=float, default=DatasetLoader.DEFAULT_DEV_SIZE, help='Dev subset ratio.')
    parser.add_argument('--test_size', nargs='?', type=float, default=DEFAULT_TEST_SIZE, help='Test subset ratio.')
    parser.add_argument('--test_size', nargs='?', type=float, default=DatasetLoader.DEFAULT_TEST_SIZE, help='Test subset ratio.')
    parser.add_argument('--random_seed_number', nargs='?', type=int, default=DEFAULT_RANDOM_SEED_NUMBER, help='Number of random seeds used.')
    parser.add_argument('--random_seed_number', nargs='?', type=int, default=DatasetLoader.DEFAULT_RANDOM_SEED_NUMBER, help='Number of random seeds used.')
    parser.add_argument('--seeds', nargs='+', type=int, default=None, help='Specific a list of seeds instead of generate them randomly')
    parser.add_argument('--seeds', nargs='+', type=int, default=None, help='Specific a list of seeds instead of generate them randomly')
    parser.add_argument('--subsets_used', nargs='+', type=str, default=DEFAULT_SUBSETS_USED, help='train,dev: forest on train, OMP on dev. train+dev,train+dev: both forest and OMP on train+dev. train,train+dev: forest on train+dev and OMP on dev.')
    parser.add_argument('--subsets_used', nargs='?', type=str, default=DatasetLoader.DEFAULT_SUBSETS_USED, help='train,dev: forest on train, OMP on dev. train+dev,train+dev: both forest and OMP on train+dev. train,train+dev: forest on train+dev and OMP on dev.')
    parser.add_argument('--normalize_weights', action='store_true', default=DEFAULT_NORMALIZE_WEIGHTS, help='Divide the predictions by the weights sum.')
    parser.add_argument('--normalize_weights', action='store_true', default=DatasetLoader.DEFAULT_NORMALIZE_WEIGHTS, help='Divide the predictions by the weights sum.')
    parser.add_argument('--verbose', action='store_true', default=DEFAULT_VERBOSE, help='Print tqdm progress bar.')
    parser.add_argument('--skip_best_hyperparams', action='store_true', default=DEFAULT_SKIP_BEST_HYPERPARAMS, help='Do not use the best hyperparameters if there exist.')
    parser.add_argument('--save_experiment_configuration', nargs='+', default=None, help='Save the experiment parameters specified in the command line in a file. Args: {{stage_num}} {{name}}')
    parser.add_argument('--job_number', nargs='?', type=int, default=DEFAULT_JOB_NUMBER, help='Specify the number of job used during the parallelisation across seeds.')
    parser.add_argument('--extraction_strategy', nargs='?', type=str, default=DEFAULT_EXTRACTION_STRATEGY, help='Specify the strategy to apply to extract the trees from the forest. Either omp, random, none or similarity.')
    args = parser.parse_args()
    args = parser.parse_args()


    if args.experiment_configuration:
    if args.experiment_configuration:
@@ -118,26 +173,43 @@ if __name__ == "__main__":
    else:
    else:
        parameters = args.__dict__
        parameters = args.__dict__


    if parameters['extraction_strategy'] not in ['omp', 'random', 'none', 'similarity']:
        raise ValueError('Specified extraction strategy {} is not supported.'.format(parameters.extraction_strategy))

    pathlib.Path(parameters['models_dir']).mkdir(parents=True, exist_ok=True)
    pathlib.Path(parameters['models_dir']).mkdir(parents=True, exist_ok=True)


    logger = LoggerFactory.create(LOG_PATH, os.path.basename(__file__))
    logger = LoggerFactory.create(LOG_PATH, os.path.basename(__file__))


    # The number of tree to extract from forest (K)
    parameters['extracted_forest_size'] = parameters['extracted_forest_size'] \
        if type(parameters['extracted_forest_size']) == list \
        else [parameters['extracted_forest_size']]

    hyperparameters_path = os.path.join('experiments', args.dataset_name, 'stage1', 'params.json')
    hyperparameters_path = os.path.join('experiments', args.dataset_name, 'stage1', 'params.json')
    if os.path.exists(hyperparameters_path):
    if os.path.exists(hyperparameters_path):
        logger.info("Hyperparameters found for this dataset at '{}'".format(hyperparameters_path))
        logger.info("Hyperparameters found for this dataset at '{}'".format(hyperparameters_path))
        with open(hyperparameters_path, 'r+') as file_hyperparameter:
        with open(hyperparameters_path, 'r+') as file_hyperparameter:
            hyperparameters = json.load(file_hyperparameter)['best_parameters']
            loaded_hyperparameters = json.load(file_hyperparameter)['best_parameters']
            if args.skip_best_hyperparams:
                hyperparameters = {'n_estimators': loaded_hyperparameters['n_estimators']}
            else:
                hyperparameters = loaded_hyperparameters
    else:
    else:
        hyperparameters = {}
        hyperparameters = {}


    if parameters['forest_size'] is not None:
    """
    First case: no best hyperparameters are specified and no forest_size parameter
    specified in argument, so use the DEFAULT_FOREST_SIZE.
    Second case: no matter if hyperparameters are specified, the forest_size parameter
    will override it.
    Third implicit case: use the number of estimators found in specified hyperparameters.
    """
    if len(hyperparameters) == 0 and parameters['forest_size'] is None:
        hyperparameters['n_estimators'] = DatasetLoader.DEFAULT_FOREST_SIZE
    elif parameters['forest_size'] is not None:
        hyperparameters['n_estimators'] = parameters['forest_size']
        hyperparameters['n_estimators'] = parameters['forest_size']


    # The number of tree to extract from forest (K)
    parameters['extracted_forest_size'] = np.unique(np.around(hyperparameters['n_estimators'] *
        np.linspace(0, args.extracted_forest_size_stop,
        parameters['extracted_forest_size_samples'] + 1,
        endpoint=False)[1:]).astype(np.int)).tolist()

    if parameters['seeds'] != None and parameters['random_seed_number'] > 1:
    if parameters['seeds'] != None and parameters['random_seed_number'] > 1:
        logger.warning('seeds and random_seed_number parameters are both specified. Seeds will be used.')    
        logger.warning('seeds and random_seed_number parameters are both specified. Seeds will be used.')    


@@ -146,6 +218,10 @@ if __name__ == "__main__":
        else [random.randint(begin_random_seed_range, end_random_seed_range) \
        else [random.randint(begin_random_seed_range, end_random_seed_range) \
        for i in range(parameters['random_seed_number'])]
        for i in range(parameters['random_seed_number'])]


    if args.experiment_id:
        experiment_id = args.experiment_id
        shutil.rmtree(os.path.join(parameters['models_dir'], str(experiment_id)), ignore_errors=True)
    else:
        # Resolve the next experiment id number (last id + 1)
        # Resolve the next experiment id number (last id + 1)
        experiment_id = resolve_experiment_id(parameters['models_dir'])
        experiment_id = resolve_experiment_id(parameters['models_dir'])
    logger.info('Experiment id: {}'.format(experiment_id))
    logger.info('Experiment id: {}'.format(experiment_id))
@@ -153,18 +229,32 @@ if __name__ == "__main__":
    """
    """
    If the experiment configuration isn't coming from
    If the experiment configuration isn't coming from
    an already existing file, save it to a json file to
    an already existing file, save it to a json file to
    keep trace of it.
    keep trace of it (either a specified path, either in 'unnamed' dir.).
    """
    """
    if args.experiment_configuration is None:
    if args.experiment_configuration is None:
        with open(args.experiment_configuration_path + os.sep + 'unnamed_{}.json'.format(
        if args.save_experiment_configuration:
            experiment_id), 'w') as output_file:
            if len(args.save_experiment_configuration) != 2:
                raise ValueError('save_experiment_configuration must have two parameters.')
            elif int(args.save_experiment_configuration[0]) not in list(range(1, 6)):
                raise ValueError('save_experiment_configuration first parameter must be a supported stage id (i.e. [1, 5]).')
            output_experiment_stage_path = os.path.join(args.experiment_configuration_path,
                args.dataset_name, 'stage' + args.save_experiment_configuration[0])
            pathlib.Path(output_experiment_stage_path).mkdir(parents=True, exist_ok=True)
            output_experiment_configuration_path = os.path.join(output_experiment_stage_path,
                args.save_experiment_configuration[1] + '.json')
        else:
            pathlib.Path(os.path.join(args.experiment_configuration_path, 'unnamed')).mkdir(parents=True, exist_ok=True)
            output_experiment_configuration_path = os.path.join(
                args.experiment_configuration_path, 'unnamed', 'unnamed_{}.json'.format(
                experiment_id))
        with open(output_experiment_configuration_path, 'w') as output_file:
            json.dump(
            json.dump(
                parameters,
                parameters,
                output_file,
                output_file,
                indent=4
                indent=4
            )
            )


    # Train as much job as there are seeds
    # Run as much job as there are seeds
    with futures.ProcessPoolExecutor(len(seeds)) as executor:
    with tqdm_joblib(tqdm(total=len(seeds), disable=not args.verbose)) as progress_bar:
        list(f.result() for f in futures.as_completed(executor.submit(process_job, seed,
        Parallel(n_jobs=args.job_number)(delayed(process_job)(seeds[i],
            parameters, experiment_id, hyperparameters) for seed in seeds))
            parameters, experiment_id, hyperparameters) for i in range(len(seeds)))

conda_requirements.txt

0 → 100644
+143 −0
Original line number Original line Diff line number Diff line
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
_libgcc_mutex=0.1=main
alabaster=0.7.12=pypi_0
attrs=19.3.0=pypi_0
autopep8=1.4.4=py_0
awscli=1.16.273=pypi_0
babel=2.7.0=pypi_0
backcall=0.1.0=pypi_0
blas=1.0=mkl
bleach=3.1.0=pypi_0
botocore=1.13.9=pypi_0
ca-certificates=2019.10.16=0
certifi=2019.9.11=py37_0
chardet=3.0.4=pypi_0
click=7.0=pypi_0
colorama=0.4.1=pypi_0
coverage=4.5.4=pypi_0
cycler=0.10.0=py_2
dbus=1.13.6=he372182_0
decorator=4.4.1=pypi_0
defusedxml=0.6.0=pypi_0
docutils=0.15.2=pypi_0
entrypoints=0.3=pypi_0
expat=2.2.5=he1b5a44_1004
flake8=3.7.9=pypi_0
fontconfig=2.13.1=he4413a7_1000
freetype=2.10.0=he983fc9_1
gettext=0.19.8.1=hc5be6a0_1002
glib=2.58.3=h6f030ca_1002
gst-plugins-base=1.14.5=h0935bb2_0
gstreamer=1.14.5=h36ae1b5_0
icu=58.2=hf484d3e_1000
idna=2.8=pypi_0
imagesize=1.1.0=pypi_0
importlib-metadata=0.23=pypi_0
intel-openmp=2019.4=243
ipykernel=5.1.3=pypi_0
ipython=7.9.0=pypi_0
ipython-genutils=0.2.0=pypi_0
ipywidgets=7.5.1=pypi_0
jedi=0.15.1=pypi_0
jinja2=2.10.3=pypi_0
jmespath=0.9.4=pypi_0
joblib=0.14.0=py_0
jpeg=9c=h14c3975_1001
jsonschema=3.1.1=pypi_0
jupyter=1.0.0=pypi_0
jupyter-client=5.3.4=pypi_0
jupyter-console=6.0.0=pypi_0
jupyter-core=4.6.1=pypi_0
kiwisolver=1.1.0=py37hc9558a2_0
libedit=3.1.20181209=hc058e9b_0
libffi=3.2.1=hd88cf55_4
libgcc-ng=9.1.0=hdf63c60_0
libgfortran-ng=7.3.0=hdf63c60_0
libiconv=1.15=h516909a_1005
libpng=1.6.37=hed695b0_0
libstdcxx-ng=9.1.0=hdf63c60_0
libuuid=2.32.1=h14c3975_1000
libxcb=1.13=h14c3975_1002
libxml2=2.9.9=h13577e0_2
markupsafe=1.1.1=pypi_0
matplotlib=3.1.1=pypi_0
mccabe=0.6.1=pypi_0
mistune=0.8.4=pypi_0
mkl=2019.4=243
mkl-service=2.3.0=py37he904b0f_0
mkl_fft=1.0.14=py37ha843d7b_0
mkl_random=1.1.0=py37hd6b4f25_0
more-itertools=7.2.0=pypi_0
nbconvert=5.6.1=pypi_0
nbformat=4.4.0=pypi_0
ncurses=6.1=he6710b0_1
notebook=6.0.2=pypi_0
numpy=1.17.2=py37haad9e8e_0
numpy-base=1.17.2=py37hde5b4d6_0
openssl=1.1.1d=h7b6447c_3
packaging=19.2=pypi_0
pandas=0.25.2=py37he6710b0_0
pandocfilters=1.4.2=pypi_0
parso=0.5.1=pypi_0
pcre=8.43=he1b5a44_0
pexpect=4.7.0=pypi_0
pickleshare=0.7.5=pypi_0
pip=19.3.1=py37_0
prometheus-client=0.7.1=pypi_0
prompt-toolkit=2.0.10=pypi_0
pthread-stubs=0.4=h14c3975_1001
ptyprocess=0.6.0=pypi_0
pyaml=19.4.1=pypi_0
pyasn1=0.4.7=pypi_0
pycodestyle=2.5.0=pypi_0
pyflakes=2.1.1=pypi_0
pygments=2.4.2=pypi_0
pyparsing=2.4.3=pypi_0
pyqt=5.9.2=py37hcca6a23_4
pyrsistent=0.15.5=pypi_0
python=3.7.4=h265db76_1
python-dateutil=2.8.0=py37_0
python-dotenv=0.10.3=pypi_0
pytz=2019.3=py_0
pyyaml=5.1.2=pypi_0
pyzmq=18.1.0=pypi_0
qt=5.9.7=h52cfd70_2
qtconsole=4.5.5=pypi_0
readline=7.0=h7b6447c_5
requests=2.22.0=pypi_0
rsa=3.4.2=pypi_0
s3transfer=0.2.1=pypi_0
scikit-learn=0.21.3=py37hd81dba3_0
scikit-optimize=0.6+19.g180d6be=pypi_0
scipy=1.3.1=py37h7c811a0_0
send2trash=1.5.0=pypi_0
setuptools=41.6.0=py37_0
sip=4.19.8=py37hf484d3e_1000
six=1.12.0=py37_0
snowballstemmer=2.0.0=pypi_0
sphinx=2.2.1=pypi_0
sphinxcontrib-applehelp=1.0.1=pypi_0
sphinxcontrib-devhelp=1.0.1=pypi_0
sphinxcontrib-htmlhelp=1.0.2=pypi_0
sphinxcontrib-jsmath=1.0.1=pypi_0
sphinxcontrib-qthelp=1.0.2=pypi_0
sphinxcontrib-serializinghtml=1.1.3=pypi_0
sqlite=3.30.1=h7b6447c_0
terminado=0.8.2=pypi_0
testpath=0.4.4=pypi_0
tk=8.6.8=hbc83047_0
tornado=6.0.3=py37h516909a_0
tqdm=4.37.0=pypi_0
traitlets=4.3.3=pypi_0
urllib3=1.25.6=pypi_0
wcwidth=0.1.7=pypi_0
webencodings=0.5.1=pypi_0
wheel=0.33.6=py37_0
widgetsnbextension=3.5.1=pypi_0
xorg-libxau=1.0.9=h14c3975_0
xorg-libxdmcp=1.1.3=h516909a_0
xz=5.2.4=h14c3975_4
zipp=0.6.0=pypi_0
zlib=1.2.11=h7b6447c_3

data/diamonds.csv

0 → 100644
+53941 −0

File added.

Preview size limit exceeded, changes collapsed.

Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "none_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 6,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 5,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "no_normalization"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D_and_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "models_dir": "models/20newsgroups_vectorized/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "4",
        "none_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        10,
        21,
        31,
        42,
        52,
        63,
        73,
        84,
        94,
        104,
        115,
        125,
        136,
        146,
        157,
        167,
        177,
        188,
        198,
        209,
        219,
        230,
        240,
        251,
        261,
        271,
        282,
        292,
        303,
        313
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "models_dir": "models/20newsgroups_vectorized/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "4",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        10,
        21,
        31,
        42,
        52,
        63,
        73,
        84,
        94,
        104,
        115,
        125,
        136,
        146,
        157,
        167,
        177,
        188,
        198,
        209,
        219,
        230,
        240,
        251,
        261,
        271,
        282,
        292,
        303,
        313
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "models_dir": "models/20newsgroups_vectorized/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "4",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        10,
        21,
        31,
        42,
        52,
        63,
        73,
        84,
        94,
        104,
        115,
        125,
        136,
        146,
        157,
        167,
        177,
        188,
        198,
        209,
        219,
        230,
        240,
        251,
        261,
        271,
        282,
        292,
        303,
        313
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 10,
    "extracted_forest_size_stop": 0.4,
    "models_dir": "models/boston/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        2078,
        90
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "none_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        36,
        73,
        109,
        145,
        182,
        218,
        255,
        291,
        327,
        364
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 6,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
{
    "scorer": "neg_mean_squared_error",
    "scorer": "neg_mean_squared_error",
    "best_score_train": -11.238253315624897,
    "best_score_train": -13.33228274304088,
    "best_score_test": -7.312532120669678,
    "best_score_test": -13.650326577972058,
    "best_parameters": {
    "best_parameters": {
        "max_depth": 20,
        "max_features": "auto",
        "max_features": "auto",
        "min_samples_leaf": 1,
        "min_samples_leaf": 1,
        "max_depth": 20,
        "n_estimators": 1000
        "n_estimators": 1000
    },
    },
    "random_seed": 289
    "random_seed": [
        1812,
        1844,
        1376,
        383,
        310,
        1620,
        54,
        1502,
        324,
        1536,
        1202,
        1069,
        645,
        1706,
        423
    ]
}
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 5,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "no_normalization"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D_and_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "models_dir": "models/boston/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "4",
        "none_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        13,
        26,
        39,
        52,
        65,
        77,
        90,
        103,
        116,
        129,
        142,
        155,
        168,
        181,
        194,
        206,
        219,
        232,
        245,
        258,
        271,
        284,
        297,
        310,
        323,
        335,
        348,
        361,
        374,
        387
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "models_dir": "models/boston/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "4",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        13,
        26,
        39,
        52,
        65,
        77,
        90,
        103,
        116,
        129,
        142,
        155,
        168,
        181,
        194,
        206,
        219,
        232,
        245,
        258,
        271,
        284,
        297,
        310,
        323,
        335,
        348,
        361,
        374,
        387
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "models_dir": "models/boston/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "4",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        13,
        26,
        39,
        52,
        65,
        77,
        90,
        103,
        116,
        129,
        142,
        155,
        168,
        181,
        194,
        206,
        219,
        232,
        245,
        258,
        271,
        284,
        297,
        310,
        323,
        335,
        348,
        361,
        374,
        387
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "none_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 6,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
{
    "scorer": "accuracy",
    "scorer": "accuracy",
    "best_score_train": 0.96,
    "best_score_train": 0.9562271062271059,
    "best_score_test": 0.956140350877193,
    "best_score_test": 0.9514619883040936,
    "best_parameters": {
    "best_parameters": {
        "max_depth": 20,
        "max_depth": 20,
        "max_features": "sqrt",
        "min_samples_leaf": 1,
        "min_samples_leaf": 1,
        "n_estimators": 1000
        "n_estimators": 1000,
    }
        "max_features": "log2"
    },
    "random_seed": [
        1505,
        5,
        484,
        284,
        289,
        1014,
        1752,
        497,
        1350,
        781,
        408,
        256,
        1494,
        1940,
        842
    ]
}
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 5,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "no_normalization"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D_and_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "models_dir": "models/breast_cancer/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "4",
        "none_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        1,
        3,
        4,
        5,
        6,
        8,
        9,
        10,
        12,
        13,
        14,
        15,
        17,
        18,
        19,
        21,
        22,
        23,
        25,
        26,
        27,
        28,
        30,
        31,
        32,
        34,
        35,
        36,
        37,
        39
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "models_dir": "models/breast_cancer/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "4",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        1,
        3,
        4,
        5,
        6,
        8,
        9,
        10,
        12,
        13,
        14,
        15,
        17,
        18,
        19,
        21,
        22,
        23,
        25,
        26,
        27,
        28,
        30,
        31,
        32,
        34,
        35,
        36,
        37,
        39
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "models_dir": "models/breast_cancer/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "4",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        1,
        3,
        4,
        5,
        6,
        8,
        9,
        10,
        12,
        13,
        14,
        15,
        17,
        18,
        19,
        21,
        22,
        23,
        25,
        26,
        27,
        28,
        30,
        31,
        32,
        34,
        35,
        36,
        37,
        39
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "none_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.1,
    "models_dir": ".\\models",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "none_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        16,
        33,
        50,
        66,
        83
    ],
    "experiment_id": 1
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.1,
    "models_dir": ".\\models",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        16,
        33,
        50,
        66,
        83
    ],
    "experiment_id": 4
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.1,
    "models_dir": ".\\models",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        16,
        33,
        50,
        66,
        83
    ],
    "experiment_id": 3
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 6,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.1,
    "models_dir": ".\\models",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        16,
        33,
        50,
        66,
        83
    ],
    "experiment_id": 6
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "scorer": "neg_mean_squared_error",
    "best_score_train": -0.2535049905518054,
    "best_score_test": -0.24128661227361273,
    "best_parameters": {
        "max_features": "log2",
        "min_samples_leaf": 1,
        "n_estimators": 1000,
        "max_depth": 18
    },
    "random_seed": [
        1012,
        529,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.1,
    "models_dir": ".\\models",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        16,
        33,
        50,
        66,
        83
    ],
    "experiment_id": 2
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 5,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.1,
    "models_dir": ".\\models",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        16,
        33,
        50,
        66,
        83
    ],
    "experiment_id": 5
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "no_normalization"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D_and_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "models_dir": "models/california_housing/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "4",
        "none_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        13,
        26,
        39,
        52,
        65,
        77,
        90,
        103,
        116,
        129,
        142,
        155,
        168,
        181,
        194,
        206,
        219,
        232,
        245,
        258,
        271,
        284,
        297,
        310,
        323,
        335,
        348,
        361,
        374,
        387
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "models_dir": "models/california_housing/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "4",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        13,
        26,
        39,
        52,
        65,
        77,
        90,
        103,
        116,
        129,
        142,
        155,
        168,
        181,
        194,
        206,
        219,
        232,
        245,
        258,
        271,
        284,
        297,
        310,
        323,
        335,
        348,
        361,
        374,
        387
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/diabetes/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "none_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        1,
        2,
        3,
        4
    ]
}
 No newline at end of file