Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • 12-experiment-pipeline
  • 13-visualization
  • 14-correction-of-multiclass-classif
  • 15-integration-sota
  • 17-adding-new-datasets
  • 19-add-some-tests
  • 20-coherence-des-arbres-de-predictions
  • 24-non-negative-omp
  • correlation
  • master
  • archive/10-gridsearching-of-the-base-forest
  • archive/4-implement-omp_forest_classifier
  • archive/5-add-plots-2
  • archive/Leo_Add_first_notebook
  • archive/farah_notation_and_related_work
  • archive/wip_clean_scripts
16 results

Target

Select target project
No results found
Select Git revision
  • 12-experiment-pipeline
  • 13-visualization
  • 14-correction-of-multiclass-classif
  • 15-integration-sota
  • 17-adding-new-datasets
  • 19-add-some-tests
  • 20-coherence-des-arbres-de-predictions
  • 24-non-negative-omp
  • correlation
  • master
  • archive/10-gridsearching-of-the-base-forest
  • archive/4-implement-omp_forest_classifier
  • archive/5-add-plots-2
  • archive/Leo_Add_first_notebook
  • archive/farah_notation_and_related_work
  • archive/wip_clean_scripts
16 results
Show changes
297 files
+ 10970
521
Compare changes
  • Side-by-side
  • Inline

Files

+1 −3
Original line number Original line Diff line number Diff line
* Fix model results loading in compute_results.py.
* Check that omp multiclasses classifier is working as expected.
* Check that omp multiclasses classifier is working as expected.
 No newline at end of file
* Fix the dataset error of fetcher when job_number > 1.
 No newline at end of file
Original line number Original line Diff line number Diff line
from sklearn.metrics import mean_squared_error
from sklearn.base import BaseEstimator
from sklearn.tree import DecisionTreeRegressor
from abc import abstractmethod, ABCMeta
import numpy as np
from tqdm import tqdm


class EnsembleSelectionForestRegressor(BaseEstimator, metaclass=ABCMeta):
    """
    'Ensemble selection from libraries of models' by Rich Caruana et al
    """

    def __init__(self, models_parameters, library, score_metric=mean_squared_error):
        self._models_parameters = models_parameters
        self._library = library
        self._extracted_forest_size = self._models_parameters.extracted_forest_size
        self._score_metric = score_metric

    @property
    def models_parameters(self):
        return self._models_parameters

    @property
    def library(self):
        return self._library

    def fit(self, X_train, y_train, X_val, y_val):
        scores_list = list()
        for estimator in self._library:
            val_score = self._score_metric(estimator.predict(X_val), y_val)
            scores_list.append(val_score)

        class_list = list(self._library)
        m = np.argmax(np.asarray(scores_list))
        self._ensemble_selected = [class_list[m]]
        temp_pred = class_list[m].predict(X_val)
        del class_list[m]
        for k in range(self._extracted_forest_size - 1):
            candidate_index = 0
            best_score = 100000
            for j in range(len(class_list)):
                temp_pred = np.vstack((temp_pred, class_list[j].predict(X_val)))
                temp_mean = np.mean(temp_pred, axis=0)
                temp_score = self._score_metric(temp_mean, y_val)
                if (temp_score < best_score):
                    candidate_index = j
                    best_score = temp_score
                temp_pred = np.delete(temp_pred, -1, 0)
            self._ensemble_selected.append(class_list[candidate_index])
            temp_pred = np.vstack((temp_pred, class_list[candidate_index].predict(X_val)))
            del class_list[candidate_index]

    def score(self, X, y):
        predictions = self._predict_base_estimator(X)
        return self._score_metric(predictions, y)

    def predict_base_estimator(self, X):
        predictions = list()
        for tree in self._ensemble_selected:
            predictions.append(tree.predict(X))
        mean_predictions = np.mean(np.array(predictions), axis=0)
        return mean_predictions

    @staticmethod
    def generate_library(X_train, y_train, random_state=None):
        criterion_arr = ["mse"]#, "friedman_mse", "mae"]
        splitter_arr = ["best"]#, "random"]
        depth_arr = [i for i in range(5, 20, 1)]
        min_samples_split_arr = [i for i in range(2, 20, 1)]
        min_samples_leaf_arr = [i for i in range(2, 20, 1)]
        max_features_arr = ["sqrt"]#["auto", "sqrt", "log2"]

        library = list()
        with tqdm(total=len(criterion_arr) * len(splitter_arr) * \
            len(depth_arr) * len(min_samples_split_arr) * len(min_samples_leaf_arr) * \
            len(max_features_arr)) as bar:
            bar.set_description('Generating library')
            for criterion in criterion_arr:
                for splitter in splitter_arr:
                    for depth in depth_arr:
                        for min_samples_split in min_samples_split_arr:
                            for min_samples_leaf in min_samples_leaf_arr:
                                for max_features in max_features_arr:
                                    t = DecisionTreeRegressor(criterion=criterion, splitter=splitter, max_depth=depth, min_samples_split=min_samples_split,
                                        min_samples_leaf=min_samples_leaf, max_features=max_features, random_state=random_state)
                                    t.fit(X_train, y_train)
                                    library.append(t)
                                    bar.update(1)
        return library
Original line number Original line Diff line number Diff line
@@ -3,6 +3,7 @@ from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
from bolsonaro.models.model_parameters import ModelParameters
from bolsonaro.models.model_parameters import ModelParameters
from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor
from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor
from bolsonaro.models.kmeans_forest_regressor import KMeansForestRegressor
from bolsonaro.models.kmeans_forest_regressor import KMeansForestRegressor
from bolsonaro.models.ensemble_selection_forest_regressor import EnsembleSelectionForestRegressor
from bolsonaro.data.task import Task
from bolsonaro.data.task import Task


from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
@@ -13,7 +14,7 @@ import pickle
class ModelFactory(object):
class ModelFactory(object):


    @staticmethod
    @staticmethod
    def build(task, model_parameters):
    def build(task, model_parameters, library=None):
        if task not in [Task.BINARYCLASSIFICATION, Task.REGRESSION, Task.MULTICLASSIFICATION]:
        if task not in [Task.BINARYCLASSIFICATION, Task.REGRESSION, Task.MULTICLASSIFICATION]:
            raise ValueError("Unsupported task '{}'".format(task))
            raise ValueError("Unsupported task '{}'".format(task))


@@ -21,10 +22,10 @@ class ModelFactory(object):
            if model_parameters.extraction_strategy == 'omp':
            if model_parameters.extraction_strategy == 'omp':
                return OmpForestBinaryClassifier(model_parameters)
                return OmpForestBinaryClassifier(model_parameters)
            elif model_parameters.extraction_strategy == 'random':
            elif model_parameters.extraction_strategy == 'random':
                return RandomForestClassifier(n_estimators=model_parameters.extracted_forest_size,
                return RandomForestClassifier(**model_parameters.hyperparameters,
                    random_state=model_parameters.seed)
                    random_state=model_parameters.seed)
            elif model_parameters.extraction_strategy == 'none':
            elif model_parameters.extraction_strategy == 'none':
                return RandomForestClassifier(n_estimators=model_parameters.hyperparameters['n_estimators'],
                return RandomForestClassifier(**model_parameters.hyperparameters,
                    random_state=model_parameters.seed)
                    random_state=model_parameters.seed)
            else:
            else:
                raise ValueError('Invalid extraction strategy')
                raise ValueError('Invalid extraction strategy')
@@ -32,14 +33,16 @@ class ModelFactory(object):
            if model_parameters.extraction_strategy == 'omp':
            if model_parameters.extraction_strategy == 'omp':
                return OmpForestRegressor(model_parameters)
                return OmpForestRegressor(model_parameters)
            elif model_parameters.extraction_strategy == 'random':
            elif model_parameters.extraction_strategy == 'random':
                return RandomForestRegressor(n_estimators=model_parameters.extracted_forest_size,
                return RandomForestRegressor(**model_parameters.hyperparameters,
                    random_state=model_parameters.seed)
                    random_state=model_parameters.seed)
            elif model_parameters.extraction_strategy == 'similarity':
            elif model_parameters.extraction_strategy == 'similarity':
                return SimilarityForestRegressor(model_parameters)
                return SimilarityForestRegressor(model_parameters)
            elif model_parameters.extraction_strategy == 'kmeans':
            elif model_parameters.extraction_strategy == 'kmeans':
                return KMeansForestRegressor(model_parameters)
                return KMeansForestRegressor(model_parameters)
            elif model_parameters.extraction_strategy == 'ensemble':
                return EnsembleSelectionForestRegressor(model_parameters, library=library)
            elif model_parameters.extraction_strategy == 'none':
            elif model_parameters.extraction_strategy == 'none':
                return RandomForestRegressor(n_estimators=model_parameters.hyperparameters['n_estimators'],
                return RandomForestRegressor(**model_parameters.hyperparameters,
                    random_state=model_parameters.seed)
                    random_state=model_parameters.seed)
            else:
            else:
                raise ValueError('Invalid extraction strategy')
                raise ValueError('Invalid extraction strategy')
@@ -47,10 +50,10 @@ class ModelFactory(object):
            if model_parameters.extraction_strategy == 'omp':
            if model_parameters.extraction_strategy == 'omp':
                return OmpForestMulticlassClassifier(model_parameters)
                return OmpForestMulticlassClassifier(model_parameters)
            elif model_parameters.extraction_strategy == 'random':
            elif model_parameters.extraction_strategy == 'random':
                return RandomForestClassifier(n_estimators=model_parameters.extracted_forest_size,
                return RandomForestClassifier(**model_parameters.hyperparameters,
                    random_state=model_parameters.seed)
                    random_state=model_parameters.seed)
            elif model_parameters.extraction_strategy == 'none':
            elif model_parameters.extraction_strategy == 'none':
                return RandomForestClassifier(n_estimators=model_parameters.hyperparameters['n_estimators'],
                return RandomForestClassifier(**model_parameters.hyperparameters,
                    random_state=model_parameters.seed)
                    random_state=model_parameters.seed)
            else:
            else:
                raise ValueError('Invalid extraction strategy')
                raise ValueError('Invalid extraction strategy')
Original line number Original line Diff line number Diff line
@@ -123,7 +123,9 @@ class SingleOmpForest(OmpForest):
        forest_predictions = self._base_estimator_predictions(X)
        forest_predictions = self._base_estimator_predictions(X)


        if self._models_parameters.normalize_D:
        if self._models_parameters.normalize_D:
            forest_predictions = forest_predictions.T
            forest_predictions /= self._forest_norms
            forest_predictions /= self._forest_norms
            forest_predictions = forest_predictions.T


        return self._make_omp_weighted_prediction(forest_predictions, self._omp, self._models_parameters.normalize_weights)
        return self._make_omp_weighted_prediction(forest_predictions, self._omp, self._models_parameters.normalize_weights)


@@ -134,16 +136,15 @@ class SingleOmpForest(OmpForest):
        Make all the base tree predictions
        Make all the base tree predictions


        :param X: a Forest
        :param X: a Forest
        :return: a np.array of the predictions of the entire forest
        :return: a np.array of the predictions of the trees selected by OMP without applyong the weight
        """
        """
        forest_predictions = self._base_estimator_predictions(X).T
        forest_predictions = np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_])


        if self._models_parameters.normalize_D:
        if self._models_parameters.normalize_D:
            forest_predictions = forest_predictions.T
            forest_predictions /= self._forest_norms
            forest_predictions /= self._forest_norms
            forest_predictions = forest_predictions.T


        weights = self._omp.coef_
        weights = self._omp.coef_
        omp_trees_indices = np.nonzero(weights)[0]
        select_trees = np.mean(forest_predictions[weights != 0], axis=0)

        select_trees = np.mean(forest_predictions[omp_trees_indices], axis=0)
        print(len(omp_trees_indices))
        return select_trees
        return select_trees
Original line number Original line Diff line number Diff line
@@ -37,12 +37,12 @@ class OmpForestBinaryClassifier(SingleOmpForest):
        forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_])
        forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_])


        if self._models_parameters.normalize_D:
        if self._models_parameters.normalize_D:
            forest_predictions = forest_predictions.T
            forest_predictions /= self._forest_norms
            forest_predictions /= self._forest_norms
            forest_predictions = forest_predictions.T


        weights = self._omp.coef_
        weights = self._omp.coef_
        omp_trees_indices = np.nonzero(weights)
        omp_trees_predictions = forest_predictions[weights != 0].T[1]

        omp_trees_predictions = forest_predictions[omp_trees_indices].T[1]


        # Here forest_pred is the probability of being class 1.
        # Here forest_pred is the probability of being class 1.


@@ -119,7 +119,9 @@ class OmpForestMulticlassClassifier(OmpForest):
        forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]).T
        forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]).T


        if self._models_parameters.normalize_D:
        if self._models_parameters.normalize_D:
            forest_predictions = forest_predictions.T
            forest_predictions /= self._forest_norms
            forest_predictions /= self._forest_norms
            forest_predictions = forest_predictions.T


        label_names = []
        label_names = []
        preds = []
        preds = []
Original line number Original line Diff line number Diff line
@@ -2,6 +2,8 @@ from bolsonaro.models.model_raw_results import ModelRawResults
from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
from bolsonaro.models.omp_forest_classifier import OmpForestBinaryClassifier, OmpForestMulticlassClassifier
from bolsonaro.models.omp_forest_classifier import OmpForestBinaryClassifier, OmpForestMulticlassClassifier
from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor
from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor
from bolsonaro.models.kmeans_forest_regressor import KMeansForestRegressor
from bolsonaro.models.ensemble_selection_forest_regressor import EnsembleSelectionForestRegressor
from bolsonaro.error_handling.logger_factory import LoggerFactory
from bolsonaro.error_handling.logger_factory import LoggerFactory
from bolsonaro.data.task import Task
from bolsonaro.data.task import Task
from . import LOG_PATH
from . import LOG_PATH
@@ -72,16 +74,21 @@ class Trainer(object):
        else:
        else:
            raise ValueError("Unknown specified subsets_used parameter '{}'".format(model.models_parameters.subsets_used))
            raise ValueError("Unknown specified subsets_used parameter '{}'".format(model.models_parameters.subsets_used))


    def train(self, model):
    def train(self, model, extracted_forest_size=None):
        """
        """
        :param model: An instance of either RandomForestRegressor, RandomForestClassifier, OmpForestRegressor,
        :param model: An instance of either RandomForestRegressor, RandomForestClassifier, OmpForestRegressor,
            OmpForestBinaryClassifier, OmpForestMulticlassClassifier.
            OmpForestBinaryClassifier, OmpForestMulticlassClassifier.
        :return:
        :return:
        """
        """

        self._logger.debug('Training model using train set...')
        self._logger.debug('Training model using train set...')
        self._begin_time = time.time()
        self._begin_time = time.time()
        if type(model) in [RandomForestRegressor, RandomForestClassifier]:
        if type(model) in [RandomForestRegressor, RandomForestClassifier]:
            if extracted_forest_size is not None:
                estimators_index = np.arange(len(model.estimators_))
                np.random.shuffle(estimators_index)
                choosen_estimators = estimators_index[:extracted_forest_size]
                model.estimators_ = np.array(model.estimators_)[choosen_estimators]
            else:
                model.fit(
                model.fit(
                    X=self._X_forest,
                    X=self._X_forest,
                    y=self._y_forest
                    y=self._y_forest
@@ -96,7 +103,7 @@ class Trainer(object):
        self._end_time = time.time()
        self._end_time = time.time()


    def __score_func(self, model, X, y_true, weights=True):
    def __score_func(self, model, X, y_true, weights=True):
        if type(model) in [OmpForestRegressor, RandomForestRegressor, SimilarityForestRegressor]:
        if type(model) in [OmpForestRegressor, RandomForestRegressor]:
            if weights:
            if weights:
                y_pred = model.predict(X)
                y_pred = model.predict(X)
            else:
            else:
@@ -111,10 +118,12 @@ class Trainer(object):
                y_pred = np.sign(y_pred)
                y_pred = np.sign(y_pred)
                y_pred = np.where(y_pred == 0, 1, y_pred)
                y_pred = np.where(y_pred == 0, 1, y_pred)
            result = self._classification_score_metric(y_true, y_pred)
            result = self._classification_score_metric(y_true, y_pred)
        elif type(model) in [SimilarityForestRegressor, KMeansForestRegressor, EnsembleSelectionForestRegressor]:
            result = model.score(X, y_true)
        return result
        return result


    def __score_func_base(self, model, X, y_true):
    def __score_func_base(self, model, X, y_true):
        if type(model) == OmpForestRegressor:
        if type(model) in [OmpForestRegressor, SimilarityForestRegressor, KMeansForestRegressor, EnsembleSelectionForestRegressor]:
            y_pred = model.predict_base_estimator(X)
            y_pred = model.predict_base_estimator(X)
            result = self._base_regression_score_metric(y_true, y_pred)
            result = self._base_regression_score_metric(y_true, y_pred)
        elif type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier]:
        elif type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier]:
@@ -123,7 +132,7 @@ class Trainer(object):
        elif type(model) == RandomForestClassifier:
        elif type(model) == RandomForestClassifier:
            y_pred = model.predict(X)
            y_pred = model.predict(X)
            result = self._base_classification_score_metric(y_true, y_pred)
            result = self._base_classification_score_metric(y_true, y_pred)
        elif type(model) in [RandomForestRegressor, SimilarityForestRegressor]:
        elif type(model) is RandomForestRegressor:
            y_pred = model.predict(X)
            y_pred = model.predict(X)
            result = self._base_regression_score_metric(y_true, y_pred)
            result = self._base_regression_score_metric(y_true, y_pred)
        return result
        return result
Original line number Original line Diff line number Diff line
@@ -133,10 +133,11 @@ if __name__ == "__main__":


    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--stage', nargs='?', type=int, required=True, help='Specify the stage number among [1, 5].')
    parser.add_argument('--stage', nargs='?', type=int, required=True, help='Specify the stage number among [1, 5].')
    parser.add_argument('--experiment_ids', nargs='+', type=int, required=True, help='Compute the results of the specified experiment id(s).' + \
    parser.add_argument('--experiment_ids', nargs='+', type=str, required=True, help='Compute the results of the specified experiment id(s).' + \
        'stage=1: {{base_with_params}} {{random_with_params}} {{omp_with_params}} {{base_wo_params}} {{random_wo_params}} {{omp_wo_params}}' + \
        'stage=1: {{base_with_params}} {{random_with_params}} {{omp_with_params}} {{base_wo_params}} {{random_wo_params}} {{omp_wo_params}}' + \
        'stage=2: {{no_normalization}} {{normalize_D}} {{normalize_weights}} {{normalize_D_and_weights}}' + \
        'stage=2: {{no_normalization}} {{normalize_D}} {{normalize_weights}} {{normalize_D_and_weights}}' + \
        'stage=3: {{train-dev_subset}} {{train-dev_train-dev_subset}} {{train-train-dev_subset}}')
        'stage=3: {{train-dev_subset}} {{train-dev_train-dev_subset}} {{train-train-dev_subset}}' + \
        'stage=5: {{base_with_params}} {{random_with_params}} {{omp_with_params}} [ensemble={{id}}] [similarity={{id}}] [kmean={{id}}]')
    parser.add_argument('--dataset_name', nargs='?', type=str, required=True, help='Specify the dataset name. TODO: read it from models dir directly.')
    parser.add_argument('--dataset_name', nargs='?', type=str, required=True, help='Specify the dataset name. TODO: read it from models dir directly.')
    parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.')
    parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.')
    parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
    parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
@@ -159,7 +160,7 @@ if __name__ == "__main__":
            raise ValueError('In the case of stage 1, the number of specified experiment ids must be 6.')
            raise ValueError('In the case of stage 1, the number of specified experiment ids must be 6.')


        # Retreive the extracted forest sizes number used in order to have a base forest axis as long as necessary
        # Retreive the extracted forest sizes number used in order to have a base forest axis as long as necessary
        extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, args.experiment_ids[1])
        extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, int(args.experiment_ids[1]))


        # Experiments that used the best hyperparameters found for this dataset
        # Experiments that used the best hyperparameters found for this dataset


@@ -167,18 +168,18 @@ if __name__ == "__main__":
        logger.info('Loading base_with_params experiment scores...')
        logger.info('Loading base_with_params experiment scores...')
        base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores, \
        base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores, \
            base_with_params_experiment_score_metric = \
            base_with_params_experiment_score_metric = \
            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0],
            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, int(args.experiment_ids[0]),
            extracted_forest_sizes_number)
            extracted_forest_sizes_number)
        # random_with_params
        # random_with_params
        logger.info('Loading random_with_params experiment scores...')
        logger.info('Loading random_with_params experiment scores...')
        random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
        random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
            with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
            with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1])
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, int(args.experiment_ids[1]))
        # omp_with_params
        # omp_with_params
        logger.info('Loading omp_with_params experiment scores...')
        logger.info('Loading omp_with_params experiment scores...')
        omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
        omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
            omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
            omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
                args.models_dir, args.results_dir, args.experiment_ids[2])
                args.models_dir, args.results_dir, int(args.experiment_ids[2]))


        # Experiments that didn't use the best hyperparameters found for this dataset
        # Experiments that didn't use the best hyperparameters found for this dataset


@@ -186,19 +187,19 @@ if __name__ == "__main__":
        logger.info('Loading base_wo_params experiment scores...')
        logger.info('Loading base_wo_params experiment scores...')
        base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores, \
        base_wo_params_train_scores, base_wo_params_dev_scores, base_wo_params_test_scores, \
            base_wo_params_experiment_score_metric = extract_scores_across_seeds_and_forest_size(
            base_wo_params_experiment_score_metric = extract_scores_across_seeds_and_forest_size(
                args.models_dir, args.results_dir, args.experiment_ids[3],
                args.models_dir, args.results_dir, int(args.experiment_ids[3]),
            extracted_forest_sizes_number)
            extracted_forest_sizes_number)
        # random_wo_params
        # random_wo_params
        logger.info('Loading random_wo_params experiment scores...')
        logger.info('Loading random_wo_params experiment scores...')
        random_wo_params_train_scores, random_wo_params_dev_scores, random_wo_params_test_scores, \
        random_wo_params_train_scores, random_wo_params_dev_scores, random_wo_params_test_scores, \
            wo_params_extracted_forest_sizes, random_wo_params_experiment_score_metric = \
            wo_params_extracted_forest_sizes, random_wo_params_experiment_score_metric = \
                extract_scores_across_seeds_and_extracted_forest_sizes(
                extract_scores_across_seeds_and_extracted_forest_sizes(
                args.models_dir, args.results_dir, args.experiment_ids[4])
                args.models_dir, args.results_dir, int(args.experiment_ids[4]))
        # base_wo_params
        # base_wo_params
        logger.info('Loading base_wo_params experiment scores...')
        logger.info('Loading base_wo_params experiment scores...')
        omp_wo_params_train_scores, omp_wo_params_dev_scores, omp_wo_params_test_scores, _, \
        omp_wo_params_train_scores, omp_wo_params_dev_scores, omp_wo_params_test_scores, _, \
            omp_wo_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
            omp_wo_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
                args.models_dir, args.results_dir, args.experiment_ids[5])
                args.models_dir, args.results_dir, int(args.experiment_ids[5]))


        # Sanity check on the metrics retreived
        # Sanity check on the metrics retreived
        if not (base_with_params_experiment_score_metric == random_with_params_experiment_score_metric ==
        if not (base_with_params_experiment_score_metric == random_with_params_experiment_score_metric ==
@@ -243,25 +244,25 @@ if __name__ == "__main__":
        logger.info('Loading no_normalization experiment scores...')
        logger.info('Loading no_normalization experiment scores...')
        _, _, no_normalization_test_scores, extracted_forest_sizes, no_normalization_experiment_score_metric = \
        _, _, no_normalization_test_scores, extracted_forest_sizes, no_normalization_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            args.experiment_ids[0])
            int(args.experiment_ids[0]))


        # normalize_D
        # normalize_D
        logger.info('Loading normalize_D experiment scores...')
        logger.info('Loading normalize_D experiment scores...')
        _, _, normalize_D_test_scores, _, normalize_D_experiment_score_metric = \
        _, _, normalize_D_test_scores, _, normalize_D_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            args.experiment_ids[1])
            int(args.experiment_ids[1]))


        # normalize_weights
        # normalize_weights
        logger.info('Loading normalize_weights experiment scores...')
        logger.info('Loading normalize_weights experiment scores...')
        _, _, normalize_weights_test_scores, _, normalize_weights_experiment_score_metric = \
        _, _, normalize_weights_test_scores, _, normalize_weights_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            args.experiment_ids[2])
            int(args.experiment_ids[2]))


        # normalize_D_and_weights
        # normalize_D_and_weights
        logger.info('Loading normalize_D_and_weights experiment scores...')
        logger.info('Loading normalize_D_and_weights experiment scores...')
        _, _, normalize_D_and_weights_test_scores, _, normalize_D_and_weights_experiment_score_metric = \
        _, _, normalize_D_and_weights_test_scores, _, normalize_D_and_weights_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            args.experiment_ids[3])
            int(args.experiment_ids[3]))


        # Sanity check on the metrics retreived
        # Sanity check on the metrics retreived
        if not (no_normalization_experiment_score_metric == normalize_D_experiment_score_metric
        if not (no_normalization_experiment_score_metric == normalize_D_experiment_score_metric
@@ -290,21 +291,21 @@ if __name__ == "__main__":
        train_dev_subset_train_scores, train_dev_subset_dev_scores, train_dev_subset_test_scores, \
        train_dev_subset_train_scores, train_dev_subset_dev_scores, train_dev_subset_test_scores, \
            extracted_forest_sizes, train_dev_subset_experiment_score_metric = \
            extracted_forest_sizes, train_dev_subset_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            args.experiment_ids[0])
            int(args.experiment_ids[0]))


        # train-dev_train-dev_subset
        # train-dev_train-dev_subset
        logger.info('Loading train-dev_train-dev_subset experiment scores...')
        logger.info('Loading train-dev_train-dev_subset experiment scores...')
        train_dev_train_dev_subset_train_scores, train_dev_train_dev_subset_dev_scores, train_dev_train_dev_subset_test_scores, \
        train_dev_train_dev_subset_train_scores, train_dev_train_dev_subset_dev_scores, train_dev_train_dev_subset_test_scores, \
            _, train_dev_train_dev_subset_experiment_score_metric = \
            _, train_dev_train_dev_subset_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            args.experiment_ids[1])
            int(args.experiment_ids[1]))


        # train-train-dev_subset
        # train-train-dev_subset
        logger.info('Loading train-train-dev_subset experiment scores...')
        logger.info('Loading train-train-dev_subset experiment scores...')
        train_train_dev_subset_train_scores, train_train_dev_subset_dev_scores, train_train_dev_subset_test_scores, \
        train_train_dev_subset_train_scores, train_train_dev_subset_dev_scores, train_train_dev_subset_test_scores, \
            _, train_train_dev_subset_experiment_score_metric = \
            _, train_train_dev_subset_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir,
            args.experiment_ids[2])
            int(args.experiment_ids[2]))


        # Sanity check on the metrics retreived
        # Sanity check on the metrics retreived
        if not (train_dev_subset_experiment_score_metric == train_dev_train_dev_subset_experiment_score_metric
        if not (train_dev_subset_experiment_score_metric == train_dev_train_dev_subset_experiment_score_metric
@@ -349,13 +350,13 @@ if __name__ == "__main__":
        logger.info('Loading base_with_params experiment scores...')
        logger.info('Loading base_with_params experiment scores...')
        base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores, \
        base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores, \
            base_with_params_experiment_score_metric = \
            base_with_params_experiment_score_metric = \
            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0],
            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, int(args.experiment_ids[0]),
            extracted_forest_sizes_number)
            extracted_forest_sizes_number)
        # random_with_params
        # random_with_params
        logger.info('Loading random_with_params experiment scores...')
        logger.info('Loading random_with_params experiment scores...')
        random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
        random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
            with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
            with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1])
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, int(args.experiment_ids[1]))
        # omp_with_params
        # omp_with_params
        logger.info('Loading omp_with_params experiment scores...')
        logger.info('Loading omp_with_params experiment scores...')
        """omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
        """omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
@@ -363,12 +364,12 @@ if __name__ == "__main__":
                args.models_dir, args.results_dir, args.experiment_ids[2])"""
                args.models_dir, args.results_dir, args.experiment_ids[2])"""
        omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
        omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
            omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
            omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
                args.models_dir, args.results_dir, args.experiment_ids[2])
                args.models_dir, args.results_dir, int(args.experiment_ids[2]))
        #omp_with_params_without_weights
        #omp_with_params_without_weights
        logger.info('Loading omp_with_params experiment scores...')
        logger.info('Loading omp_no_weights experiment scores...')
        omp_with_params_without_weights_train_scores, omp_with_params_without_weights_dev_scores, omp_with_params_without_weights_test_scores, _, \
        omp_with_params_without_weights_train_scores, omp_with_params_without_weights_dev_scores, omp_with_params_without_weights_test_scores, _, \
            omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
            omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
                args.models_dir, args.results_dir, args.experiment_ids[2], weights=False)
                args.models_dir, args.results_dir, int(args.experiment_ids[2]), weights=False)


        """# base_with_params
        """# base_with_params
        logger.info('Loading base_with_params experiment scores 2...')
        logger.info('Loading base_with_params experiment scores 2...')
@@ -388,7 +389,7 @@ if __name__ == "__main__":
            raise ValueError('Score metrics of all experiments must be the same.')
            raise ValueError('Score metrics of all experiments must be the same.')
        experiments_score_metric = base_with_params_experiment_score_metric
        experiments_score_metric = base_with_params_experiment_score_metric


        output_path = os.path.join(args.results_dir, args.dataset_name, 'stage4_fix')
        output_path = os.path.join(args.results_dir, args.dataset_name, 'stage4')
        pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
        pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)


        Plotter.plot_stage2_losses(
        Plotter.plot_stage2_losses(
@@ -402,47 +403,63 @@ if __name__ == "__main__":
            title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
            title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
    elif args.stage == 5:
    elif args.stage == 5:
        # Retreive the extracted forest sizes number used in order to have a base forest axis as long as necessary
        # Retreive the extracted forest sizes number used in order to have a base forest axis as long as necessary
        extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, args.experiment_ids[1])
        extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, int(args.experiment_ids[1]))
        all_labels = list()
        all_scores = list()


        # base_with_params
        # base_with_params
        logger.info('Loading base_with_params experiment scores...')
        logger.info('Loading base_with_params experiment scores...')
        base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores, \
        base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores, \
            base_with_params_experiment_score_metric = \
            base_with_params_experiment_score_metric = \
            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0],
            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, int(args.experiment_ids[0]),
            extracted_forest_sizes_number)
            extracted_forest_sizes_number)
        # random_with_params
        # random_with_params
        logger.info('Loading random_with_params experiment scores...')
        logger.info('Loading random_with_params experiment scores...')
        random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
        random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
            with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
            with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1])
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, int(args.experiment_ids[1]))
        # omp_with_params
        # omp_with_params
        logger.info('Loading omp_with_params experiment scores...')
        logger.info('Loading omp_with_params experiment scores...')
        omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
        omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
            omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
            omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
                args.models_dir, args.results_dir, args.experiment_ids[2])
                args.models_dir, args.results_dir, int(args.experiment_ids[2]))
        # omp_with_params
        #omp_with_params_without_weights
        logger.info('Loading kmeans_with_params experiment scores...')
        logger.info('Loading omp_with_params experiment scores...')
        kmeans_with_params_train_scores, kmeans_with_params_dev_scores, kmeans_with_params_test_scores, _, \
        omp_with_params_without_weights_train_scores, omp_with_params_without_weights_dev_scores, omp_with_params_without_weights_test_scores, _, \
            kmeans_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
            omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
                args.models_dir, args.results_dir, args.experiment_ids[3])
                args.models_dir, args.results_dir, int(args.experiment_ids[2]), weights=False)

        all_labels = ['base', 'random', 'omp', 'omp_without_weights']
        all_scores = [base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores,
            omp_with_params_without_weights_test_scores]

        for i in range(3, len(args.experiment_ids)):
            if 'kmeans' in args.experiment_ids[i]:
                label = 'kmeans'
            elif 'similarity' in args.experiment_ids[i]:
                label = 'similarity'
            elif 'ensemble' in args.experiment_ids[i]:
                label = 'ensemble'
            else:
                logger.error('Invalid value encountered')
                continue


        # Sanity check on the metrics retreived
            logger.info(f'Loading {label} experiment scores...')
        if not (base_with_params_experiment_score_metric == random_with_params_experiment_score_metric
            _, _, current_test_scores, _, _ = extract_scores_across_seeds_and_extracted_forest_sizes(
            == omp_with_params_experiment_score_metric == kmeans_with_params_experiment_score_metric):
                args.models_dir, args.results_dir, int(args.experiment_ids[i].split('=')[1]))
            raise ValueError('Score metrics of all experiments must be the same.')
            all_labels.append(label)
        experiments_score_metric = base_with_params_experiment_score_metric
            all_scores.append(current_test_scores)


        output_path = os.path.join(args.results_dir, args.dataset_name, 'stage5_kmeans')
        output_path = os.path.join(args.results_dir, args.dataset_name, 'stage5')
        pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
        pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)


        Plotter.plot_stage2_losses(
        Plotter.plot_stage2_losses(
            file_path=output_path + os.sep + 'losses.png',
            file_path=output_path + os.sep + f"losses_{'-'.join(all_labels)}.png",
            all_experiment_scores=[base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores,
            all_experiment_scores=all_scores,
                kmeans_with_params_test_scores],
            all_labels=all_labels,
            all_labels=['base', 'random', 'omp', 'kmeans'],
            x_value=with_params_extracted_forest_sizes,
            x_value=with_params_extracted_forest_sizes,
            xlabel='Number of trees extracted',
            xlabel='Number of trees extracted',
            ylabel=experiments_score_metric,
            ylabel=base_with_params_experiment_score_metric,
            title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
            title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
    else:
    else:
        raise ValueError('This stage number is not supported yet, but it will be!')
        raise ValueError('This stage number is not supported yet, but it will be!')
+51 −18
Original line number Original line Diff line number Diff line
@@ -2,6 +2,7 @@ from bolsonaro.data.dataset_parameters import DatasetParameters
from bolsonaro.data.dataset_loader import DatasetLoader
from bolsonaro.data.dataset_loader import DatasetLoader
from bolsonaro.models.model_factory import ModelFactory
from bolsonaro.models.model_factory import ModelFactory
from bolsonaro.models.model_parameters import ModelParameters
from bolsonaro.models.model_parameters import ModelParameters
from bolsonaro.models.ensemble_selection_forest_regressor import EnsembleSelectionForestRegressor
from bolsonaro.trainer import Trainer
from bolsonaro.trainer import Trainer
from bolsonaro.utils import resolve_experiment_id, tqdm_joblib
from bolsonaro.utils import resolve_experiment_id, tqdm_joblib
from bolsonaro import LOG_PATH
from bolsonaro import LOG_PATH
@@ -9,6 +10,7 @@ from bolsonaro.error_handling.logger_factory import LoggerFactory


from dotenv import find_dotenv, load_dotenv
from dotenv import find_dotenv, load_dotenv
import argparse
import argparse
import copy
import json
import json
import pathlib
import pathlib
import random
import random
@@ -53,10 +55,37 @@ def seed_job(seed_job_pb, seed, parameters, experiment_id, hyperparameters, verb


    trainer = Trainer(dataset)
    trainer = Trainer(dataset)


    if parameters['extraction_strategy'] == 'ensemble':
        library = EnsembleSelectionForestRegressor.generate_library(dataset.X_train, dataset.y_train, random_state=seed)
    else:
        library = None

    if parameters['extraction_strategy'] == 'random':
        pretrained_model_parameters = ModelParameters(
            extracted_forest_size=parameters['forest_size'],
            normalize_D=parameters['normalize_D'],
            subsets_used=parameters['subsets_used'],
            normalize_weights=parameters['normalize_weights'],
            seed=seed,
            hyperparameters=hyperparameters,
            extraction_strategy=parameters['extraction_strategy']
        )
        pretrained_estimator = ModelFactory.build(dataset.task, pretrained_model_parameters, library=library)
        pretraned_trainer = Trainer(dataset)
        pretraned_trainer.init(pretrained_estimator, subsets_used=parameters['subsets_used'])
        pretrained_estimator.fit(
            X=pretraned_trainer._X_forest,
            y=pretraned_trainer._y_forest
        )
    else:
        pretrained_estimator = None
        pretrained_model_parameters = None

    if parameters['extraction_strategy'] != 'none':
    if parameters['extraction_strategy'] != 'none':
        with tqdm_joblib(tqdm(total=len(parameters['extracted_forest_size']), disable=not verbose)) as extracted_forest_size_job_pb:
        with tqdm_joblib(tqdm(total=len(parameters['extracted_forest_size']), disable=not verbose)) as extracted_forest_size_job_pb:
            Parallel(n_jobs=-1)(delayed(extracted_forest_size_job)(extracted_forest_size_job_pb, parameters['extracted_forest_size'][i],
            Parallel(n_jobs=-1)(delayed(extracted_forest_size_job)(extracted_forest_size_job_pb, parameters['extracted_forest_size'][i],
                models_dir, seed, parameters, dataset, hyperparameters, experiment_id, trainer)
                models_dir, seed, parameters, dataset, hyperparameters, experiment_id, trainer, library,
                pretrained_estimator=pretrained_estimator, pretrained_model_parameters=pretrained_model_parameters)
                for i in range(len(parameters['extracted_forest_size'])))
                for i in range(len(parameters['extracted_forest_size'])))
    else:
    else:
        forest_size = hyperparameters['n_estimators']
        forest_size = hyperparameters['n_estimators']
@@ -88,7 +117,7 @@ def seed_job(seed_job_pb, seed, parameters, experiment_id, hyperparameters, verb
            )
            )
            model_parameters.save(sub_models_dir, experiment_id)
            model_parameters.save(sub_models_dir, experiment_id)


            model = ModelFactory.build(dataset.task, model_parameters)
            model = ModelFactory.build(dataset.task, model_parameters, library=library)


            trainer.init(model, subsets_used=parameters['subsets_used'])
            trainer.init(model, subsets_used=parameters['subsets_used'])
            trainer.train(model)
            trainer.train(model)
@@ -97,7 +126,8 @@ def seed_job(seed_job_pb, seed, parameters, experiment_id, hyperparameters, verb
    seed_job_pb.update(1)
    seed_job_pb.update(1)


def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_size, models_dir,
def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_size, models_dir,
    seed, parameters, dataset, hyperparameters, experiment_id, trainer):
    seed, parameters, dataset, hyperparameters, experiment_id, trainer, library,
    pretrained_estimator=None, pretrained_model_parameters=None):


    logger = LoggerFactory.create(LOG_PATH, 'training_seed{}_extracted_forest_size{}_ti{}'.format(
    logger = LoggerFactory.create(LOG_PATH, 'training_seed{}_extracted_forest_size{}_ti{}'.format(
        seed, extracted_forest_size, threading.get_ident()))
        seed, extracted_forest_size, threading.get_ident()))
@@ -121,6 +151,7 @@ def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_siz


    pathlib.Path(sub_models_dir).mkdir(parents=True, exist_ok=True)
    pathlib.Path(sub_models_dir).mkdir(parents=True, exist_ok=True)


    if not pretrained_estimator:
        model_parameters = ModelParameters(
        model_parameters = ModelParameters(
            extracted_forest_size=extracted_forest_size,
            extracted_forest_size=extracted_forest_size,
            normalize_D=parameters['normalize_D'],
            normalize_D=parameters['normalize_D'],
@@ -131,11 +162,13 @@ def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_siz
            extraction_strategy=parameters['extraction_strategy']
            extraction_strategy=parameters['extraction_strategy']
        )
        )
        model_parameters.save(sub_models_dir, experiment_id)
        model_parameters.save(sub_models_dir, experiment_id)

        model = ModelFactory.build(dataset.task, model_parameters, library=library)
    model = ModelFactory.build(dataset.task, model_parameters)
    else:
        model = copy.deepcopy(pretrained_estimator)
        pretrained_model_parameters.save(sub_models_dir, experiment_id)


    trainer.init(model, subsets_used=parameters['subsets_used'])
    trainer.init(model, subsets_used=parameters['subsets_used'])
    trainer.train(model)
    trainer.train(model, extracted_forest_size=extracted_forest_size)
    trainer.compute_results(model, sub_models_dir)
    trainer.compute_results(model, sub_models_dir)


"""
"""
@@ -202,7 +235,7 @@ if __name__ == "__main__":
    parser.add_argument('--skip_best_hyperparams', action='store_true', default=DEFAULT_SKIP_BEST_HYPERPARAMS, help='Do not use the best hyperparameters if there exist.')
    parser.add_argument('--skip_best_hyperparams', action='store_true', default=DEFAULT_SKIP_BEST_HYPERPARAMS, help='Do not use the best hyperparameters if there exist.')
    parser.add_argument('--save_experiment_configuration', nargs='+', default=None, help='Save the experiment parameters specified in the command line in a file. Args: {{stage_num}} {{name}}')
    parser.add_argument('--save_experiment_configuration', nargs='+', default=None, help='Save the experiment parameters specified in the command line in a file. Args: {{stage_num}} {{name}}')
    parser.add_argument('--job_number', nargs='?', type=int, default=DEFAULT_JOB_NUMBER, help='Specify the number of job used during the parallelisation across seeds.')
    parser.add_argument('--job_number', nargs='?', type=int, default=DEFAULT_JOB_NUMBER, help='Specify the number of job used during the parallelisation across seeds.')
    parser.add_argument('--extraction_strategy', nargs='?', type=str, default=DEFAULT_EXTRACTION_STRATEGY, help='Specify the strategy to apply to extract the trees from the forest. Either omp, random, none, similarity, kmeans.')
    parser.add_argument('--extraction_strategy', nargs='?', type=str, default=DEFAULT_EXTRACTION_STRATEGY, help='Specify the strategy to apply to extract the trees from the forest. Either omp, random, none, similarity, kmeans, ensemble.')
    parser.add_argument('--overwrite', action='store_true', default=DEFAULT_OVERWRITE, help='Overwrite the experiment id')
    parser.add_argument('--overwrite', action='store_true', default=DEFAULT_OVERWRITE, help='Overwrite the experiment id')
    args = parser.parse_args()
    args = parser.parse_args()


@@ -213,7 +246,7 @@ if __name__ == "__main__":
    else:
    else:
        parameters = args.__dict__
        parameters = args.__dict__


    if parameters['extraction_strategy'] not in ['omp', 'random', 'none', 'similarity', 'kmeans']:
    if parameters['extraction_strategy'] not in ['omp', 'random', 'none', 'similarity', 'kmeans', 'ensemble']:
        raise ValueError('Specified extraction strategy {} is not supported.'.format(parameters.extraction_strategy))
        raise ValueError('Specified extraction strategy {} is not supported.'.format(parameters.extraction_strategy))


    pathlib.Path(parameters['models_dir']).mkdir(parents=True, exist_ok=True)
    pathlib.Path(parameters['models_dir']).mkdir(parents=True, exist_ok=True)
Original line number Original line Diff line number Diff line
@@ -6,15 +6,18 @@
    "normalize_D": false,
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "dataset_normalizer": "standard",
    "forest_size": null,
    "forest_size": null,
    "extracted_forest_size_samples": 10,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage1",
    "models_dir": "models/boston/stage1",
    "dev_size": 0.2,
    "dev_size": 0.2,
    "test_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "random_seed_number": 1,
    "seeds": [
    "seeds": [
        2078,
        1,
        90
        2,
        3,
        4,
        5
    ],
    ],
    "subsets_used": "train,dev",
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -26,16 +29,37 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "none",
    "extraction_strategy": "none",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        36,
        33,
        73,
        67,
        109,
        100,
        145,
        133,
        182,
        167,
        218,
        200,
        255,
        233,
        291,
        267,
        327,
        300,
        364
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
    ]
}
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 6,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 5,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "no_normalization"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D_and_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -17,9 +17,7 @@
        2,
        2,
        3,
        3,
        4,
        4,
        5,
        5
        6,
        7
    ],
    ],
    "subsets_used": "train+dev,train+dev",
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -31,6 +29,7 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "none",
    "extraction_strategy": "none",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        33,
        33,
        67,
        67,
Original line number Original line Diff line number Diff line
@@ -17,9 +17,7 @@
        2,
        2,
        3,
        3,
        4,
        4,
        5,
        5
        6,
        7
    ],
    ],
    "subsets_used": "train+dev,train+dev",
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -31,6 +29,7 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "omp",
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        33,
        33,
        67,
        67,
Original line number Original line Diff line number Diff line
@@ -17,9 +17,7 @@
        2,
        2,
        3,
        3,
        4,
        4,
        5,
        5
        6,
        7
    ],
    ],
    "subsets_used": "train+dev,train+dev",
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -31,6 +29,7 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "random",
    "extraction_strategy": "random",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        33,
        33,
        67,
        67,
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "none_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 6,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 5,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "no_normalization"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D_and_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -17,9 +17,7 @@
        2,
        2,
        3,
        3,
        4,
        4,
        5,
        5
        6,
        7
    ],
    ],
    "subsets_used": "train+dev,train+dev",
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -31,6 +29,7 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "none",
    "extraction_strategy": "none",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        33,
        33,
        67,
        67,
Original line number Original line Diff line number Diff line
@@ -17,9 +17,7 @@
        2,
        2,
        3,
        3,
        4,
        4,
        5,
        5
        6,
        7
    ],
    ],
    "subsets_used": "train+dev,train+dev",
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -31,6 +29,7 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "omp",
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        33,
        33,
        67,
        67,
Original line number Original line Diff line number Diff line
@@ -17,9 +17,7 @@
        2,
        2,
        3,
        3,
        4,
        4,
        5,
        5
        6,
        7
    ],
    ],
    "subsets_used": "train+dev,train+dev",
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -31,6 +29,7 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "random",
    "extraction_strategy": "random",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        33,
        33,
        67,
        67,
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "none_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 6,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 5,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "no_normalization"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D_and_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -17,9 +17,7 @@
        2,
        2,
        3,
        3,
        4,
        4,
        5,
        5
        6,
        7
    ],
    ],
    "subsets_used": "train+dev,train+dev",
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -31,6 +29,7 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "none",
    "extraction_strategy": "none",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        33,
        33,
        67,
        67,
Original line number Original line Diff line number Diff line
@@ -17,9 +17,7 @@
        2,
        2,
        3,
        3,
        4,
        4,
        5,
        5
        6,
        7
    ],
    ],
    "subsets_used": "train+dev,train+dev",
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -31,6 +29,7 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "omp",
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        33,
        33,
        67,
        67,
Original line number Original line Diff line number Diff line
@@ -17,9 +17,7 @@
        2,
        2,
        3,
        3,
        4,
        4,
        5,
        5
        6,
        7
    ],
    ],
    "subsets_used": "train+dev,train+dev",
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -31,6 +29,7 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "random",
    "extraction_strategy": "random",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        33,
        33,
        67,
        67,
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "none_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "overwrite": false,
    "extracted_forest_size": [
        4,
        7,
        11,
        14,
        18,
        22,
        25,
        29,
        32,
        36,
        40,
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "overwrite": false,
    "extracted_forest_size": [
        4,
        7,
        11,
        14,
        18,
        22,
        25,
        29,
        32,
        36,
        40,
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        4,
        7,
        11,
        14,
        18,
        22,
        25,
        29,
        32,
        36,
        40,
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 6,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        4,
        7,
        11,
        14,
        18,
        22,
        25,
        29,
        32,
        36,
        40,
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "overwrite": false,
    "extracted_forest_size": [
        4,
        7,
        11,
        14,
        18,
        22,
        25,
        29,
        32,
        36,
        40,
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 5,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "overwrite": false,
    "extracted_forest_size": [
        4,
        7,
        11,
        14,
        18,
        22,
        25,
        29,
        32,
        36,
        40,
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "no_normalization"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        4,
        7,
        11,
        14,
        18,
        22,
        25,
        29,
        32,
        36,
        40,
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        4,
        7,
        11,
        14,
        18,
        22,
        25,
        29,
        32,
        36,
        40,
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D_and_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        4,
        7,
        11,
        14,
        18,
        22,
        25,
        29,
        32,
        36,
        40,
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        4,
        7,
        11,
        14,
        18,
        22,
        25,
        29,
        32,
        36,
        40,
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        4,
        7,
        11,
        14,
        18,
        22,
        25,
        29,
        32,
        36,
        40,
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        4,
        7,
        11,
        14,
        18,
        22,
        25,
        29,
        32,
        36,
        40,
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
        4,
        7,
        11,
        14,
        18,
        22,
        25,
        29,
        32,
        36,
        40,
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -17,9 +17,7 @@
        2,
        2,
        3,
        3,
        4,
        4,
        5,
        5
        6,
        7
    ],
    ],
    "subsets_used": "train+dev,train+dev",
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -31,6 +29,7 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "none",
    "extraction_strategy": "none",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        4,
        4,
        7,
        7,
Original line number Original line Diff line number Diff line
@@ -17,9 +17,7 @@
        2,
        2,
        3,
        3,
        4,
        4,
        5,
        5
        6,
        7
    ],
    ],
    "subsets_used": "train+dev,train+dev",
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -31,6 +29,7 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "omp",
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        4,
        4,
        7,
        7,
Original line number Original line Diff line number Diff line
@@ -17,9 +17,7 @@
        2,
        2,
        3,
        3,
        4,
        4,
        5,
        5
        6,
        7
    ],
    ],
    "subsets_used": "train+dev,train+dev",
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -31,6 +29,7 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "random",
    "extraction_strategy": "random",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        4,
        4,
        7,
        7,
Original line number Original line Diff line number Diff line
@@ -6,8 +6,8 @@
    "normalize_D": false,
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "dataset_normalizer": "standard",
    "forest_size": null,
    "forest_size": null,
    "extracted_forest_size_samples": 10,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diamonds/stage1",
    "models_dir": "models/diamonds/stage1",
    "dev_size": 0.2,
    "dev_size": 0.2,
    "test_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,9 @@
    "seeds": [
    "seeds": [
        1,
        1,
        2,
        2,
        3
        3,
        4,
        5
    ],
    ],
    "subsets_used": "train,dev",
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -27,16 +29,37 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "none",
    "extraction_strategy": "none",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        16,
        14,
        31,
        29,
        47,
        43,
        62,
        57,
        78,
        72,
        94,
        86,
        109,
        100,
        125,
        114,
        140,
        129,
        156
        143,
        157,
        172,
        186,
        200,
        214,
        229,
        243,
        257,
        272,
        286,
        300,
        315,
        329,
        343,
        358,
        372,
        386,
        400,
        415,
        429
    ]
    ]
}
}
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -6,8 +6,8 @@
    "normalize_D": false,
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "dataset_normalizer": "standard",
    "forest_size": null,
    "forest_size": null,
    "extracted_forest_size_samples": 10,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diamonds/stage1",
    "models_dir": "models/diamonds/stage1",
    "dev_size": 0.2,
    "dev_size": 0.2,
    "test_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,9 @@
    "seeds": [
    "seeds": [
        1,
        1,
        2,
        2,
        3
        3,
        4,
        5
    ],
    ],
    "subsets_used": "train,dev",
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -27,16 +29,37 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "none",
    "extraction_strategy": "none",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        16,
        14,
        31,
        29,
        47,
        43,
        62,
        57,
        78,
        72,
        94,
        86,
        109,
        100,
        125,
        114,
        140,
        129,
        156
        143,
        157,
        172,
        186,
        200,
        214,
        229,
        243,
        257,
        272,
        286,
        300,
        315,
        329,
        343,
        358,
        372,
        386,
        400,
        415,
        429
    ]
    ]
}
}
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -6,13 +6,17 @@
    "normalize_D": false,
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "dataset_normalizer": "standard",
    "forest_size": null,
    "forest_size": null,
    "extracted_forest_size_samples": 10,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diamonds/stage1",
    "models_dir": "models/diamonds/stage1",
    "dev_size": 0.2,
    "dev_size": 0.2,
    "test_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "random_seed_number": 1,
    "seeds": [
    "seeds": [
        1,
        2,
        3,
        4,
        5
        5
    ],
    ],
    "subsets_used": "train,dev",
    "subsets_used": "train,dev",
@@ -25,16 +29,37 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "omp",
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        16,
        14,
        31,
        29,
        47,
        43,
        62,
        57,
        78,
        72,
        94,
        86,
        109,
        100,
        125,
        114,
        140,
        129,
        156
        143,
        157,
        172,
        186,
        200,
        214,
        229,
        243,
        257,
        272,
        286,
        300,
        315,
        329,
        343,
        358,
        372,
        386,
        400,
        415,
        429
    ]
    ]
}
}
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -6,8 +6,8 @@
    "normalize_D": false,
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "dataset_normalizer": "standard",
    "forest_size": null,
    "forest_size": null,
    "extracted_forest_size_samples": 10,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diamonds/stage1",
    "models_dir": "models/diamonds/stage1",
    "dev_size": 0.2,
    "dev_size": 0.2,
    "test_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,9 @@
    "seeds": [
    "seeds": [
        1,
        1,
        2,
        2,
        3
        3,
        4,
        5
    ],
    ],
    "subsets_used": "train,dev",
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -27,16 +29,37 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "omp",
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        16,
        14,
        31,
        29,
        47,
        43,
        62,
        57,
        78,
        72,
        94,
        86,
        109,
        100,
        125,
        114,
        140,
        129,
        156
        143,
        157,
        172,
        186,
        200,
        214,
        229,
        243,
        257,
        272,
        286,
        300,
        315,
        329,
        343,
        358,
        372,
        386,
        400,
        415,
        429
    ]
    ]
}
}
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -6,8 +6,8 @@
    "normalize_D": false,
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "dataset_normalizer": "standard",
    "forest_size": null,
    "forest_size": null,
    "extracted_forest_size_samples": 10,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diamonds/stage1",
    "models_dir": "models/diamonds/stage1",
    "dev_size": 0.2,
    "dev_size": 0.2,
    "test_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,9 @@
    "seeds": [
    "seeds": [
        1,
        1,
        2,
        2,
        3
        3,
        4,
        5
    ],
    ],
    "subsets_used": "train,dev",
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -27,16 +29,37 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "random",
    "extraction_strategy": "random",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        16,
        14,
        31,
        29,
        47,
        43,
        62,
        57,
        78,
        72,
        94,
        86,
        109,
        100,
        125,
        114,
        140,
        129,
        156
        143,
        157,
        172,
        186,
        200,
        214,
        229,
        243,
        257,
        272,
        286,
        300,
        315,
        329,
        343,
        358,
        372,
        386,
        400,
        415,
        429
    ]
    ]
}
}
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -6,8 +6,8 @@
    "normalize_D": false,
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "dataset_normalizer": "standard",
    "forest_size": null,
    "forest_size": null,
    "extracted_forest_size_samples": 10,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diamonds/stage1",
    "models_dir": "models/diamonds/stage1",
    "dev_size": 0.2,
    "dev_size": 0.2,
    "test_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,9 @@
    "seeds": [
    "seeds": [
        1,
        1,
        2,
        2,
        3
        3,
        4,
        5
    ],
    ],
    "subsets_used": "train,dev",
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -27,16 +29,37 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "random",
    "extraction_strategy": "random",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        16,
        14,
        31,
        29,
        47,
        43,
        62,
        57,
        78,
        72,
        94,
        86,
        109,
        100,
        125,
        114,
        140,
        129,
        156
        143,
        157,
        172,
        186,
        200,
        214,
        229,
        243,
        257,
        272,
        286,
        300,
        315,
        329,
        343,
        358,
        372,
        386,
        400,
        415,
        429
    ]
    ]
}
}
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -6,8 +6,8 @@
    "normalize_D": false,
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "dataset_normalizer": "standard",
    "forest_size": null,
    "forest_size": null,
    "extracted_forest_size_samples": 10,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diamonds/stage2",
    "models_dir": "models/diamonds/stage2",
    "dev_size": 0.2,
    "dev_size": 0.2,
    "test_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,9 @@
    "seeds": [
    "seeds": [
        1,
        1,
        2,
        2,
        3
        3,
        4,
        5
    ],
    ],
    "subsets_used": "train,dev",
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -27,16 +29,37 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "omp",
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        16,
        14,
        31,
        29,
        47,
        43,
        62,
        57,
        78,
        72,
        94,
        86,
        109,
        100,
        125,
        114,
        140,
        129,
        156
        143,
        157,
        172,
        186,
        200,
        214,
        229,
        243,
        257,
        272,
        286,
        300,
        315,
        329,
        343,
        358,
        372,
        386,
        400,
        415,
        429
    ]
    ]
}
}
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -6,8 +6,8 @@
    "normalize_D": true,
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "dataset_normalizer": "standard",
    "forest_size": null,
    "forest_size": null,
    "extracted_forest_size_samples": 10,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diamonds/stage2",
    "models_dir": "models/diamonds/stage2",
    "dev_size": 0.2,
    "dev_size": 0.2,
    "test_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,9 @@
    "seeds": [
    "seeds": [
        1,
        1,
        2,
        2,
        3
        3,
        4,
        5
    ],
    ],
    "subsets_used": "train,dev",
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -27,16 +29,37 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "omp",
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        16,
        14,
        31,
        29,
        47,
        43,
        62,
        57,
        78,
        72,
        94,
        86,
        109,
        100,
        125,
        114,
        140,
        129,
        156
        143,
        157,
        172,
        186,
        200,
        214,
        229,
        243,
        257,
        272,
        286,
        300,
        315,
        329,
        343,
        358,
        372,
        386,
        400,
        415,
        429
    ]
    ]
}
}
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -6,8 +6,8 @@
    "normalize_D": true,
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "dataset_normalizer": "standard",
    "forest_size": null,
    "forest_size": null,
    "extracted_forest_size_samples": 10,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diamonds/stage2",
    "models_dir": "models/diamonds/stage2",
    "dev_size": 0.2,
    "dev_size": 0.2,
    "test_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,9 @@
    "seeds": [
    "seeds": [
        1,
        1,
        2,
        2,
        3
        3,
        4,
        5
    ],
    ],
    "subsets_used": "train,dev",
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "normalize_weights": true,
@@ -27,16 +29,37 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "omp",
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        16,
        14,
        31,
        29,
        47,
        43,
        62,
        57,
        78,
        72,
        94,
        86,
        109,
        100,
        125,
        114,
        140,
        129,
        156
        143,
        157,
        172,
        186,
        200,
        214,
        229,
        243,
        257,
        272,
        286,
        300,
        315,
        329,
        343,
        358,
        372,
        386,
        400,
        415,
        429
    ]
    ]
}
}
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -6,8 +6,8 @@
    "normalize_D": false,
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "dataset_normalizer": "standard",
    "forest_size": null,
    "forest_size": null,
    "extracted_forest_size_samples": 10,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diamonds/stage2",
    "models_dir": "models/diamonds/stage2",
    "dev_size": 0.2,
    "dev_size": 0.2,
    "test_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,9 @@
    "seeds": [
    "seeds": [
        1,
        1,
        2,
        2,
        3
        3,
        4,
        5
    ],
    ],
    "subsets_used": "train,dev",
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "normalize_weights": true,
@@ -27,16 +29,37 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "omp",
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        16,
        14,
        31,
        29,
        47,
        43,
        62,
        57,
        78,
        72,
        94,
        86,
        109,
        100,
        125,
        114,
        140,
        129,
        156
        143,
        157,
        172,
        186,
        200,
        214,
        229,
        243,
        257,
        272,
        286,
        300,
        315,
        329,
        343,
        358,
        372,
        386,
        400,
        415,
        429
    ]
    ]
}
}
 No newline at end of file
Original line number Original line Diff line number Diff line
@@ -6,8 +6,8 @@
    "normalize_D": false,
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "dataset_normalizer": "standard",
    "forest_size": null,
    "forest_size": null,
    "extracted_forest_size_samples": 10,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diamonds/stage3",
    "models_dir": "models/diamonds/stage3",
    "dev_size": 0.2,
    "dev_size": 0.2,
    "test_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,9 @@
    "seeds": [
    "seeds": [
        1,
        1,
        2,
        2,
        3
        3,
        4,
        5
    ],
    ],
    "subsets_used": "train,dev",
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "normalize_weights": false,
@@ -27,16 +29,37 @@
    ],
    ],
    "job_number": -1,
    "job_number": -1,
    "extraction_strategy": "omp",
    "extraction_strategy": "omp",
    "overwrite": false,
    "extracted_forest_size": [
    "extracted_forest_size": [
        16,
        14,
        31,
        29,
        47,
        43,
        62,
        57,
        78,
        72,
        94,
        86,
        109,
        100,
        125,
        114,
        140,
        129,
        156
        143,
        157,
        172,
        186,
        200,
        214,
        229,
        243,
        257,
        272,
        286,
        300,
        315,
        329,
        343,
        358,
        372,
        386,
        400,
        415,
        429
    ]
    ]
}
}
 No newline at end of file

scripts/run_compute_results.sh

100644 → 100755
+6 −2

File changed.File mode changed from 100644 to 100755.

Preview size limit exceeded, changes collapsed.

+7 −7

File changed.File mode changed from 100644 to 100755.

Preview size limit exceeded, changes collapsed.

+5 −5

File changed.File mode changed from 100644 to 100755.

Preview size limit exceeded, changes collapsed.

+5 −5

File changed.File mode changed from 100644 to 100755.

Preview size limit exceeded, changes collapsed.

+6 −9

File changed.File mode changed from 100644 to 100755.

Preview size limit exceeded, changes collapsed.

+10 −7

File changed.File mode changed from 100644 to 100755.

Preview size limit exceeded, changes collapsed.