Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • 12-experiment-pipeline
  • 13-visualization
  • 14-correction-of-multiclass-classif
  • 15-integration-sota
  • 17-adding-new-datasets
  • 19-add-some-tests
  • 20-coherence-des-arbres-de-predictions
  • 24-non-negative-omp
  • correlation
  • master
  • archive/10-gridsearching-of-the-base-forest
  • archive/4-implement-omp_forest_classifier
  • archive/5-add-plots-2
  • archive/Leo_Add_first_notebook
  • archive/farah_notation_and_related_work
  • archive/wip_clean_scripts
16 results

Target

Select target project
No results found
Select Git revision
  • 12-experiment-pipeline
  • 13-visualization
  • 14-correction-of-multiclass-classif
  • 15-integration-sota
  • 17-adding-new-datasets
  • 19-add-some-tests
  • 20-coherence-des-arbres-de-predictions
  • 24-non-negative-omp
  • correlation
  • master
  • archive/10-gridsearching-of-the-base-forest
  • archive/4-implement-omp_forest_classifier
  • archive/5-add-plots-2
  • archive/Leo_Add_first_notebook
  • archive/farah_notation_and_related_work
  • archive/wip_clean_scripts
16 results
Show changes
213 files
+ 2440
6257
Compare changes
  • Side-by-side
  • Inline

Files

Original line number Diff line number Diff line
from bolsonaro.data.dataset import Dataset
from bolsonaro.data.dataset_parameters import DatasetParameters
from bolsonaro.data.task import Task
from bolsonaro.utils import change_binary_func_load
from bolsonaro.utils import change_binary_func_load, change_binary_func_openml

from sklearn.datasets import load_boston, load_iris, load_diabetes, \
    load_digits, load_linnerud, load_wine, load_breast_cancer
from sklearn.datasets import fetch_olivetti_faces, fetch_20newsgroups, \
    fetch_20newsgroups_vectorized, fetch_lfw_people, fetch_lfw_pairs, \
    fetch_covtype, fetch_rcv1, fetch_kddcup99, fetch_california_housing
    fetch_covtype, fetch_rcv1, fetch_kddcup99, fetch_california_housing, \
    fetch_openml
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import random
@@ -30,13 +31,15 @@ class DatasetLoader(object):

    dataset_names = ['boston', 'iris', 'diabetes', 'digits', 'linnerud', 'wine',
        'breast_cancer', 'olivetti_faces', '20newsgroups_vectorized', 'lfw_people',
        'lfw_pairs', 'covtype', 'rcv1', 'california_housing', 'diamonds']
        'lfw_pairs', 'covtype', 'rcv1', 'california_housing', 'diamonds', 'steel-plates',
        'kr-vs-kp', 'kin8nm', 'spambase', 'musk', 'gamma']

    dataset_seed_numbers = {'boston':15, 'iris':15, 'diabetes':15, 'digits':5,
        'linnerud':15, 'wine':15, 'breast_cancer':15, 'olivetti_faces':15,
        '20newsgroups_vectorized':3, 'lfw_people':3,
        'lfw_pairs':3, 'covtype':3, 'rcv1':3, 'california_housing':3,
        'diamonds': 15}
        'diamonds': 15, 'steel-plates': 15, 'kr-vs-kp': 15, 'kin8nm': 15,
        'spambase': 15, 'musk': 15, 'gamma': 15}

    @staticmethod
    def load(dataset_parameters):
@@ -103,6 +106,24 @@ class DatasetLoader(object):
            df['clarity'] = label_clarity.fit_transform(df['clarity'])
            X, y = df.drop(['price'], axis=1), df['price']
            task = Task.REGRESSION
        elif name == 'steel-plates':
            dataset_loading_func = change_binary_func_openml('steel-plates-fault')
            task = Task.BINARYCLASSIFICATION
        elif name == 'kr-vs-kp':
            dataset_loading_func = change_binary_func_openml('kr-vs-kp')
            task = Task.BINARYCLASSIFICATION
        elif name == 'kin8nm':
            X, y = fetch_openml('kin8nm', return_X_y=True)
            task = Task.REGRESSION
        elif name == 'spambase':
            dataset_loading_func = change_binary_func_openml('spambase')
            task = Task.BINARYCLASSIFICATION
        elif name == 'musk':
            dataset_loading_func = change_binary_func_openml('musk')
            task = Task.BINARYCLASSIFICATION
        elif name == 'gamma':
            dataset_loading_func = change_binary_func_openml('MagicTelescope')
            task = Task.BINARYCLASSIFICATION
        else:
            raise ValueError("Unsupported dataset '{}'".format(name))

Original line number Diff line number Diff line
from bolsonaro.utils import tqdm_joblib

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.base import BaseEstimator
from sklearn.cluster import KMeans
from abc import abstractmethod, ABCMeta
import numpy as np
from scipy.stats import mode
from joblib import Parallel, delayed
from tqdm import tqdm


class KMeansForestRegressor(BaseEstimator, metaclass=ABCMeta):
    """
    Random forest regressor pruned by K-means clustering of its trees.

    Follows 'On extreme pruning of random forest ensembles for real-time predictive
    applications', by Khaled Fawagreh, Mohamed Medhat Gaber and Eyad Elyan: the trees
    are clustered on their training predictions and a single tree is kept per cluster.
    """

    def __init__(self, models_parameters, score_metric=mean_squared_error, greater_is_better=False):
        """
        :param models_parameters: object providing `hyperparameters` (unpacked as keyword
            arguments of the RandomForestRegressor), `seed` and `extracted_forest_size`
        :param score_metric: callable `(y_true, y_pred) -> float` used to rank the trees
            of a cluster and to compute `score`
        :param greater_is_better: set to True when `score_metric` is a score to maximise;
            the default (False) treats it as a loss, which matches mean_squared_error
        """
        self._models_parameters = models_parameters
        self._estimator = RandomForestRegressor(**self._models_parameters.hyperparameters,
            random_state=self._models_parameters.seed, n_jobs=-1)
        self._extracted_forest_size = self._models_parameters.extracted_forest_size
        self._score_metric = score_metric
        self._greater_is_better = greater_is_better

    @property
    def models_parameters(self):
        return self._models_parameters

    def fit(self, X_train, y_train, X_val, y_val):
        """
        Fit the full forest on the training data, then prune it to (at most)
        `extracted_forest_size` trees.

        :param X_train: data the forest is trained on and the trees are clustered on
        :param y_train: targets associated with `X_train`
        :param X_val: data used to pick the best tree of each cluster
        :param y_val: targets associated with `X_val`
        :return: self
        """
        self._estimator.fit(X_train, y_train)

        # One row per tree: K-means clusters trees by their training predictions
        predictions = np.array([tree.predict(X_train) for tree in self._estimator.estimators_])

        kmeans = KMeans(n_clusters=self._extracted_forest_size,
            random_state=self._models_parameters.seed).fit(predictions)
        labels = np.array(kmeans.labels_)

        # For each cluster select the best tree on the validation set
        with tqdm_joblib(tqdm(total=self._extracted_forest_size, disable=True)) as prune_forest_job_pb:
            selected_trees = Parallel(n_jobs=-1)(delayed(self._prune_forest_job)(prune_forest_job_pb,
                cluster_id, labels, X_val, y_val, self._score_metric)
                for cluster_id in range(self._extracted_forest_size))

        # Clusters that received no tree contribute nothing to the pruned forest
        self._estimator.estimators_ = [tree for tree in selected_trees if tree is not None]
        return self

    def _prune_forest_job(self, prune_forest_job_pb, c, labels, X_val, y_val, score_metric):
        """
        Return the best tree of cluster `c` according to `score_metric` on the
        validation set, or None when no tree was assigned to that cluster.
        """
        index = np.where(labels == c)[0]
        if len(index) == 0:
            # argmin/argmax raise on an empty sequence; nothing to select here
            prune_forest_job_pb.update()
            return None
        with tqdm_joblib(tqdm(total=len(index), disable=True)) as cluster_job_pb:
            cluster = Parallel(n_jobs=-1)(delayed(self._cluster_job)(cluster_job_pb, tree_index, X_val,
                y_val, score_metric) for tree_index in index)
        # A loss (the default) must be minimised; taking the argmax of an error
        # metric would keep the worst tree of the cluster.
        select_best = np.argmax if self._greater_is_better else np.argmin
        best_tree_index = select_best(cluster)
        prune_forest_job_pb.update()
        return self._estimator.estimators_[index[best_tree_index]]

    def _cluster_job(self, cluster_job_pb, i, X_val, y_val, score_metric):
        """Compute `score_metric` of the i-th tree of the forest on the validation set."""
        y_val_pred = self._estimator.estimators_[i].predict(X_val)
        tree_pred = score_metric(y_val, y_val_pred)
        cluster_job_pb.update()
        return tree_pred

    def predict(self, X):
        """Predict with the underlying forest (the pruned one once `fit` has run)."""
        return self._estimator.predict(X)

    def score(self, X, y):
        """
        Evaluate the mean prediction of the current trees with the score metric.

        :param X: samples to predict
        :param y: true targets
        :return: value of `score_metric(y, mean_predictions)`
        """
        predictions = np.array([tree.predict(X) for tree in self._estimator.estimators_])
        mean_predictions = np.mean(predictions, axis=0)
        # (y_true, y_pred) order, as in _cluster_job; matters for asymmetric metrics
        score = self._score_metric(y, mean_predictions)
        return score

    def predict_base_estimator(self, X):
        return self._estimator.predict(X)
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@ from bolsonaro.models.omp_forest_classifier import OmpForestBinaryClassifier, Om
from bolsonaro.models.omp_forest_regressor import OmpForestRegressor
from bolsonaro.models.model_parameters import ModelParameters
from bolsonaro.models.similarity_forest_regressor import SimilarityForestRegressor
from bolsonaro.models.kmeans_forest_regressor import KMeansForestRegressor
from bolsonaro.data.task import Task

from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
@@ -22,9 +23,11 @@ class ModelFactory(object):
            elif model_parameters.extraction_strategy == 'random':
                return RandomForestClassifier(n_estimators=model_parameters.extracted_forest_size,
                    random_state=model_parameters.seed)
            else:
            elif model_parameters.extraction_strategy == 'none':
                return RandomForestClassifier(n_estimators=model_parameters.hyperparameters['n_estimators'],
                    random_state=model_parameters.seed)
            else:
                raise ValueError('Invalid extraction strategy')
        elif task == Task.REGRESSION:
            if model_parameters.extraction_strategy == 'omp':
                return OmpForestRegressor(model_parameters)
@@ -33,15 +36,21 @@ class ModelFactory(object):
                    random_state=model_parameters.seed)
            elif model_parameters.extraction_strategy == 'similarity':
                return SimilarityForestRegressor(model_parameters)
            else:
            elif model_parameters.extraction_strategy == 'kmeans':
                return KMeansForestRegressor(model_parameters)
            elif model_parameters.extraction_strategy == 'none':
                return RandomForestRegressor(n_estimators=model_parameters.hyperparameters['n_estimators'],
                    random_state=model_parameters.seed)
            else:
                raise ValueError('Invalid extraction strategy')
        elif task == Task.MULTICLASSIFICATION:
            if model_parameters.extraction_strategy == 'omp':
                return OmpForestMulticlassClassifier(model_parameters)
            elif model_parameters.extraction_strategy == 'random':
                return RandomForestClassifier(n_estimators=model_parameters.extracted_forest_size,
                    random_state=model_parameters.seed)
            else:
            elif model_parameters.extraction_strategy == 'none':
                return RandomForestClassifier(n_estimators=model_parameters.hyperparameters['n_estimators'],
                    random_state=model_parameters.seed)
            else:
                raise ValueError('Invalid extraction strategy')
Original line number Diff line number Diff line
@@ -6,12 +6,12 @@ import datetime

class ModelRawResults(object):

    def __init__(self, model_object, training_time,
    def __init__(self, model_weights, training_time,
        datetime, train_score, dev_score, test_score,
        train_score_base, dev_score_base,
        test_score_base, score_metric, base_score_metric):

        self._model_object = model_object
        self._model_weights = model_weights
        self._training_time = training_time
        self._datetime = datetime
        self._train_score = train_score
@@ -24,8 +24,8 @@ class ModelRawResults(object):
        self._base_score_metric = base_score_metric

    @property
    def model_object(self):
        return self.model_object
    def model_weights(self):
        """Return the model weights stored at construction time."""
        # Read the private attribute: returning `self.model_weights` would call
        # this accessor again and recurse until RecursionError.
        return self._model_weights

    @property
    def training_time(self):
@@ -68,6 +68,8 @@ class ModelRawResults(object):
        return self._base_score_metric

    def save(self, models_dir):
        """
        Pickle the attributes of this object to `models_dir`/model_raw_results.pickle.

        :param models_dir: output directory; created (with its parents) when missing
        """
        # makedirs with exist_ok avoids the check-then-create race and also
        # handles missing parent directories, which os.mkdir does not.
        os.makedirs(models_dir, exist_ok=True)
        save_obj_to_pickle(models_dir + os.sep + 'model_raw_results.pickle',
            self.__dict__)

Original line number Diff line number Diff line
@@ -8,6 +8,7 @@ from sklearn.base import BaseEstimator


class OmpForest(BaseEstimator, metaclass=ABCMeta):

    def __init__(self, models_parameters, base_forest_estimator):
        self._base_forest_estimator = base_forest_estimator
        self._models_parameters = models_parameters
@@ -24,7 +25,6 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):
        return self._base_forest_estimator.score(X, y)

    def _base_estimator_predictions(self, X):
        """
        Collect the `predict` output of every tree of the base forest on `X`.

        :param X: samples to predict
        :return: the per-tree predictions stacked into an array and transposed
        """
        return np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_]).T

    @property
@@ -33,6 +33,8 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):

    # sklearn baseestimator api methods
    def fit(self, X_forest, y_forest, X_omp, y_omp):
        """
        Fit the base forest, then extract the sub-forest.

        :param X_forest: samples used to fit the base forest
        :param y_forest: targets associated with `X_forest`
        :param X_omp: samples passed to the sub-forest extraction
        :param y_omp: targets associated with `X_omp`
        :return: self
        """
        self._base_forest_estimator.fit(X_forest, y_forest)
        self._extract_subforest(X_omp, y_omp) # type: OrthogonalMatchingPursuit
        return self
@@ -96,6 +98,7 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):
        pass

class SingleOmpForest(OmpForest):

    def __init__(self, models_parameters, base_forest_estimator):
        # fit_intercept shouldn't be set to False as the data isn't necessarily centered here
        # normalization is handled outside OMP
@@ -123,3 +126,24 @@ class SingleOmpForest(OmpForest):
            forest_predictions /= self._forest_norms

        return self._make_omp_weighted_prediction(forest_predictions, self._omp, self._models_parameters.normalize_weights)

    def predict_no_weights(self, X):
        """
        Predict with the trees selected by OMP, ignoring the OMP weights.

        Every tree whose OMP coefficient is non-zero contributes equally: the
        result is the plain mean of their predictions.

        :param X: samples to predict
        :return: a np.array with the mean prediction of the selected trees
        """
        # Transposed so that the first axis indexes the trees
        forest_predictions = self._base_estimator_predictions(X).T

        if self._models_parameters.normalize_D:
            # NOTE(review): the array is tree-major at this point - confirm that
            # _forest_norms broadcasts as intended against this layout.
            forest_predictions /= self._forest_norms

        omp_trees_indices = np.nonzero(self._omp.coef_)[0]

        return np.mean(forest_predictions[omp_trees_indices], axis=0)
Original line number Diff line number Diff line
@@ -24,6 +24,34 @@ class OmpForestBinaryClassifier(SingleOmpForest):

        return super().fit(X_forest, y_forest, X_omp, y_omp)

    def predict_no_weights(self, X):
        """
        Predict with the trees selected by OMP, ignoring the OMP weights.

        The `predict_proba` outputs of the trees with a non-zero OMP coefficient
        are averaged with equal weight, then rescaled from [0, 1] to [-1, 1].

        :param X: samples to predict
        :return: a np.array with one rescaled mean probability per sample
        """
        per_tree_probas = [tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]
        forest_predictions = np.array(per_tree_probas)

        if self._models_parameters.normalize_D:
            forest_predictions /= self._forest_norms

        # Trees kept by OMP are those with a non-zero coefficient
        selected_trees = np.nonzero(self._omp.coef_)

        # After the transpose, index 1 is the second predict_proba column,
        # i.e. the probability of being class 1.
        class_one_probas = forest_predictions[selected_trees].T[1]

        mean_proba = np.mean(class_one_probas, axis=1)
        return (mean_proba - 0.5) * 2

    def score(self, X, y, metric=DEFAULT_SCORE_METRIC):
        """
        Evaluate OMPForestClassifer on (`X`, `y`) using `metric`
@@ -106,6 +134,36 @@ class OmpForestMulticlassClassifier(OmpForest):
        max_preds = np.argmax(preds, axis=1)
        return np.array(label_names)[max_preds]

    def predict_no_weights(self, X):
        """
        Predict class labels with the trees selected by OMP, ignoring the OMP weights.

        For each class, the `predict_proba` outputs of the trees whose coefficient
        is non-zero in that class' OMP are rescaled to [-1, 1] and averaged with
        equal weight; the class with the highest average is returned.

        :param X: samples to predict
        :return: a np.array with the predicted label of each sample
        """

        # Transposed so that the first axis indexes the classes
        forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]).T

        if self._models_parameters.normalize_D:
            forest_predictions /= self._forest_norms

        label_names = []
        preds = []
        # NOTE(review): assumes _dct_class_omp iterates in the same order as the
        # predict_proba columns - confirm against where the dict is built.
        for num_class, (class_label, omp_class) in enumerate(self._dct_class_omp.items()):
            # np.nonzero returns a tuple of index arrays; keep the array itself so
            # that its length is the number of selected trees (the tuple's is always 1).
            omp_trees_indices = np.nonzero(omp_class.coef_)[0]
            label_names.append(class_label)
            atoms_binary = (forest_predictions[num_class].T - 0.5) * 2 # rescaled from 0/1 to -1/1
            # max(..., 1) keeps the all-zero score when no tree was selected for the class
            nb_selected_trees = max(len(omp_trees_indices), 1)
            preds.append(np.sum(atoms_binary[omp_trees_indices], axis=0) / nb_selected_trees)

        preds = np.array(preds).T
        max_preds = np.argmax(preds, axis=1)
        return np.array(label_names)[max_preds]

    def score(self, X, y, metric=DEFAULT_SCORE_METRIC):
        predictions = self.predict(X)

Original line number Diff line number Diff line
@@ -3,6 +3,7 @@ from sklearn.metrics import mean_squared_error
from sklearn.base import BaseEstimator
from abc import abstractmethod, ABCMeta
import numpy as np
from tqdm import tqdm


class SimilarityForestRegressor(BaseEstimator, metaclass=ABCMeta):
@@ -10,56 +11,69 @@ class SimilarityForestRegressor(BaseEstimator, metaclass=ABCMeta):
    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2822360/
    """

    def __init__(self, models_parameters):
    def __init__(self, models_parameters, score_metric=mean_squared_error):
        self._models_parameters = models_parameters
        self._regressor = RandomForestRegressor(n_estimators=self._models_parameters.hyperparameters['n_estimators'],
            random_state=models_parameters.seed)
        self._estimator = RandomForestRegressor(**self._models_parameters.hyperparameters,
            random_state=self._models_parameters.seed, n_jobs=-1)
        self._extracted_forest_size = self._models_parameters.extracted_forest_size
        self._score_metric = score_metric

    @property
    def models_parameters(self):
        return self._models_parameters

    def fit(self, X_train, y_train, X_val, y_val, score_metric=mean_squared_error):
    def fit(self, X_train, y_train, X_val, y_val):
        self._estimator.fit(X_train, y_train)

        self._regressor.fit(X_train, y_train)

        y_val_pred = self._regressor.predict(X_val)
        forest_pred = score_metric(y_val, y_val_pred)
        forest = self._regressor.estimators_
        y_val_pred = self._estimator.predict(X_val)
        forest_pred = self._score_metric(y_val, y_val_pred)
        forest = self._estimator.estimators_
        selected_trees = list()
        tree_list = list(self._regressor.estimators_)
        tree_list = list(self._estimator.estimators_)

        val_scores = list()
        with tqdm(tree_list) as tree_pred_bar:
            tree_pred_bar.set_description('[Initial tree predictions]')
            for tree in tree_pred_bar:
                val_scores.append(tree.predict(X_val))
            tree_pred_bar.update(1)

        for _ in range(self._extracted_forest_size):
        with tqdm(range(self._extracted_forest_size), disable=True) as pruning_forest_bar:
            pruning_forest_bar.set_description(f'[Pruning forest s={self._extracted_forest_size}]')
            for i in pruning_forest_bar:
                best_similarity = 100000
                found_index = 0
            for i in range(len(tree_list)):
                lonely_tree = tree_list[i]
                del tree_list[i]
                val_list = list()
                for tree in tree_list:
                    val_pred = tree.predict(X_val)
                    val_list.append(val_pred)
                val_list = np.array(val_list)
                val_mean = np.mean(val_list, axis=0)
                val_score = score_metric(val_mean, y_val)
                with tqdm(range(len(tree_list)), disable=True) as tree_list_bar:
                    tree_list_bar.set_description(f'[Tree selection s={self._extracted_forest_size} #{i}]')
                    for j in tree_list_bar:
                        lonely_tree = tree_list[j]
                        del tree_list[j]
                        val_mean = np.mean(np.asarray(val_scores), axis=0)
                        val_score = self._score_metric(val_mean, y_val)
                        temp_similarity = abs(forest_pred - val_score)
                        if (temp_similarity < best_similarity):
                    found_index = i
                            found_index = j
                            best_similarity = temp_similarity
                tree_list.insert(i, lonely_tree)
                        tree_list.insert(j, lonely_tree)
                        val_scores.insert(j, lonely_tree.predict(X_val))
                        tree_list_bar.update(1)
                selected_trees.append(tree_list[found_index])
                del tree_list[found_index]
                del val_scores[found_index]
                pruning_forest_bar.update(1)

        pruned_forest = list(set(forest) - set(selected_trees))
        self._regressor.estimators_ = pruned_forest
        self._estimator.estimators_ = pruned_forest

    def score(self, X, y):
        test_list = list()
        for mod in self._regressor.estimators_:
        for mod in self._estimator.estimators_:
            test_pred = mod.predict(X)
            test_list.append(test_pred)
        test_list = np.array(test_list)
        test_mean = np.mean(test_list, axis=0)
        score = mean_squared_error(test_mean, y)
        score = self._score_metric(test_mean, y)
        return score

    def predict_base_estimator(self, X):
        return self._estimator.predict(X)
Original line number Diff line number Diff line
@@ -95,14 +95,21 @@ class Trainer(object):
            )
        self._end_time = time.time()

    def __score_func(self, model, X, y_true):
    def __score_func(self, model, X, y_true, weights=True):
        if type(model) in [OmpForestRegressor, RandomForestRegressor, SimilarityForestRegressor]:
            if weights:
                y_pred = model.predict(X)
            else:
                y_pred = model.predict_no_weights(X)
            result = self._regression_score_metric(y_true, y_pred)
        elif type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier, RandomForestClassifier]:
            if weights:
                y_pred = model.predict(X)
            else:
                y_pred = model.predict_no_weights(X)
            if type(model) is OmpForestBinaryClassifier:
                y_pred = y_pred.round()
                y_pred = np.sign(y_pred)
                y_pred = np.where(y_pred==0, 1, y_pred)
            result = self._classification_score_metric(y_true, y_pred)
        return result

@@ -126,8 +133,17 @@ class Trainer(object):
        :param model: Object with
        :param models_dir: Where the results will be saved
        """

        model_weights = ''
        if type(model) in [OmpForestRegressor, OmpForestBinaryClassifier]:
            model_weights = model._omp.coef_
        elif type(model) == OmpForestMulticlassClassifier:
            model_weights = model._dct_class_omp
        elif type(model) == OmpForestBinaryClassifier:
            model_weights = model._omp

        results = ModelRawResults(
            model_object='',
            model_weights=model_weights,
            training_time=self._end_time - self._begin_time,
            datetime=datetime.datetime.now(),
            train_score=self.__score_func(model, self._dataset.X_train, self._dataset.y_train),
@@ -148,3 +164,27 @@ class Trainer(object):

        self._logger.info("Base performance on dev: {}".format(results.dev_score_base))
        self._logger.info("Performance on dev: {}".format(results.dev_score))

        if type(model) not in [RandomForestRegressor, RandomForestClassifier]:
            results = ModelRawResults(
                model_weights='',
                training_time=self._end_time - self._begin_time,
                datetime=datetime.datetime.now(),
                train_score=self.__score_func(model, self._dataset.X_train, self._dataset.y_train, False),
                dev_score=self.__score_func(model, self._dataset.X_dev, self._dataset.y_dev, False),
                test_score=self.__score_func(model, self._dataset.X_test, self._dataset.y_test, False),
                train_score_base=self.__score_func_base(model, self._dataset.X_train, self._dataset.y_train),
                dev_score_base=self.__score_func_base(model, self._dataset.X_dev, self._dataset.y_dev),
                test_score_base=self.__score_func_base(model, self._dataset.X_test, self._dataset.y_test),
                score_metric=self._score_metric_name,
                base_score_metric=self._base_score_metric_name
            )
            results.save(models_dir+'_no_weights')
            self._logger.info("Base performance on test without weights: {}".format(results.test_score_base))
            self._logger.info("Performance on test: {}".format(results.test_score))

            self._logger.info("Base performance on train without weights: {}".format(results.train_score_base))
            self._logger.info("Performance on train: {}".format(results.train_score))

            self._logger.info("Base performance on dev without weights: {}".format(results.dev_score_base))
            self._logger.info("Performance on dev: {}".format(results.dev_score))
Original line number Diff line number Diff line
@@ -5,6 +5,8 @@ from copy import deepcopy
import contextlib
import joblib

from sklearn.datasets import fetch_openml


def resolve_experiment_id(models_dir):
    """
@@ -78,6 +80,16 @@ def change_binary_func_load(base_load_function):
        return X, y
    return func_load

def change_binary_func_openml(dataset_name):
    """
    Build a loading function for a binary classification dataset hosted on OpenML.

    :param dataset_name: name of the dataset, forwarded to `fetch_openml`
    :return: a function `func_load(return_X_y=True, random_state=None)` returning
        `(X, y)` where `y` went through `binarize_class_data` (called with the last
        class in sorted order) and was cast to int
    :raises AssertionError: (when calling the returned function) if the dataset
        does not have exactly two classes
    """
    def func_load(return_X_y=True, random_state=None):
        # `return_X_y` and `random_state` are accepted but not used here.
        # The (X, y) tuple is always requested: the unpacking below cannot
        # work with the Bunch returned when return_X_y is False.
        X, y = fetch_openml(dataset_name, return_X_y=True)
        possible_classes = sorted(set(y))
        assert len(possible_classes) == 2, \
            "Function change_binary_func_openml only works for binary classification"
        y = binarize_class_data(y, possible_classes[-1])
        y = y.astype('int')
        return X, y
    return func_load

@contextlib.contextmanager
def tqdm_joblib(tqdm_object):
    """Context manager to patch joblib to report into tqdm progress bar given as argument"""
Original line number Diff line number Diff line
@@ -109,16 +109,16 @@ class Plotter(object):

        fig, ax = plt.subplots()

        n = len(all_experiment_scores)
        nb_experiments = len(all_experiment_scores)

        """
        Get as many different colors from the specified cmap (here nipy_spectral)
        as there are curve to plot.
        """
        colors = Plotter.get_colors_from_cmap(n)
        colors = Plotter.get_colors_from_cmap(nb_experiments)

        # For each curve to plot
        for i in range(n):
        for i in range(nb_experiments):
            # Retrieve the scores in a list for each seed
            experiment_scores = list(all_experiment_scores[i].values())
            # Compute the mean and the std for the CI
Original line number Diff line number Diff line
@@ -17,7 +17,7 @@ def retreive_extracted_forest_sizes_number(models_dir, experiment_id):
    extracted_forest_sizes_root_path = experiment_seed_path + os.sep + 'extracted_forest_sizes'
    return len(os.listdir(extracted_forest_sizes_root_path))

def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_dir, experiment_id):
def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_dir, experiment_id, weights=True):
    experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
    experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds

@@ -28,6 +28,7 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
    experiment_train_scores = dict()
    experiment_dev_scores = dict()
    experiment_test_scores = dict()
    experiment_weights = dict()
    all_extracted_forest_sizes = list()

    # Used to check if all losses were computed using the same metric (it should be the case)
@@ -44,14 +45,19 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
        experiment_train_scores[seed] = list()
        experiment_dev_scores[seed] = list()
        experiment_test_scores[seed] = list()
        experiment_weights[seed] = list()

        # List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
        extracted_forest_sizes = os.listdir(extracted_forest_sizes_root_path)
        extracted_forest_sizes = [nb_tree for nb_tree in extracted_forest_sizes if not 'no_weights' in nb_tree ]
        extracted_forest_sizes.sort(key=int)
        all_extracted_forest_sizes.append(list(map(int, extracted_forest_sizes)))
        for extracted_forest_size in extracted_forest_sizes:
            # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}
            if weights:
                extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size
            else:
                extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size + '_no_weights'
            # Load models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}/model_raw_results.pickle file
            model_raw_results = ModelRawResults.load(extracted_forest_size_path)
            # Save the scores
@@ -60,6 +66,8 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
            experiment_test_scores[seed].append(model_raw_results.test_score)
            # Save the metric
            experiment_score_metrics.append(model_raw_results.score_metric)
            # Save the weights
            #experiment_weights[seed].append(model_raw_results.model_weights)

    # Sanity checks
    if len(set(experiment_score_metrics)) > 1:
@@ -67,7 +75,8 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
    if len(set([sum(extracted_forest_sizes) for extracted_forest_sizes in all_extracted_forest_sizes])) != 1:
        raise ValueError("The extracted forest sizes aren't the sames across seeds.")

    return experiment_train_scores, experiment_dev_scores, experiment_test_scores, all_extracted_forest_sizes[0], experiment_score_metrics[0]
    return experiment_train_scores, experiment_dev_scores, experiment_test_scores, \
        all_extracted_forest_sizes[0], experiment_score_metrics[0]#, experiment_weights

def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experiment_id, extracted_forest_sizes_number):
    experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
@@ -120,6 +129,7 @@ if __name__ == "__main__":

    DEFAULT_RESULTS_DIR = os.environ["project_dir"] + os.sep + 'results'
    DEFAULT_MODELS_DIR = os.environ["project_dir"] + os.sep + 'models'
    DEFAULT_PLOT_WEIGHT_DENSITY = False

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--stage', nargs='?', type=int, required=True, help='Specify the stage number among [1, 5].')
@@ -130,6 +140,7 @@ if __name__ == "__main__":
    parser.add_argument('--dataset_name', nargs='?', type=str, required=True, help='Specify the dataset name. TODO: read it from models dir directly.')
    parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.')
    parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
    parser.add_argument('--plot_weight_density', action='store_true', default=DEFAULT_PLOT_WEIGHT_DENSITY, help='Plot the weight density. Only working for regressor models for now.')
    args = parser.parse_args()

    if args.stage not in list(range(1, 6)):
@@ -347,9 +358,17 @@ if __name__ == "__main__":
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1])
        # omp_with_params
        logger.info('Loading omp_with_params experiment scores...')
        """omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
            omp_with_params_experiment_score_metric, experiment_weights = extract_scores_across_seeds_and_extracted_forest_sizes(
                args.models_dir, args.results_dir, args.experiment_ids[2])"""
        omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
            omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
                args.models_dir, args.results_dir, args.experiment_ids[2])
        # omp_with_params_without_weights
        logger.info('Loading omp_with_params experiment scores...')
        omp_with_params_without_weights_train_scores, omp_with_params_without_weights_dev_scores, omp_with_params_without_weights_test_scores, _, \
            omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
                args.models_dir, args.results_dir, args.experiment_ids[2], weights=False)

        """# base_with_params
        logger.info('Loading base_with_params experiment scores 2...')
@@ -369,13 +388,58 @@ if __name__ == "__main__":
            raise ValueError('Score metrics of all experiments must be the same.')
        experiments_score_metric = base_with_params_experiment_score_metric

        output_path = os.path.join(args.results_dir, args.dataset_name, 'stage4')
        output_path = os.path.join(args.results_dir, args.dataset_name, 'stage4_fix')
        pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)

        Plotter.plot_stage2_losses(
            file_path=output_path + os.sep + 'losses.png',
            all_experiment_scores=[base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores],
            all_labels=['base', 'random', 'omp'],
            all_experiment_scores=[base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores,
                                   omp_with_params_without_weights_test_scores],
            all_labels=['base', 'random', 'omp', 'omp_without_weights'],
            x_value=with_params_extracted_forest_sizes,
            xlabel='Number of trees extracted',
            ylabel=experiments_score_metric,
            title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
    elif args.stage == 5:
        # Retrieve the number of extracted forest sizes used, so that the base forest axis is as long as necessary
        extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, args.experiment_ids[1])

        # base_with_params
        logger.info('Loading base_with_params experiment scores...')
        base_with_params_train_scores, base_with_params_dev_scores, base_with_params_test_scores, \
            base_with_params_experiment_score_metric = \
            extract_scores_across_seeds_and_forest_size(args.models_dir, args.results_dir, args.experiment_ids[0],
            extracted_forest_sizes_number)
        # random_with_params
        logger.info('Loading random_with_params experiment scores...')
        random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
            with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
            extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1])
        # omp_with_params
        logger.info('Loading omp_with_params experiment scores...')
        omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
            omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
                args.models_dir, args.results_dir, args.experiment_ids[2])
        # kmeans_with_params
        logger.info('Loading kmeans_with_params experiment scores...')
        kmeans_with_params_train_scores, kmeans_with_params_dev_scores, kmeans_with_params_test_scores, _, \
            kmeans_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
                args.models_dir, args.results_dir, args.experiment_ids[3])
        
        # Sanity check on the metrics retrieved
        if not (base_with_params_experiment_score_metric == random_with_params_experiment_score_metric
            == omp_with_params_experiment_score_metric == kmeans_with_params_experiment_score_metric):
            raise ValueError('Score metrics of all experiments must be the same.')
        experiments_score_metric = base_with_params_experiment_score_metric

        output_path = os.path.join(args.results_dir, args.dataset_name, 'stage5_kmeans')
        pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)

        Plotter.plot_stage2_losses(
            file_path=output_path + os.sep + 'losses.png',
            all_experiment_scores=[base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores,
                kmeans_with_params_test_scores],
            all_labels=['base', 'random', 'omp', 'kmeans'],
            x_value=with_params_extracted_forest_sizes,
            xlabel='Number of trees extracted',
            ylabel=experiments_score_metric,
@@ -384,16 +448,3 @@ if __name__ == "__main__":
        raise ValueError('This stage number is not supported yet, but it will be!')

    logger.info('Done.')

    """
    TODO:
    For each dataset:
    Stage 1) [DONE for california_housing] A figure for the selection of the best base forest model hyperparameters (best vs default/random hyperparams)
    Stage 2) [DONE for california_housing] A figure for the selection of the best combination of normalization: D normalization vs weights normalization (4 combinations)
    Stage 3) [DONE for california_housing] A figure for the selection of the most relevant subsets combination: train,dev vs train+dev,train+dev vs train,train+dev
    Stage 4) A figure to finally compare the perf of our approach using the previous selected
        parameters vs the baseline vs other papers using different extracted forest size
        (percentage of the tree size found previously in best hyperparams search) on the abscissa.

    IMPORTANT: Compare experiments that used the same seeds among them (except for stage 1).
    """
+78 −37
Original line number Diff line number Diff line
@@ -21,7 +21,7 @@ import numpy as np
import shutil


def process_job(seed, parameters, experiment_id, hyperparameters):
def seed_job(seed_job_pb, seed, parameters, experiment_id, hyperparameters, verbose):
    """
    Experiment function.

@@ -34,7 +34,6 @@ def process_job(seed, parameters, experiment_id, hyperparameters):
    """
    logger = LoggerFactory.create(LOG_PATH, 'training_seed{}_ti{}'.format(
        seed, threading.get_ident()))
    logger.info('seed={}'.format(seed))

    seed_str = str(seed)
    experiment_id_str = str(experiment_id)
@@ -55,13 +54,31 @@ def process_job(seed, parameters, experiment_id, hyperparameters):
    trainer = Trainer(dataset)

    if parameters['extraction_strategy'] != 'none':
        for extracted_forest_size in parameters['extracted_forest_size']:
            logger.info('extracted_forest_size={}'.format(extracted_forest_size))
            sub_models_dir = models_dir + os.sep + 'extracted_forest_sizes' + os.sep + str(extracted_forest_size)
            pathlib.Path(sub_models_dir).mkdir(parents=True, exist_ok=True)
        with tqdm_joblib(tqdm(total=len(parameters['extracted_forest_size']), disable=not verbose)) as extracted_forest_size_job_pb:
            Parallel(n_jobs=-1)(delayed(extracted_forest_size_job)(extracted_forest_size_job_pb, parameters['extracted_forest_size'][i],
                models_dir, seed, parameters, dataset, hyperparameters, experiment_id, trainer)
                for i in range(len(parameters['extracted_forest_size'])))
    else:
        forest_size = hyperparameters['n_estimators']
        logger.info('Base forest training with fixed forest size of {}'.format(forest_size))
        sub_models_dir = models_dir + os.sep + 'forest_size' + os.sep + str(forest_size)

        # Check if the result file already exists
        already_exists = False
        if os.path.isdir(sub_models_dir):
            sub_models_dir_files = os.listdir(sub_models_dir)
            for file_name in sub_models_dir_files:
                if '.pickle' != os.path.splitext(file_name)[1]:
                    continue
                else:
                    already_exists = os.path.getsize(os.path.join(sub_models_dir, file_name)) > 0
                    break
        if already_exists:
            logger.info('Base forest result already exists. Skipping...')
        else:
            pathlib.Path(sub_models_dir).mkdir(parents=True, exist_ok=True)
            model_parameters = ModelParameters(
                extracted_forest_size=extracted_forest_size,
                extracted_forest_size=forest_size,
                normalize_D=parameters['normalize_D'],
                subsets_used=parameters['subsets_used'],
                normalize_weights=parameters['normalize_weights'],
@@ -76,14 +93,36 @@ def process_job(seed, parameters, experiment_id, hyperparameters):
            trainer.init(model, subsets_used=parameters['subsets_used'])
            trainer.train(model)
            trainer.compute_results(model, sub_models_dir)
    logger.info(f'Training done for seed {seed_str}')
    seed_job_pb.update(1)

def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_size, models_dir,
    seed, parameters, dataset, hyperparameters, experiment_id, trainer):

    logger = LoggerFactory.create(LOG_PATH, 'training_seed{}_extracted_forest_size{}_ti{}'.format(
        seed, extracted_forest_size, threading.get_ident()))
    logger.info('extracted_forest_size={}'.format(extracted_forest_size))

    sub_models_dir = models_dir + os.sep + 'extracted_forest_sizes' + os.sep + str(extracted_forest_size)

    # Check if the result file already exists
    already_exists = False
    if os.path.isdir(sub_models_dir):
        sub_models_dir_files = os.listdir(sub_models_dir)
        for file_name in sub_models_dir_files:
            if '.pickle' != os.path.splitext(file_name)[1]:
                return
            else:
        forest_size = hyperparameters['n_estimators']
        logger.info('Base forest training with fixed forest size of {}'.format(forest_size))
        sub_models_dir = models_dir + os.sep + 'forest_size' + os.sep + str(forest_size)
                already_exists = os.path.getsize(os.path.join(sub_models_dir, file_name)) > 0
                break
    if already_exists:
        logger.info(f'Extracted forest {extracted_forest_size} result already exists. Skipping...')
        return

    pathlib.Path(sub_models_dir).mkdir(parents=True, exist_ok=True)

    model_parameters = ModelParameters(
            extracted_forest_size=forest_size,
        extracted_forest_size=extracted_forest_size,
        normalize_D=parameters['normalize_D'],
        subsets_used=parameters['subsets_used'],
        normalize_weights=parameters['normalize_weights'],
@@ -98,7 +137,6 @@ def process_job(seed, parameters, experiment_id, hyperparameters):
    trainer.init(model, subsets_used=parameters['subsets_used'])
    trainer.train(model)
    trainer.compute_results(model, sub_models_dir)
    logger.info('Training done')

"""
Command lines example for stage 1:
@@ -138,6 +176,7 @@ if __name__ == "__main__":
    DEFAULT_SKIP_BEST_HYPERPARAMS = False
    DEFAULT_JOB_NUMBER = -1
    DEFAULT_EXTRACTION_STRATEGY = 'omp'
    DEFAULT_OVERWRITE = False

    begin_random_seed_range = 1
    end_random_seed_range = 2000
@@ -163,7 +202,8 @@ if __name__ == "__main__":
    parser.add_argument('--skip_best_hyperparams', action='store_true', default=DEFAULT_SKIP_BEST_HYPERPARAMS, help='Do not use the best hyperparameters if there exist.')
    parser.add_argument('--save_experiment_configuration', nargs='+', default=None, help='Save the experiment parameters specified in the command line in a file. Args: {{stage_num}} {{name}}')
    parser.add_argument('--job_number', nargs='?', type=int, default=DEFAULT_JOB_NUMBER, help='Specify the number of job used during the parallelisation across seeds.')
    parser.add_argument('--extraction_strategy', nargs='?', type=str, default=DEFAULT_EXTRACTION_STRATEGY, help='Specify the strategy to apply to extract the trees from the forest. Either omp, random, none or similarity.')
    parser.add_argument('--extraction_strategy', nargs='?', type=str, default=DEFAULT_EXTRACTION_STRATEGY, help='Specify the strategy to apply to extract the trees from the forest. Either omp, random, none, similarity, kmeans.')
    parser.add_argument('--overwrite', action='store_true', default=DEFAULT_OVERWRITE, help='Overwrite the experiment id')
    args = parser.parse_args()

    if args.experiment_configuration:
@@ -173,7 +213,7 @@ if __name__ == "__main__":
    else:
        parameters = args.__dict__

    if parameters['extraction_strategy'] not in ['omp', 'random', 'none', 'similarity']:
    # Validate the requested extraction strategy against the supported ones.
    if parameters['extraction_strategy'] not in ['omp', 'random', 'none', 'similarity', 'kmeans']:
        # `parameters` is a dict (e.g. args.__dict__), so the value must be read
        # by key; attribute access would raise AttributeError instead of the
        # intended ValueError.
        raise ValueError('Specified extraction strategy {} is not supported.'.format(parameters['extraction_strategy']))

    pathlib.Path(parameters['models_dir']).mkdir(parents=True, exist_ok=True)
@@ -208,7 +248,7 @@ if __name__ == "__main__":
    # Build the list of extracted forest sizes: take
    # `extracted_forest_size_samples + 1` evenly spaced fractions between 0 and
    # `extracted_forest_size_stop` (stop included), drop the leading 0, scale
    # them by the base forest size, round to integers and remove duplicates
    # (np.unique also returns the values sorted).
    # The builtin `int` is used as dtype: `np.int` was only an alias for it and
    # no longer exists in recent NumPy releases.
    parameters['extracted_forest_size'] = np.unique(np.around(hyperparameters['n_estimators'] *
        np.linspace(0, args.extracted_forest_size_stop,
        parameters['extracted_forest_size_samples'] + 1,
        endpoint=True)[1:]).astype(int)).tolist()

    if parameters['seeds'] != None and parameters['random_seed_number'] > 1:
        logger.warning('seeds and random_seed_number parameters are both specified. Seeds will be used.')    
@@ -220,6 +260,7 @@ if __name__ == "__main__":

    if args.experiment_id:
        experiment_id = args.experiment_id
        if args.overwrite:
            shutil.rmtree(os.path.join(parameters['models_dir'], str(experiment_id)), ignore_errors=True)
    else:
        # Resolve the next experiment id number (last id + 1)
@@ -255,6 +296,6 @@ if __name__ == "__main__":
            )

    # Run as many jobs as there are seeds
    with tqdm_joblib(tqdm(total=len(seeds), disable=not args.verbose)) as progress_bar:
        Parallel(n_jobs=args.job_number)(delayed(process_job)(seeds[i],
            parameters, experiment_id, hyperparameters) for i in range(len(seeds)))
    with tqdm_joblib(tqdm(total=len(seeds), disable=not args.verbose)) as seed_job_pb:
        Parallel(n_jobs=args.job_number)(delayed(seed_job)(seed_job_pb, seeds[i],
            parameters, experiment_id, hyperparameters, args.verbose) for i in range(len(seeds)))

experiments/.gitkeep

deleted100644 → 0
+0 −0
Original line number Diff line number Diff line
Original line number Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "none_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 6,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "scorer": "accuracy",
    "best_score_train": 0.7953125,
    "best_score_test": 0.7909854175872735,
    "best_parameters": {
        "max_depth": 20,
        "max_features": "sqrt",
        "min_samples_leaf": 1,
        "n_estimators": 809
    },
    "random_seed": 1763
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 5,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "no_normalization"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D_and_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "20newsgroups_vectorized",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/20newsgroups_vectorized/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        7,
        13,
        20,
        27,
        34
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 6,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 5,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "no_normalization"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D_and_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "boston",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/boston/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,11 @@
    "seeds": [
        1,
        2,
        3
        3,
        4,
        5,
        6,
        7
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
@@ -28,35 +32,35 @@
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        13,
        26,
        39,
        52,
        65,
        77,
        90,
        103,
        116,
        129,
        142,
        155,
        168,
        181,
        194,
        206,
        219,
        232,
        245,
        258,
        271,
        284,
        297,
        310,
        323,
        335,
        348,
        361,
        374,
        387
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,11 @@
    "seeds": [
        1,
        2,
        3
        3,
        4,
        5,
        6,
        7
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
@@ -28,35 +32,35 @@
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        13,
        26,
        39,
        52,
        65,
        77,
        90,
        103,
        116,
        129,
        142,
        155,
        168,
        181,
        194,
        206,
        219,
        232,
        245,
        258,
        271,
        284,
        297,
        310,
        323,
        335,
        348,
        361,
        374,
        387
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/boston/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,11 @@
    "seeds": [
        1,
        2,
        3
        3,
        4,
        5,
        6,
        7
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
@@ -28,35 +32,35 @@
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        13,
        26,
        39,
        52,
        65,
        77,
        90,
        103,
        116,
        129,
        142,
        155,
        168,
        181,
        194,
        206,
        219,
        232,
        245,
        258,
        271,
        284,
        297,
        310,
        323,
        335,
        348,
        361,
        374,
        387
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 6,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 5,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "no_normalization"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D_and_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "breast_cancer",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/breast_cancer/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,11 @@
    "seeds": [
        1,
        2,
        3
        3,
        4,
        5,
        6,
        7
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
@@ -28,35 +32,35 @@
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        1,
        3,
        4,
        5,
        6,
        8,
        9,
        10,
        12,
        13,
        14,
        15,
        17,
        18,
        19,
        21,
        22,
        23,
        25,
        26,
        27,
        28,
        30,
        31,
        32,
        34,
        35,
        36,
        37,
        39
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,11 @@
    "seeds": [
        1,
        2,
        3
        3,
        4,
        5,
        6,
        7
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
@@ -28,35 +32,35 @@
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        1,
        3,
        4,
        5,
        6,
        8,
        9,
        10,
        12,
        13,
        14,
        15,
        17,
        18,
        19,
        21,
        22,
        23,
        25,
        26,
        27,
        28,
        30,
        31,
        32,
        34,
        35,
        36,
        37,
        39
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/breast_cancer/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,11 @@
    "seeds": [
        1,
        2,
        3
        3,
        4,
        5,
        6,
        7
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
@@ -28,35 +32,35 @@
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        1,
        3,
        4,
        5,
        6,
        8,
        9,
        10,
        12,
        13,
        14,
        15,
        17,
        18,
        19,
        21,
        22,
        23,
        25,
        26,
        27,
        28,
        30,
        31,
        32,
        34,
        35,
        36,
        37,
        39
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.1,
    "models_dir": ".\\models",
    "models_dir": "./models",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
@@ -33,5 +33,5 @@
        66,
        83
    ],
    "experiment_id": 1
    "experiment_id": 26
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.1,
    "models_dir": ".\\models",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        16,
        33,
        50,
        66,
        83
    ],
    "experiment_id": 4
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.1,
    "models_dir": ".\\models",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        16,
        33,
        50,
        66,
        83
    ],
    "experiment_id": 3
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 6,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.1,
    "models_dir": ".\\models",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        16,
        33,
        50,
        66,
        83
    ],
    "experiment_id": 6
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.1,
    "models_dir": ".\\models",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        16,
        33,
        50,
        66,
        83
    ],
    "experiment_id": 2
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 5,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.1,
    "models_dir": ".\\models",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        16,
        33,
        50,
        66,
        83
    ],
    "experiment_id": 5
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "no_normalization"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D_and_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "california_housing",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/california_housing/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,11 @@
    "seeds": [
        1,
        2,
        3
        3,
        4,
        5,
        6,
        7
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
@@ -28,35 +32,35 @@
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        13,
        26,
        39,
        52,
        65,
        77,
        90,
        103,
        116,
        129,
        142,
        155,
        168,
        181,
        194,
        206,
        219,
        232,
        245,
        258,
        271,
        284,
        297,
        310,
        323,
        335,
        348,
        361,
        374,
        387
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,11 @@
    "seeds": [
        1,
        2,
        3
        3,
        4,
        5,
        6,
        7
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
@@ -28,35 +32,35 @@
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        13,
        26,
        39,
        52,
        65,
        77,
        90,
        103,
        116,
        129,
        142,
        155,
        168,
        181,
        194,
        206,
        219,
        232,
        245,
        258,
        271,
        284,
        297,
        310,
        323,
        335,
        348,
        361,
        374,
        387
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/california_housing/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,11 @@
    "seeds": [
        1,
        2,
        3
        3,
        4,
        5,
        6,
        7
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
@@ -28,35 +32,35 @@
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        13,
        26,
        39,
        52,
        65,
        77,
        90,
        103,
        116,
        129,
        142,
        155,
        168,
        181,
        194,
        206,
        219,
        232,
        245,
        258,
        271,
        284,
        297,
        310,
        323,
        335,
        348,
        361,
        374,
        387
        33,
        67,
        100,
        133,
        167,
        200,
        233,
        267,
        300,
        333,
        367,
        400,
        433,
        467,
        500,
        533,
        567,
        600,
        633,
        667,
        700,
        733,
        767,
        800,
        833,
        867,
        900,
        933,
        967,
        1000
    ]
}
 No newline at end of file
+0 −38
Original line number Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/diabetes/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "none_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        1,
        2,
        3,
        4
    ]
}
 No newline at end of file
+0 −38
Original line number Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/diabetes/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        1,
        2,
        3,
        4
    ]
}
 No newline at end of file
+0 −38
Original line number Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/diabetes/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        1,
        2,
        3,
        4
    ]
}
 No newline at end of file
+0 −38
Original line number Diff line number Diff line
{
    "experiment_id": 6,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/diabetes/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        1,
        2,
        3,
        4
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/diabetes/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "random_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        1,
        2,
        3,
        4
    ]
}
 No newline at end of file
+0 −38
Original line number Diff line number Diff line
{
    "experiment_id": 5,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/diabetes/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        1,
        2,
        3,
        4
    ]
}
 No newline at end of file
+0 −38
Original line number Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/diabetes/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "no_normalization"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        1,
        2,
        3,
        4
    ]
}
 No newline at end of file
+0 −38
Original line number Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/diabetes/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        1,
        2,
        3,
        4
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/diabetes/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D_and_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        1,
        2,
        3,
        4
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/diabetes/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        1,
        2,
        3,
        4
    ]
}
 No newline at end of file
+0 −38
Original line number Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/diabetes/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        1,
        2,
        3,
        4
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/diabetes/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        1,
        2,
        3,
        4
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "diabetes",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/diabetes/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,train+dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        1,
        2,
        3,
        4
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,11 @@
    "seeds": [
        1,
        2,
        3
        3,
        4,
        5,
        6,
        7
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
@@ -28,35 +32,35 @@
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        1,
        3,
        4,
        6,
        7,
        8,
        10,
        11,
        13,
        14,
        15,
        17,
        18,
        20,
        21,
        22,
        24,
        25,
        26,
        28,
        29,
        31,
        32,
        33,
        35,
        36,
        38,
        39,
        40,
        42
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,11 @@
    "seeds": [
        1,
        2,
        3
        3,
        4,
        5,
        6,
        7
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
@@ -28,35 +32,35 @@
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        1,
        3,
        4,
        6,
        7,
        8,
        10,
        11,
        13,
        14,
        15,
        17,
        18,
        20,
        21,
        22,
        24,
        25,
        26,
        28,
        29,
        31,
        32,
        33,
        35,
        36,
        38,
        39,
        40,
        42
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diabetes/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,11 @@
    "seeds": [
        1,
        2,
        3
        3,
        4,
        5,
        6,
        7
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
@@ -28,35 +32,35 @@
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        1,
        3,
        4,
        6,
        7,
        8,
        10,
        11,
        13,
        14,
        15,
        17,
        18,
        20,
        21,
        22,
        24,
        25,
        26,
        28,
        29,
        31,
        32,
        33,
        35,
        36,
        38,
        39,
        40,
        42
        43,
        47,
        50,
        54,
        58,
        61,
        65,
        68,
        72,
        76,
        79,
        83,
        86,
        90,
        94,
        97,
        101,
        104,
        108
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diamonds/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,11 @@
    "seeds": [
        1,
        2,
        3
        3,
        4,
        5,
        6,
        7
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
@@ -28,35 +32,35 @@
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        6,
        11,
        17,
        22,
        28,
        33,
        39,
        44,
        50,
        55,
        61,
        66,
        14,
        29,
        43,
        57,
        72,
        77,
        83,
        89,
        94,
        86,
        100,
        105,
        111,
        116,
        122,
        127,
        133,
        138,
        144,
        149,
        155,
        161,
        166
        114,
        129,
        143,
        157,
        172,
        186,
        200,
        214,
        229,
        243,
        257,
        272,
        286,
        300,
        315,
        329,
        343,
        358,
        372,
        386,
        400,
        415,
        429
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diamonds/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,11 @@
    "seeds": [
        1,
        2,
        3
        3,
        4,
        5,
        6,
        7
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
@@ -28,35 +32,35 @@
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        6,
        11,
        17,
        22,
        28,
        33,
        39,
        44,
        50,
        55,
        61,
        66,
        14,
        29,
        43,
        57,
        72,
        77,
        83,
        89,
        94,
        86,
        100,
        105,
        111,
        116,
        122,
        127,
        133,
        138,
        144,
        149,
        155,
        161,
        166
        114,
        129,
        143,
        157,
        172,
        186,
        200,
        214,
        229,
        243,
        257,
        272,
        286,
        300,
        315,
        329,
        343,
        358,
        372,
        386,
        400,
        415,
        429
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 30,
    "extracted_forest_size_stop": 0.4,
    "extracted_forest_size_stop": 1.0,
    "models_dir": "models/diamonds/stage4",
    "dev_size": 0.2,
    "test_size": 0.2,
@@ -15,7 +15,11 @@
    "seeds": [
        1,
        2,
        3
        3,
        4,
        5,
        6,
        7
    ],
    "subsets_used": "train+dev,train+dev",
    "normalize_weights": false,
@@ -28,35 +32,35 @@
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        6,
        11,
        17,
        22,
        28,
        33,
        39,
        44,
        50,
        55,
        61,
        66,
        14,
        29,
        43,
        57,
        72,
        77,
        83,
        89,
        94,
        86,
        100,
        105,
        111,
        116,
        122,
        127,
        133,
        138,
        144,
        149,
        155,
        161,
        166
        114,
        129,
        143,
        157,
        172,
        186,
        200,
        214,
        229,
        243,
        257,
        272,
        286,
        300,
        315,
        329,
        343,
        358,
        372,
        386,
        400,
        415,
        429
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "digits",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/digits/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "none_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "none",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "digits",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/digits/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "1",
        "omp_with_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 6,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "digits",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/digits/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "omp_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 5,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "digits",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/digits/stage1",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": true,
    "save_experiment_configuration": [
        "1",
        "random_wo_params"
    ],
    "job_number": -1,
    "extraction_strategy": "random",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "digits",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/digits/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "no_normalization"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 2,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "digits",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/digits/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
Original line number Diff line number Diff line
{
    "experiment_id": 4,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "digits",
    "normalize_D": true,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/digits/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_D_and_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 3,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "digits",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/digits/stage2",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": true,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "2",
        "normalize_weights"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file
+0 −39
Original line number Diff line number Diff line
{
    "experiment_id": 1,
    "experiment_configuration": null,
    "experiment_configuration_path": "experiments",
    "dataset_name": "digits",
    "normalize_D": false,
    "dataset_normalizer": "standard",
    "forest_size": null,
    "extracted_forest_size_samples": 5,
    "extracted_forest_size_stop": 0.05,
    "models_dir": "models/digits/stage3",
    "dev_size": 0.2,
    "test_size": 0.2,
    "random_seed_number": 1,
    "seeds": [
        1,
        2,
        3,
        4,
        5
    ],
    "subsets_used": "train,dev",
    "normalize_weights": false,
    "verbose": false,
    "skip_best_hyperparams": false,
    "save_experiment_configuration": [
        "3",
        "train-dev_subset"
    ],
    "job_number": -1,
    "extraction_strategy": "omp",
    "extracted_forest_size": [
        8,
        17,
        25,
        33,
        42
    ]
}
 No newline at end of file