From 58061ea4fc0593ad6a22895b1120ddaa5e1053c5 Mon Sep 17 00:00:00 2001 From: Charly Lamothe <charly.lamothe@univ-amu.fr> Date: Thu, 26 Dec 2019 10:44:10 +0100 Subject: [PATCH] - Add command lines for stage2 experiments; - Fix possible issues for extracted forest sizes computation: around to reduce possible zeroes and remove duplicates; - Create output experiment stage dir if not exists; - Add base_score_metric to model raw results class; - Add best params for lfw_pairs (maybe try with a larger number of random seeds since the score is not that high). --- code/bolsonaro/models/model_raw_results.py | 19 ++++++++------ code/train.py | 29 ++++++++++++++-------- experiments/lfw_pairs/stage1/params.json | 16 ++++++++++++ 3 files changed, 47 insertions(+), 17 deletions(-) create mode 100644 experiments/lfw_pairs/stage1/params.json diff --git a/code/bolsonaro/models/model_raw_results.py b/code/bolsonaro/models/model_raw_results.py index 8c5f9c7..e503742 100644 --- a/code/bolsonaro/models/model_raw_results.py +++ b/code/bolsonaro/models/model_raw_results.py @@ -8,8 +8,8 @@ class ModelRawResults(object): def __init__(self, model_object, training_time, datetime, train_score, dev_score, test_score, - score_metric, train_score_base, dev_score_base, - test_score_base): + train_score_base, dev_score_base, + test_score_base, score_metric, base_score_metric): self._model_object = model_object self._training_time = training_time @@ -17,10 +17,11 @@ class ModelRawResults(object): self._train_score = train_score self._dev_score = dev_score self._test_score = test_score - self._score_metric = score_metric self._train_score_base = train_score_base self._dev_score_base = dev_score_base self._test_score_base = test_score_base + self._score_metric = score_metric + self._base_score_metric = base_score_metric @property def model_object(self): @@ -46,10 +47,6 @@ class ModelRawResults(object): def test_score(self): return self._test_score - @property - def score_metric(self): - return self._score_metric - @property def train_score_base(self): return self._train_score_base @@ -62,6 +59,14 @@ class ModelRawResults(object): def test_score_base(self): return self._test_score_base + @property + def score_metric(self): + return self._score_metric + + @property + def base_score_metric(self): + return self._base_score_metric + def save(self, models_dir): save_obj_to_pickle(models_dir + os.sep + 'model_raw_results.pickle', self.__dict__) diff --git a/code/train.py b/code/train.py index afcf28b..dbf22c7 100644 --- a/code/train.py +++ b/code/train.py @@ -101,13 +101,20 @@ def process_job(seed, parameters, experiment_id, hyperparameters): """ Example for stage 1: -python code/train.py --dataset_name=california_housing --seeds 1 2 3 --extraction_strategy=none --save_experiment_configuration 1 none_with_params -python code/train.py --dataset_name=california_housing --seeds 1 2 3 --extraction_strategy=random --save_experiment_configuration 1 random_with_params -python code/train.py --dataset_name=california_housing --seeds 1 2 3 --save_experiment_configuration 1 omp_with_params -python code/train.py --dataset_name=california_housing --seeds 1 2 3 --extraction_strategy=none --skip_best_hyperparams --save_experiment_configuration 1 none_wo_params -python code/train.py --dataset_name=california_housing --seeds 1 2 3 --extraction_strategy=random --skip_best_hyperparams --save_experiment_configuration 1 random_wo_params -python code/train.py --dataset_name=california_housing --seeds 1 2 3 --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params +python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=none --save_experiment_configuration 1 none_with_params --extracted_forest_size_stop=0.05 +python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=random --save_experiment_configuration 1 random_with_params --extracted_forest_size_stop=0.05 +python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 1 omp_with_params --extracted_forest_size_stop=0.05 +python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=none --skip_best_hyperparams --save_experiment_configuration 1 none_wo_params --forest_size=1000 --extracted_forest_size_stop=0.05 +python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=random --skip_best_hyperparams --save_experiment_configuration 1 random_wo_params --forest_size=1000 --extracted_forest_size_stop=0.05 +python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params --forest_size=1000 --extracted_forest_size_stop=0.05 python code/compute_results.py --stage 1 --experiment_ids 1 2 3 4 5 6 --dataset_name=california_housing --extracted_forest_sizes_number=5 + +Example for stage 2: +python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 no_normalization --extracted_forest_size_stop=0.05 +python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_D --normalize_D --extracted_forest_size_stop=0.05 +python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_weights --normalize_weights --extracted_forest_size_stop=0.05 +python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_D_and_weights --normalize_D --normalize_weights --extracted_forest_size_stop=0.05 +python code/compute_results.py --stage 2 --experiment_ids 7 8 9 10 --dataset_name=california_housing --extracted_forest_sizes_number=5 """ if __name__ == "__main__": load_dotenv(find_dotenv('.env')) @@ -184,10 +191,10 @@ if __name__ == "__main__": hyperparameters['n_estimators'] = parameters['forest_size'] # The number of tree to extract from forest (K) - parameters['extracted_forest_size'] = (hyperparameters['n_estimators'] * + parameters['extracted_forest_size'] = np.unique(np.around(hyperparameters['n_estimators'] * np.linspace(0, args.extracted_forest_size_stop, parameters['extracted_forest_size_samples'] + 1, - endpoint=False)[1:]).astype(np.int).tolist() + endpoint=False)[1:]).astype(np.int)).tolist() if parameters['seeds'] != None and parameters['random_seed_number'] > 1: logger.warning('seeds and random_seed_number parameters are both specified. Seeds will be used.') @@ -213,8 +220,10 @@ if __name__ == "__main__": raise ValueError('save_experiment_configuration must have two parameters.') elif int(args.save_experiment_configuration[0]) not in list(range(1, 5)): raise ValueError('save_experiment_configuration first parameter must be a supported stage id (i.e. [1, 4]).') - output_experiment_configuration_path = os.path.join(args.experiment_configuration_path, - args.dataset_name, 'stage' + args.save_experiment_configuration[0], + output_experiment_stage_path = os.path.join(args.experiment_configuration_path, + args.dataset_name, 'stage' + args.save_experiment_configuration[0]) + pathlib.Path(output_experiment_stage_path).mkdir(parents=True, exist_ok=True) + output_experiment_configuration_path = os.path.join(output_experiment_stage_path, args.save_experiment_configuration[1] + '_{}.json'.format( experiment_id)) else: diff --git a/experiments/lfw_pairs/stage1/params.json b/experiments/lfw_pairs/stage1/params.json new file mode 100644 index 0000000..44037fe --- /dev/null +++ b/experiments/lfw_pairs/stage1/params.json @@ -0,0 +1,16 @@ +{ + "scorer": "accuracy", + "best_score_train": 0.6231060606060606, + "best_score_test": 0.6174242424242423, + "best_parameters": { + "min_samples_leaf": 1, + "n_estimators": 1000, + "max_depth": 16, + "max_features": "auto" + }, + "random_seed": [ + 226, + 674, + 1639 + ] +} \ No newline at end of file -- GitLab