Skip to content
Snippets Groups Projects
Commit 58061ea4 authored by Charly Lamothe
Browse files

- Add command lines for stage2 experiments;

- Fix possible issues in the extracted forest sizes computation: round the sizes (np.around) before the integer cast to reduce possible zeros, and remove duplicates (np.unique);
- Create the output experiment stage directory if it does not exist;
- Add base_score_metric to model raw results class;
- Add best hyperparameters for lfw_pairs (consider retrying with a larger number of random seeds, since the score is not very high).
parent 17d3addc
No related branches found
No related tags found
1 merge request: !9 Resolve "Experiment pipeline"
......@@ -8,8 +8,8 @@ class ModelRawResults(object):
def __init__(self, model_object, training_time,
datetime, train_score, dev_score, test_score,
score_metric, train_score_base, dev_score_base,
test_score_base):
train_score_base, dev_score_base,
test_score_base, score_metric, base_score_metric):
self._model_object = model_object
self._training_time = training_time
......@@ -17,10 +17,11 @@ class ModelRawResults(object):
self._train_score = train_score
self._dev_score = dev_score
self._test_score = test_score
self._score_metric = score_metric
self._train_score_base = train_score_base
self._dev_score_base = dev_score_base
self._test_score_base = test_score_base
self._score_metric = score_metric
self._base_score_metric = base_score_metric
@property
def model_object(self):
......@@ -46,10 +47,6 @@ class ModelRawResults(object):
def test_score(self):
return self._test_score
@property
def score_metric(self):
return self._score_metric
@property
def train_score_base(self):
return self._train_score_base
......@@ -62,6 +59,14 @@ class ModelRawResults(object):
def test_score_base(self):
return self._test_score_base
@property
def score_metric(self):
return self._score_metric
@property
def base_score_metric(self):
return self._base_score_metric
def save(self, models_dir):
save_obj_to_pickle(models_dir + os.sep + 'model_raw_results.pickle',
self.__dict__)
......
......@@ -101,13 +101,20 @@ def process_job(seed, parameters, experiment_id, hyperparameters):
"""
Example for stage 1:
python code/train.py --dataset_name=california_housing --seeds 1 2 3 --extraction_strategy=none --save_experiment_configuration 1 none_with_params
python code/train.py --dataset_name=california_housing --seeds 1 2 3 --extraction_strategy=random --save_experiment_configuration 1 random_with_params
python code/train.py --dataset_name=california_housing --seeds 1 2 3 --save_experiment_configuration 1 omp_with_params
python code/train.py --dataset_name=california_housing --seeds 1 2 3 --extraction_strategy=none --skip_best_hyperparams --save_experiment_configuration 1 none_wo_params
python code/train.py --dataset_name=california_housing --seeds 1 2 3 --extraction_strategy=random --skip_best_hyperparams --save_experiment_configuration 1 random_wo_params
python code/train.py --dataset_name=california_housing --seeds 1 2 3 --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=none --save_experiment_configuration 1 none_with_params --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=random --save_experiment_configuration 1 random_with_params --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 1 omp_with_params --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=none --skip_best_hyperparams --save_experiment_configuration 1 none_wo_params --forest_size=1000 --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=random --skip_best_hyperparams --save_experiment_configuration 1 random_wo_params --forest_size=1000 --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params --forest_size=1000 --extracted_forest_size_stop=0.05
python code/compute_results.py --stage 1 --experiment_ids 1 2 3 4 5 6 --dataset_name=california_housing --extracted_forest_sizes_number=5
Example for stage 2:
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 no_normalization --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_D --normalize_D --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_weights --normalize_weights --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_D_and_weights --normalize_D --normalize_weights --extracted_forest_size_stop=0.05
python code/compute_results.py --stage 2 --experiment_ids 7 8 9 10 --dataset_name=california_housing --extracted_forest_sizes_number=5
"""
if __name__ == "__main__":
load_dotenv(find_dotenv('.env'))
......@@ -184,10 +191,10 @@ if __name__ == "__main__":
hyperparameters['n_estimators'] = parameters['forest_size']
# The number of tree to extract from forest (K)
parameters['extracted_forest_size'] = (hyperparameters['n_estimators'] *
parameters['extracted_forest_size'] = np.unique(np.around(hyperparameters['n_estimators'] *
np.linspace(0, args.extracted_forest_size_stop,
parameters['extracted_forest_size_samples'] + 1,
endpoint=False)[1:]).astype(np.int).tolist()
endpoint=False)[1:]).astype(np.int)).tolist()
if parameters['seeds'] != None and parameters['random_seed_number'] > 1:
logger.warning('seeds and random_seed_number parameters are both specified. Seeds will be used.')
......@@ -213,8 +220,10 @@ if __name__ == "__main__":
raise ValueError('save_experiment_configuration must have two parameters.')
elif int(args.save_experiment_configuration[0]) not in list(range(1, 5)):
raise ValueError('save_experiment_configuration first parameter must be a supported stage id (i.e. [1, 4]).')
output_experiment_configuration_path = os.path.join(args.experiment_configuration_path,
args.dataset_name, 'stage' + args.save_experiment_configuration[0],
output_experiment_stage_path = os.path.join(args.experiment_configuration_path,
args.dataset_name, 'stage' + args.save_experiment_configuration[0])
pathlib.Path(output_experiment_stage_path).mkdir(parents=True, exist_ok=True)
output_experiment_configuration_path = os.path.join(output_experiment_stage_path,
args.save_experiment_configuration[1] + '_{}.json'.format(
experiment_id))
else:
......
{
"scorer": "accuracy",
"best_score_train": 0.6231060606060606,
"best_score_test": 0.6174242424242423,
"best_parameters": {
"min_samples_leaf": 1,
"n_estimators": 1000,
"max_depth": 16,
"max_features": "auto"
},
"random_seed": [
226,
674,
1639
]
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment