Skip to content
Snippets Groups Projects
Commit 58061ea4 authored by Charly Lamothe
Browse files

- Add command lines for stage2 experiments;

- Fix possible issues in the extracted forest sizes computation: round the sizes (np.around) before the integer cast to reduce possible zeros, and remove duplicates (np.unique);
- Create the output experiment stage directory if it does not exist;
- Add base_score_metric to model raw results class;
- Add best params for lfw_pairs (maybe try with a larger number of random seeds since the score is not that high).
parent 17d3addc
No related branches found
No related tags found
1 merge request: !9 Resolve "Experiment pipeline"
...@@ -8,8 +8,8 @@ class ModelRawResults(object): ...@@ -8,8 +8,8 @@ class ModelRawResults(object):
def __init__(self, model_object, training_time, def __init__(self, model_object, training_time,
datetime, train_score, dev_score, test_score, datetime, train_score, dev_score, test_score,
score_metric, train_score_base, dev_score_base, train_score_base, dev_score_base,
test_score_base): test_score_base, score_metric, base_score_metric):
self._model_object = model_object self._model_object = model_object
self._training_time = training_time self._training_time = training_time
...@@ -17,10 +17,11 @@ class ModelRawResults(object): ...@@ -17,10 +17,11 @@ class ModelRawResults(object):
self._train_score = train_score self._train_score = train_score
self._dev_score = dev_score self._dev_score = dev_score
self._test_score = test_score self._test_score = test_score
self._score_metric = score_metric
self._train_score_base = train_score_base self._train_score_base = train_score_base
self._dev_score_base = dev_score_base self._dev_score_base = dev_score_base
self._test_score_base = test_score_base self._test_score_base = test_score_base
self._score_metric = score_metric
self._base_score_metric = base_score_metric
@property @property
def model_object(self): def model_object(self):
...@@ -46,10 +47,6 @@ class ModelRawResults(object): ...@@ -46,10 +47,6 @@ class ModelRawResults(object):
def test_score(self): def test_score(self):
return self._test_score return self._test_score
@property
def score_metric(self):
return self._score_metric
@property @property
def train_score_base(self): def train_score_base(self):
return self._train_score_base return self._train_score_base
...@@ -62,6 +59,14 @@ class ModelRawResults(object): ...@@ -62,6 +59,14 @@ class ModelRawResults(object):
def test_score_base(self): def test_score_base(self):
return self._test_score_base return self._test_score_base
@property
def score_metric(self):
return self._score_metric
@property
def base_score_metric(self):
return self._base_score_metric
def save(self, models_dir): def save(self, models_dir):
save_obj_to_pickle(models_dir + os.sep + 'model_raw_results.pickle', save_obj_to_pickle(models_dir + os.sep + 'model_raw_results.pickle',
self.__dict__) self.__dict__)
......
...@@ -101,13 +101,20 @@ def process_job(seed, parameters, experiment_id, hyperparameters): ...@@ -101,13 +101,20 @@ def process_job(seed, parameters, experiment_id, hyperparameters):
""" """
Example for stage 1: Example for stage 1:
python code/train.py --dataset_name=california_housing --seeds 1 2 3 --extraction_strategy=none --save_experiment_configuration 1 none_with_params python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=none --save_experiment_configuration 1 none_with_params --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 --extraction_strategy=random --save_experiment_configuration 1 random_with_params python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=random --save_experiment_configuration 1 random_with_params --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 --save_experiment_configuration 1 omp_with_params python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 1 omp_with_params --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 --extraction_strategy=none --skip_best_hyperparams --save_experiment_configuration 1 none_wo_params python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=none --skip_best_hyperparams --save_experiment_configuration 1 none_wo_params --forest_size=1000 --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 --extraction_strategy=random --skip_best_hyperparams --save_experiment_configuration 1 random_wo_params python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --extraction_strategy=random --skip_best_hyperparams --save_experiment_configuration 1 random_wo_params --forest_size=1000 --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params --forest_size=1000 --extracted_forest_size_stop=0.05
python code/compute_results.py --stage 1 --experiment_ids 1 2 3 4 5 6 --dataset_name=california_housing --extracted_forest_sizes_number=5 python code/compute_results.py --stage 1 --experiment_ids 1 2 3 4 5 6 --dataset_name=california_housing --extracted_forest_sizes_number=5
Example for stage 2:
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 no_normalization --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_D --normalize_D --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_weights --normalize_weights --extracted_forest_size_stop=0.05
python code/train.py --dataset_name=california_housing --seeds 1 2 3 4 5 --save_experiment_configuration 2 normalize_D_and_weights --normalize_D --normalize_weights --extracted_forest_size_stop=0.05
python code/compute_results.py --stage 2 --experiment_ids 7 8 9 10 --dataset_name=california_housing --extracted_forest_sizes_number=5
""" """
if __name__ == "__main__": if __name__ == "__main__":
load_dotenv(find_dotenv('.env')) load_dotenv(find_dotenv('.env'))
...@@ -184,10 +191,10 @@ if __name__ == "__main__": ...@@ -184,10 +191,10 @@ if __name__ == "__main__":
hyperparameters['n_estimators'] = parameters['forest_size'] hyperparameters['n_estimators'] = parameters['forest_size']
# The number of tree to extract from forest (K) # The number of tree to extract from forest (K)
parameters['extracted_forest_size'] = (hyperparameters['n_estimators'] * parameters['extracted_forest_size'] = np.unique(np.around(hyperparameters['n_estimators'] *
np.linspace(0, args.extracted_forest_size_stop, np.linspace(0, args.extracted_forest_size_stop,
parameters['extracted_forest_size_samples'] + 1, parameters['extracted_forest_size_samples'] + 1,
endpoint=False)[1:]).astype(np.int).tolist() endpoint=False)[1:]).astype(np.int)).tolist()
if parameters['seeds'] != None and parameters['random_seed_number'] > 1: if parameters['seeds'] != None and parameters['random_seed_number'] > 1:
logger.warning('seeds and random_seed_number parameters are both specified. Seeds will be used.') logger.warning('seeds and random_seed_number parameters are both specified. Seeds will be used.')
...@@ -213,8 +220,10 @@ if __name__ == "__main__": ...@@ -213,8 +220,10 @@ if __name__ == "__main__":
raise ValueError('save_experiment_configuration must have two parameters.') raise ValueError('save_experiment_configuration must have two parameters.')
elif int(args.save_experiment_configuration[0]) not in list(range(1, 5)): elif int(args.save_experiment_configuration[0]) not in list(range(1, 5)):
raise ValueError('save_experiment_configuration first parameter must be a supported stage id (i.e. [1, 4]).') raise ValueError('save_experiment_configuration first parameter must be a supported stage id (i.e. [1, 4]).')
output_experiment_configuration_path = os.path.join(args.experiment_configuration_path, output_experiment_stage_path = os.path.join(args.experiment_configuration_path,
args.dataset_name, 'stage' + args.save_experiment_configuration[0], args.dataset_name, 'stage' + args.save_experiment_configuration[0])
pathlib.Path(output_experiment_stage_path).mkdir(parents=True, exist_ok=True)
output_experiment_configuration_path = os.path.join(output_experiment_stage_path,
args.save_experiment_configuration[1] + '_{}.json'.format( args.save_experiment_configuration[1] + '_{}.json'.format(
experiment_id)) experiment_id))
else: else:
......
{
"scorer": "accuracy",
"best_score_train": 0.6231060606060606,
"best_score_test": 0.6174242424242423,
"best_parameters": {
"min_samples_leaf": 1,
"n_estimators": 1000,
"max_depth": 16,
"max_features": "auto"
},
"random_seed": [
226,
674,
1639
]
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment