Commit 994ff6ba authored by Charly Lamothe
Browse files

Update fix of normalize_D in the case of wo weights. Add density plot in...

Update the fix of normalize_D for the case without (wo) weights. Add weight density plots in compute results for all stages, and add some plots to the results.
parent a1a7f767
...@@ -123,9 +123,7 @@ class SingleOmpForest(OmpForest): ...@@ -123,9 +123,7 @@ class SingleOmpForest(OmpForest):
forest_predictions = self._base_estimator_predictions(X) forest_predictions = self._base_estimator_predictions(X)
if self._models_parameters.normalize_D: if self._models_parameters.normalize_D:
forest_predictions = forest_predictions.T
forest_predictions /= self._forest_norms forest_predictions /= self._forest_norms
forest_predictions = forest_predictions.T
return self._make_omp_weighted_prediction(forest_predictions, self._omp, self._models_parameters.normalize_weights) return self._make_omp_weighted_prediction(forest_predictions, self._omp, self._models_parameters.normalize_weights)
......
...@@ -119,9 +119,7 @@ class OmpForestMulticlassClassifier(OmpForest): ...@@ -119,9 +119,7 @@ class OmpForestMulticlassClassifier(OmpForest):
forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]).T forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]).T
if self._models_parameters.normalize_D: if self._models_parameters.normalize_D:
forest_predictions = forest_predictions.T
forest_predictions /= self._forest_norms forest_predictions /= self._forest_norms
forest_predictions = forest_predictions.T
label_names = [] label_names = []
preds = [] preds = []
...@@ -149,7 +147,9 @@ class OmpForestMulticlassClassifier(OmpForest): ...@@ -149,7 +147,9 @@ class OmpForestMulticlassClassifier(OmpForest):
forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]).T forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]).T
if self._models_parameters.normalize_D: if self._models_parameters.normalize_D:
forest_predictions = forest_predictions.T
forest_predictions /= self._forest_norms forest_predictions /= self._forest_norms
forest_predictions = forest_predictions.T
label_names = [] label_names = []
preds = [] preds = []
......
...@@ -156,6 +156,7 @@ if __name__ == "__main__": ...@@ -156,6 +156,7 @@ if __name__ == "__main__":
DEFAULT_RESULTS_DIR = os.environ["project_dir"] + os.sep + 'results' DEFAULT_RESULTS_DIR = os.environ["project_dir"] + os.sep + 'results'
DEFAULT_MODELS_DIR = os.environ["project_dir"] + os.sep + 'models' DEFAULT_MODELS_DIR = os.environ["project_dir"] + os.sep + 'models'
DEFAULT_PLOT_WEIGHT_DENSITY = False DEFAULT_PLOT_WEIGHT_DENSITY = False
DEFAULT_WO_LOSS_PLOTS = False
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--stage', nargs='?', type=int, required=True, help='Specify the stage number among [1, 5].') parser.add_argument('--stage', nargs='?', type=int, required=True, help='Specify the stage number among [1, 5].')
...@@ -168,6 +169,7 @@ if __name__ == "__main__": ...@@ -168,6 +169,7 @@ if __name__ == "__main__":
parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.') parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.')
parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.') parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
parser.add_argument('--plot_weight_density', action='store_true', default=DEFAULT_PLOT_WEIGHT_DENSITY, help='Plot the weight density. Only working for regressor models for now.') parser.add_argument('--plot_weight_density', action='store_true', default=DEFAULT_PLOT_WEIGHT_DENSITY, help='Plot the weight density. Only working for regressor models for now.')
parser.add_argument('--wo_loss_plots', action='store_true', default=DEFAULT_WO_LOSS_PLOTS, help='Do not compute the loss plots.')
args = parser.parse_args() args = parser.parse_args()
if args.stage not in list(range(1, 6)): if args.stage not in list(range(1, 6)):
...@@ -181,7 +183,7 @@ if __name__ == "__main__": ...@@ -181,7 +183,7 @@ if __name__ == "__main__":
# Create recursively the results dir tree # Create recursively the results dir tree
pathlib.Path(args.results_dir).mkdir(parents=True, exist_ok=True) pathlib.Path(args.results_dir).mkdir(parents=True, exist_ok=True)
if args.stage == 1: if args.stage == 1 and not args.wo_loss_plots:
if len(args.experiment_ids) != 6: if len(args.experiment_ids) != 6:
raise ValueError('In the case of stage 1, the number of specified experiment ids must be 6.') raise ValueError('In the case of stage 1, the number of specified experiment ids must be 6.')
...@@ -221,8 +223,8 @@ if __name__ == "__main__": ...@@ -221,8 +223,8 @@ if __name__ == "__main__":
wo_params_extracted_forest_sizes, random_wo_params_experiment_score_metric = \ wo_params_extracted_forest_sizes, random_wo_params_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes( extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, int(args.experiment_ids[4])) args.models_dir, args.results_dir, int(args.experiment_ids[4]))
# base_wo_params # omp_wo_params
logger.info('Loading base_wo_params experiment scores...') logger.info('Loading omp_wo_params experiment scores...')
omp_wo_params_train_scores, omp_wo_params_dev_scores, omp_wo_params_test_scores, _, \ omp_wo_params_train_scores, omp_wo_params_dev_scores, omp_wo_params_test_scores, _, \
omp_wo_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes( omp_wo_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, int(args.experiment_ids[5])) args.models_dir, args.results_dir, int(args.experiment_ids[5]))
...@@ -262,7 +264,7 @@ if __name__ == "__main__": ...@@ -262,7 +264,7 @@ if __name__ == "__main__":
ylabel=experiments_score_metric, ylabel=experiments_score_metric,
title='Loss values of {}\nusing best and default hyperparameters'.format(args.dataset_name) title='Loss values of {}\nusing best and default hyperparameters'.format(args.dataset_name)
) )
elif args.stage == 2: elif args.stage == 2 and not args.wo_loss_plots:
if len(args.experiment_ids) != 4: if len(args.experiment_ids) != 4:
raise ValueError('In the case of stage 2, the number of specified experiment ids must be 4.') raise ValueError('In the case of stage 2, the number of specified experiment ids must be 4.')
...@@ -308,7 +310,7 @@ if __name__ == "__main__": ...@@ -308,7 +310,7 @@ if __name__ == "__main__":
xlabel='Number of trees extracted', xlabel='Number of trees extracted',
ylabel=experiments_score_metric, ylabel=experiments_score_metric,
title='Loss values of {}\nusing different normalizations'.format(args.dataset_name)) title='Loss values of {}\nusing different normalizations'.format(args.dataset_name))
elif args.stage == 3: elif args.stage == 3 and not args.wo_loss_plots:
if len(args.experiment_ids) != 3: if len(args.experiment_ids) != 3:
raise ValueError('In the case of stage 3, the number of specified experiment ids must be 3.') raise ValueError('In the case of stage 3, the number of specified experiment ids must be 3.')
...@@ -365,7 +367,7 @@ if __name__ == "__main__": ...@@ -365,7 +367,7 @@ if __name__ == "__main__":
xlabel='Number of trees extracted', xlabel='Number of trees extracted',
ylabel=experiments_score_metric, ylabel=experiments_score_metric,
title='Loss values of {}\nusing different training subsets'.format(args.dataset_name))""" title='Loss values of {}\nusing different training subsets'.format(args.dataset_name))"""
elif args.stage == 4: elif args.stage == 4 and not args.wo_loss_plots:
if len(args.experiment_ids) != 3: if len(args.experiment_ids) != 3:
raise ValueError('In the case of stage 4, the number of specified experiment ids must be 3.') raise ValueError('In the case of stage 4, the number of specified experiment ids must be 3.')
...@@ -427,11 +429,7 @@ if __name__ == "__main__": ...@@ -427,11 +429,7 @@ if __name__ == "__main__":
xlabel='Number of trees extracted', xlabel='Number of trees extracted',
ylabel=experiments_score_metric, ylabel=experiments_score_metric,
title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name)) title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
elif args.stage == 5 and not args.wo_loss_plots:
experiment_weights = extract_weights_across_seeds(args.models_dir, args.results_dir, args.experiment_ids[2])
Plotter.weight_density(experiment_weights, os.path.join(output_path, 'weight_density.png'))
elif args.stage == 5:
# Retreive the extracted forest sizes number used in order to have a base forest axis as long as necessary # Retreive the extracted forest sizes number used in order to have a base forest axis as long as necessary
extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, int(args.experiment_ids[1])) extracted_forest_sizes_number = retreive_extracted_forest_sizes_number(args.models_dir, int(args.experiment_ids[1]))
all_labels = list() all_labels = list()
...@@ -475,8 +473,9 @@ if __name__ == "__main__": ...@@ -475,8 +473,9 @@ if __name__ == "__main__":
continue continue
logger.info(f'Loading {label} experiment scores...') logger.info(f'Loading {label} experiment scores...')
current_experiment_id = int(args.experiment_ids[i].split('=')[1])
_, _, current_test_scores, _, _ = extract_scores_across_seeds_and_extracted_forest_sizes( _, _, current_test_scores, _, _ = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, int(args.experiment_ids[i].split('=')[1])) args.models_dir, args.results_dir, current_experiment_id)
all_labels.append(label) all_labels.append(label)
all_scores.append(current_test_scores) all_scores.append(current_test_scores)
...@@ -491,7 +490,42 @@ if __name__ == "__main__": ...@@ -491,7 +490,42 @@ if __name__ == "__main__":
xlabel='Number of trees extracted', xlabel='Number of trees extracted',
ylabel=base_with_params_experiment_score_metric, ylabel=base_with_params_experiment_score_metric,
title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name)) title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
else:
raise ValueError('This stage number is not supported yet, but it will be!') if args.plot_weight_density:
# Weight density plots: one figure per OMP experiment of the selected stage.
# This section belongs to the `if args.plot_weight_density:` guard just above.
root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage{args.stage}')

# Build the (plot label, experiment id) pairs for the selected stage.
if args.stage == 1:
    # omp_wo_params is the sixth positional id (index 5), the same index the
    # stage 1 loss-plot branch loads its omp_wo_params scores from. Using
    # index 2 here would plot the omp_with_params weights a second time.
    omp_experiment_ids = [('omp_with_params', args.experiment_ids[2]),
                          ('omp_wo_params', args.experiment_ids[5])]
elif args.stage == 2:
    omp_experiment_ids = [('no_normalization', args.experiment_ids[0]),
                          ('normalize_D', args.experiment_ids[1]),
                          ('normalize_weights', args.experiment_ids[2]),
                          ('normalize_D_and_weights', args.experiment_ids[3])]
elif args.stage == 3:
    omp_experiment_ids = [('train-dev_subset', args.experiment_ids[0]),
                          ('train-dev_train-dev_subset', args.experiment_ids[1]),
                          ('train-train-dev_subset', args.experiment_ids[2])]
elif args.stage == 4:
    omp_experiment_ids = [('omp_with_params', args.experiment_ids[2])]
elif args.stage == 5:
    omp_experiment_ids = [('omp_with_params', args.experiment_ids[2])]
    # From the fourth id on, entries are written '<label>=<experiment id>'.
    # The first known label found in the entry wins; unknown entries are
    # logged and skipped.
    for raw_experiment_id in args.experiment_ids[3:]:
        label = next(
            (name for name in ('kmeans', 'similarity', 'ensemble') if name in raw_experiment_id),
            None)
        if label is None:
            logger.error('Invalid value encountered')
            continue
        current_experiment_id = int(raw_experiment_id.split('=')[1])
        omp_experiment_ids.append((label, current_experiment_id))

# Extract the weights of each experiment across seeds and save one density
# plot per experiment under the stage output directory.
for experiment_label, experiment_id in omp_experiment_ids:
    logger.info(f'Computing weight density plot for experiment {experiment_label}...')
    experiment_weights = extract_weights_across_seeds(args.models_dir, args.results_dir, experiment_id)
    Plotter.weight_density(experiment_weights, os.path.join(root_output_path, f'weight_density_{experiment_label}.png'))
logger.info('Done.') logger.info('Done.')
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment