Skip to content
Snippets Groups Projects
Commit 3f85841e authored by Charly Lamothe's avatar Charly Lamothe
Browse files

Add coherence computation

parent 4d4c0848
No related branches found
No related tags found
1 merge request!22WIP: Resolve "coherence des arbres de predictions"
......@@ -9,7 +9,8 @@ class ModelRawResults(object):
def __init__(self, model_weights, training_time,
datetime, train_score, dev_score, test_score,
train_score_base, dev_score_base,
test_score_base, score_metric, base_score_metric):
test_score_base, score_metric, base_score_metric,
coherence=''):
self._model_weights = model_weights
self._training_time = training_time
......@@ -22,6 +23,7 @@ class ModelRawResults(object):
self._test_score_base = test_score_base
self._score_metric = score_metric
self._base_score_metric = base_score_metric
self._coherence = coherence
@property
def model_weights(self):
......@@ -67,6 +69,10 @@ class ModelRawResults(object):
def base_score_metric(self):
return self._base_score_metric
@property
def coherence(self):
    # Coherence of the model's tree predictions, as computed by
    # Trainer.compute_preds_coherence; defaults to '' (empty string)
    # when no coherence was recorded for this run.
    return self._coherence
def save(self, models_dir):
if not os.path.exists(models_dir):
os.mkdir(models_dir)
......
......@@ -141,6 +141,32 @@ class Trainer(object):
result = self._base_regression_score_metric(y_true, y_pred)
return result
def compute_preds_coherence(self, model, X):
    """
    Compute the coherence of the individual tree predictions of `model` on `X`.

    Each tree's prediction vector over the samples of X is L2-normalized;
    the coherence is the maximum absolute inner product between the
    prediction vectors of two distinct trees (i.e. the largest off-diagonal
    entry, in absolute value, of the Gram matrix of normalized predictions).
    A value close to 1 means at least two trees predict almost identically.

    :param model: A fitted forest model exposing its trees either through
        `.forest` (project forest wrappers) or `.estimators_` (sklearn forests).
    :param X: Input samples to predict on (array-like of shape (n_samples, n_features)).
    :return: The coherence as a float scalar.
    :raises ValueError: If the model type is not one of the supported classes.
    """
    from sklearn.preprocessing import normalize

    # Exact type checks (not isinstance) are kept deliberately: the project
    # wrappers below are looked up by their concrete class.
    if type(model) in [OmpForestRegressor, SimilarityForestRegressor,
                       KMeansForestRegressor, EnsembleSelectionForestRegressor,
                       OmpForestBinaryClassifier, OmpForestMulticlassClassifier]:
        estimators = model.forest
    elif type(model) in [RandomForestRegressor, RandomForestClassifier]:
        estimators = model.estimators_
    else:
        # Previously this fell through and raised an opaque NameError on
        # `estimators`; fail fast with a clear message instead.
        raise ValueError('Unsupported model type for coherence computation: {}'
                         .format(type(model).__name__))

    # One prediction vector per tree, L2-normalized row-wise
    # (sklearn normalize defaults to norm='l2', axis=1).
    predictions = normalize([tree.predict(X) for tree in estimators])
    # Gram matrix of the normalized prediction vectors; subtracting the
    # identity zeroes the diagonal (self-similarity is always 1) so the
    # max picks the largest off-diagonal absolute similarity.
    gram = predictions @ predictions.T
    coherence = np.max(np.abs(gram - np.eye(len(predictions))))
    return coherence
def compute_results(self, model, models_dir):
"""
:param model: Object with
......@@ -173,7 +199,8 @@ class Trainer(object):
dev_score_base=self.__score_func_base(model, self._dataset.X_dev, self._dataset.y_dev),
test_score_base=self.__score_func_base(model, self._dataset.X_test, self._dataset.y_test),
score_metric=self._score_metric_name,
base_score_metric=self._base_score_metric_name
base_score_metric=self._base_score_metric_name,
coherence=self.compute_preds_coherence(model, self._dataset.X_train)
)
results.save(models_dir)
self._logger.info("Base performance on test: {}".format(results.test_score_base))
......
......@@ -148,6 +148,34 @@ def extract_weights_across_seeds(models_dir, results_dir, experiment_id):
return experiment_weights
def extract_coherences_across_seeds(models_dir, results_dir, experiment_id):
    """
    Collect the coherence value of every extracted-forest-size model of an
    experiment, grouped by seed.

    Walks models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{size}
    and loads each model_raw_results.pickle to read its `coherence` field.

    :param models_dir: Root directory containing the per-experiment models.
    :param results_dir: Unused; kept for signature parity with the sibling
        extract_weights_across_seeds helper.
    :param experiment_id: Identifier of the experiment to extract from.
    :return: dict mapping seed (str) to the list of coherence values, ordered
        by increasing extracted forest size.
    """
    experiment_id_path = models_dir + os.sep + str(experiment_id)  # models/{experiment_id}
    experiment_seed_root_path = experiment_id_path + os.sep + 'seeds'  # models/{experiment_id}/seeds
    experiment_coherences = dict()

    # For each seed results stored in models/{experiment_id}/seeds,
    # processed in numeric order for a deterministic output.
    seeds = os.listdir(experiment_seed_root_path)
    seeds.sort(key=int)
    for seed in seeds:
        experiment_seed_path = experiment_seed_root_path + os.sep + seed  # models/{experiment_id}/seeds/{seed}
        extracted_forest_sizes_root_path = experiment_seed_path + os.sep + 'extracted_forest_sizes'  # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
        # {{seed}:[]}
        experiment_coherences[seed] = list()

        # List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_sizes,
        # skipping the 'no_weights' variants, in increasing size order.
        extracted_forest_sizes = os.listdir(extracted_forest_sizes_root_path)
        extracted_forest_sizes = [nb_tree for nb_tree in extracted_forest_sizes
                                  if 'no_weights' not in nb_tree]
        extracted_forest_sizes.sort(key=int)
        for extracted_forest_size in extracted_forest_sizes:
            # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}
            extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size
            # Load models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}/model_raw_results.pickle
            model_raw_results = ModelRawResults.load(extracted_forest_size_path)
            # Collect the coherence value (the original comment wrongly said
            # "weights": this was copy-pasted from extract_weights_across_seeds).
            experiment_coherences[seed].append(model_raw_results.coherence)

    return experiment_coherences
if __name__ == "__main__":
# get environment variables in .env
......@@ -493,7 +521,7 @@ if __name__ == "__main__":
ylabel=base_with_params_experiment_score_metric,
title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
if args.plot_weight_density:
if args.plot_weight_density or args.plot_preds_coherence:
root_output_path = os.path.join(args.results_dir, args.dataset_name, f'stage{args.stage}')
if args.stage == 1:
......@@ -526,8 +554,25 @@ if __name__ == "__main__":
omp_experiment_ids.append((label, current_experiment_id))
for (experiment_label, experiment_id) in omp_experiment_ids:
if args.plot_weight_density:
logger.info(f'Computing weight density plot for experiment {experiment_label}...')
experiment_weights = extract_weights_across_seeds(args.models_dir, args.results_dir, experiment_id)
Plotter.weight_density(experiment_weights, os.path.join(root_output_path, f'weight_density_{experiment_label}.png'))
if args.plot_preds_coherence:
all_labels = ['random', 'omp']
random_with_params_train_scores, random_with_params_dev_scores, random_with_params_test_scores, \
with_params_extracted_forest_sizes, random_with_params_experiment_score_metric = \
extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, int(args.experiment_ids[1]))
coherence_values = [extract_coherences_across_seeds(args.models_dir, args.results_dir, i) for i in range(2, 4)]
print(coherence_values[1])
Plotter.plot_stage2_losses(
file_path=root_output_path + os.sep + f"coherences_{'-'.join(all_labels)}.png",
all_experiment_scores=coherence_values,
all_labels=all_labels,
x_value=with_params_extracted_forest_sizes,
xlabel='Number of trees extracted',
ylabel='Coherence',
title='Coherence values of {}'.format(args.dataset_name))
logger.info(f'Computing preds coherence plot for experiment {experiment_label}...')
logger.info('Done.')
......@@ -121,6 +121,7 @@ def seed_job(seed_job_pb, seed, parameters, experiment_id, hyperparameters, verb
trainer.init(model, subsets_used=parameters['subsets_used'])
trainer.train(model)
#trainer.compute_preds_coherence(model)
trainer.compute_results(model, sub_models_dir)
logger.info(f'Training done for seed {seed_str}')
seed_job_pb.update(1)
......@@ -169,6 +170,7 @@ def extracted_forest_size_job(extracted_forest_size_job_pb, extracted_forest_siz
trainer.init(model, subsets_used=parameters['subsets_used'])
trainer.train(model, extracted_forest_size=extracted_forest_size)
#trainer.compute_preds_coherence(model)
trainer.compute_results(model, sub_models_dir)
"""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment