Skip to content
Snippets Groups Projects
Commit 570cc719 authored by Léo Bouscarrat's avatar Léo Bouscarrat
Browse files

Add plot for without_weights

parent 1379c412
Branches 14-correction-of-multiclass-classif
No related tags found
2 merge requests: !14 "Mistakes have been made", !13 Resolve "Add OMP no weight"
This commit is part of merge request !13. Comments created here will be created in the context of that merge request.
...@@ -68,6 +68,8 @@ class ModelRawResults(object): ...@@ -68,6 +68,8 @@ class ModelRawResults(object):
return self._base_score_metric return self._base_score_metric
def save(self, models_dir): def save(self, models_dir):
if not os.path.exists(models_dir):
os.mkdir(models_dir)
save_obj_to_pickle(models_dir + os.sep + 'model_raw_results.pickle', save_obj_to_pickle(models_dir + os.sep + 'model_raw_results.pickle',
self.__dict__) self.__dict__)
......
...@@ -24,7 +24,6 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta): ...@@ -24,7 +24,6 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):
return self._base_forest_estimator.score(X, y) return self._base_forest_estimator.score(X, y)
def _base_estimator_predictions(self, X): def _base_estimator_predictions(self, X):
# We need to use predict_proba to get the probabilities of each class
return np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_]).T return np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_]).T
@property @property
...@@ -123,3 +122,24 @@ class SingleOmpForest(OmpForest): ...@@ -123,3 +122,24 @@ class SingleOmpForest(OmpForest):
forest_predictions /= self._forest_norms forest_predictions /= self._forest_norms
return self._make_omp_weighted_prediction(forest_predictions, self._omp, self._models_parameters.normalize_weights) return self._make_omp_weighted_prediction(forest_predictions, self._omp, self._models_parameters.normalize_weights)
def predict_no_weights(self, X):
    """
    Predict with the OMP-selected trees only, ignoring the OMP weights.

    Every tree whose OMP coefficient is non-zero contributes equally: the
    result is the plain (unweighted) mean of the selected trees' predictions.

    :param X: the samples to predict; passed unchanged to
        ``self._base_estimator_predictions``
    :return: a np.array with, for each sample, the mean prediction of the
        selected trees
    """
    # One row per sample, one column per tree (layout produced by
    # _base_estimator_predictions, which transposes the per-tree stack).
    forest_predictions = self._base_estimator_predictions(X)

    if self._models_parameters.normalize_D:
        # Normalise while the tree axis is still the last one, so that a
        # per-tree norm vector broadcasts over the columns.
        # NOTE(review): assumes _forest_norms is a scalar or holds one norm
        # per tree, as predict() appears to rely on -- confirm.
        # Out-of-place division also avoids the casting error that an
        # in-place "/=" raises on integer-typed predictions.
        forest_predictions = forest_predictions / self._forest_norms

    # Trees kept by OMP are those with a non-zero coefficient.
    selected_trees = np.flatnonzero(self._omp.coef_)

    # Average over the selected trees (columns) for each sample.
    # If OMP selected no tree at all, numpy returns NaN for every sample.
    return np.mean(forest_predictions[:, selected_trees], axis=1)
...@@ -106,6 +106,36 @@ class OmpForestMulticlassClassifier(OmpForest): ...@@ -106,6 +106,36 @@ class OmpForestMulticlassClassifier(OmpForest):
max_preds = np.argmax(preds, axis=1) max_preds = np.argmax(preds, axis=1)
return np.array(label_names)[max_preds] return np.array(label_names)[max_preds]
def predict_no_weights(self, X):
    """
    Predict class labels with the OMP-selected trees, ignoring the OMP weights.

    For every class, the trees whose coefficient in that class' OMP model is
    non-zero each cast an unweighted vote in [-1, 1] (their class probability
    rescaled from [0, 1]); the votes are summed and the class with the
    highest total is returned for each sample.

    :param X: the samples to classify; passed unchanged to each tree's
        ``predict_proba``
    :return: a np.array with one predicted class label per sample
    """
    # Stack per-tree probabilities, then transpose so that the class axis
    # comes first and the tree axis last.
    # NOTE(review): assumes predict_proba returns (n_samples, n_classes) as
    # in scikit-learn, giving (n_classes, n_samples, n_trees) here -- confirm.
    forest_predictions = np.array(
        [tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]
    ).T

    if self._models_parameters.normalize_D:
        forest_predictions = forest_predictions / self._forest_norms

    label_names = []
    class_votes = []
    # NOTE(review): relies on the iteration order of _dct_class_omp matching
    # the class column order of predict_proba -- confirm against fit().
    for class_index, (class_label, omp_class) in enumerate(self._dct_class_omp.items()):
        # Trees kept by this class' OMP model: non-zero coefficients.
        omp_trees_indices = np.nonzero(omp_class.coef_)
        label_names.append(class_label)
        # Rescale from the 0/1 range to the -1/1 range; rows are trees.
        atoms_binary = (forest_predictions[class_index].T - 0.5) * 2
        # Unweighted sum of the selected trees' votes, one value per sample.
        class_votes.append(np.sum(atoms_binary[omp_trees_indices], axis=0))

    # One row per sample, one column per class; pick the best-scoring class.
    votes = np.array(class_votes).T
    return np.array(label_names)[np.argmax(votes, axis=1)]
def score(self, X, y, metric=DEFAULT_SCORE_METRIC): def score(self, X, y, metric=DEFAULT_SCORE_METRIC):
predictions = self.predict(X) predictions = self.predict(X)
......
...@@ -95,12 +95,18 @@ class Trainer(object): ...@@ -95,12 +95,18 @@ class Trainer(object):
) )
self._end_time = time.time() self._end_time = time.time()
def __score_func(self, model, X, y_true): def __score_func(self, model, X, y_true, weights=True):
if type(model) in [OmpForestRegressor, RandomForestRegressor, SimilarityForestRegressor]: if type(model) in [OmpForestRegressor, RandomForestRegressor, SimilarityForestRegressor]:
if weights:
y_pred = model.predict(X) y_pred = model.predict(X)
else:
y_pred = model.predict_no_weights(X)
result = self._regression_score_metric(y_true, y_pred) result = self._regression_score_metric(y_true, y_pred)
elif type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier, RandomForestClassifier]: elif type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier, RandomForestClassifier]:
if weights:
y_pred = model.predict(X) y_pred = model.predict(X)
else:
y_pred = model.predict_no_weights(X)
if type(model) is OmpForestBinaryClassifier: if type(model) is OmpForestBinaryClassifier:
y_pred = y_pred.round() y_pred = y_pred.round()
result = self._classification_score_metric(y_true, y_pred) result = self._classification_score_metric(y_true, y_pred)
...@@ -148,3 +154,29 @@ class Trainer(object): ...@@ -148,3 +154,29 @@ class Trainer(object):
self._logger.info("Base performance on dev: {}".format(results.dev_score_base)) self._logger.info("Base performance on dev: {}".format(results.dev_score_base))
self._logger.info("Performance on dev: {}".format(results.dev_score)) self._logger.info("Performance on dev: {}".format(results.dev_score))
if type(model) not in [RandomForestRegressor, RandomForestClassifier]:
results = ModelRawResults(
model_object='',
training_time=self._end_time - self._begin_time,
datetime=datetime.datetime.now(),
train_score=self.__score_func(model, self._dataset.X_train, self._dataset.y_train, False),
dev_score=self.__score_func(model, self._dataset.X_dev, self._dataset.y_dev, False),
test_score=self.__score_func(model, self._dataset.X_test, self._dataset.y_test, False),
train_score_base=self.__score_func_base(model, self._dataset.X_train, self._dataset.y_train),
dev_score_base=self.__score_func_base(model, self._dataset.X_dev, self._dataset.y_dev),
test_score_base=self.__score_func_base(model, self._dataset.X_test, self._dataset.y_test),
score_metric=self._score_metric_name,
base_score_metric=self._base_score_metric_name
)
results.save(models_dir+'_no_weights')
self._logger.info("Base performance on test without weights: {}".format(results.test_score_base))
self._logger.info("Performance on test: {}".format(results.test_score))
self._logger.info("Base performance on train without weights: {}".format(results.train_score_base))
self._logger.info("Performance on train: {}".format(results.train_score))
self._logger.info("Base performance on dev without weights: {}".format(results.dev_score_base))
self._logger.info("Performance on dev: {}".format(results.dev_score))
...@@ -109,22 +109,23 @@ class Plotter(object): ...@@ -109,22 +109,23 @@ class Plotter(object):
fig, ax = plt.subplots() fig, ax = plt.subplots()
n = len(all_experiment_scores) nb_experiments = len(all_experiment_scores)
""" """
Get as many different colors from the specified cmap (here nipy_spectral) Get as many different colors from the specified cmap (here nipy_spectral)
as there are curve to plot. as there are curve to plot.
""" """
colors = Plotter.get_colors_from_cmap(n) colors = Plotter.get_colors_from_cmap(nb_experiments)
# For each curve to plot # For each curve to plot
for i in range(n): for i in range(nb_experiments):
# Retrieve the scores in a list for each seed # Retrieve the scores in a list for each seed
experiment_scores = list(all_experiment_scores[i].values()) experiment_scores = list(all_experiment_scores[i].values())
# Compute the mean and the std for the CI # Compute the mean and the std for the CI
mean_experiment_scores = np.average(experiment_scores, axis=0) mean_experiment_scores = np.average(experiment_scores, axis=0)
std_experiment_scores = np.std(experiment_scores, axis=0) std_experiment_scores = np.std(experiment_scores, axis=0)
# Plot the score curve with the CI # Plot the score curve with the CI
print(len(mean_experiment_scores))
Plotter.plot_mean_and_CI( Plotter.plot_mean_and_CI(
ax=ax, ax=ax,
mean=mean_experiment_scores, mean=mean_experiment_scores,
......
...@@ -17,7 +17,7 @@ def retreive_extracted_forest_sizes_number(models_dir, experiment_id): ...@@ -17,7 +17,7 @@ def retreive_extracted_forest_sizes_number(models_dir, experiment_id):
extracted_forest_sizes_root_path = experiment_seed_path + os.sep + 'extracted_forest_sizes' extracted_forest_sizes_root_path = experiment_seed_path + os.sep + 'extracted_forest_sizes'
return len(os.listdir(extracted_forest_sizes_root_path)) return len(os.listdir(extracted_forest_sizes_root_path))
def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_dir, experiment_id): def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_dir, experiment_id, weights=True):
experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id} experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds
...@@ -47,11 +47,15 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d ...@@ -47,11 +47,15 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
# List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_sizes # List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
extracted_forest_sizes = os.listdir(extracted_forest_sizes_root_path) extracted_forest_sizes = os.listdir(extracted_forest_sizes_root_path)
extracted_forest_sizes = [nb_tree for nb_tree in extracted_forest_sizes if not 'no_weights' in nb_tree ]
extracted_forest_sizes.sort(key=int) extracted_forest_sizes.sort(key=int)
all_extracted_forest_sizes.append(list(map(int, extracted_forest_sizes))) all_extracted_forest_sizes.append(list(map(int, extracted_forest_sizes)))
for extracted_forest_size in extracted_forest_sizes: for extracted_forest_size in extracted_forest_sizes:
# models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size} # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}
if weights:
extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size
else:
extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size + '_no_weights'
# Load models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}/model_raw_results.pickle file # Load models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}/model_raw_results.pickle file
model_raw_results = ModelRawResults.load(extracted_forest_size_path) model_raw_results = ModelRawResults.load(extracted_forest_size_path)
# Save the scores # Save the scores
...@@ -350,6 +354,11 @@ if __name__ == "__main__": ...@@ -350,6 +354,11 @@ if __name__ == "__main__":
omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \ omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes( omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[2]) args.models_dir, args.results_dir, args.experiment_ids[2])
#omp_with_params_without_weights
logger.info('Loading omp_with_params experiment scores...')
omp_with_params_without_weights_train_scores, omp_with_params_without_weights_dev_scores, omp_with_params_without_weights_test_scores, _, \
omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[2], weights=False)
"""# base_with_params """# base_with_params
logger.info('Loading base_with_params experiment scores 2...') logger.info('Loading base_with_params experiment scores 2...')
...@@ -374,8 +383,9 @@ if __name__ == "__main__": ...@@ -374,8 +383,9 @@ if __name__ == "__main__":
Plotter.plot_stage2_losses( Plotter.plot_stage2_losses(
file_path=output_path + os.sep + 'losses.png', file_path=output_path + os.sep + 'losses.png',
all_experiment_scores=[base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores], all_experiment_scores=[base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores,
all_labels=['base', 'random', 'omp'], omp_with_params_without_weights_test_scores],
all_labels=['base', 'random', 'omp', 'omp_without_weights'],
x_value=with_params_extracted_forest_sizes, x_value=with_params_extracted_forest_sizes,
xlabel='Number of trees extracted', xlabel='Number of trees extracted',
ylabel=experiments_score_metric, ylabel=experiments_score_metric,
......
...@@ -13,9 +13,11 @@ ...@@ -13,9 +13,11 @@
"test_size": 0.2, "test_size": 0.2,
"random_seed_number": 1, "random_seed_number": 1,
"seeds": [ "seeds": [
58, 1,
43535, 2,
234234 3,
4,
5
], ],
"subsets_used": "train,dev", "subsets_used": "train,dev",
"normalize_weights": false, "normalize_weights": false,
......
...@@ -13,9 +13,11 @@ ...@@ -13,9 +13,11 @@
"test_size": 0.2, "test_size": 0.2,
"random_seed_number": 1, "random_seed_number": 1,
"seeds": [ "seeds": [
58, 1,
43535, 2,
234234 3,
4,
5
], ],
"subsets_used": "train,dev", "subsets_used": "train,dev",
"normalize_weights": false, "normalize_weights": false,
......
results/boston/stage4/losses.png

43.7 KiB | W: | H:

results/boston/stage4/losses.png

110 KiB | W: | H:

results/boston/stage4/losses.png
results/boston/stage4/losses.png
results/boston/stage4/losses.png
results/boston/stage4/losses.png
  • 2-up
  • Swipe
  • Onion skin
results/iris/stage1/losses.png

64.7 KiB | W: | H:

results/iris/stage1/losses.png

66.1 KiB | W: | H:

results/iris/stage1/losses.png
results/iris/stage1/losses.png
results/iris/stage1/losses.png
results/iris/stage1/losses.png
  • 2-up
  • Swipe
  • Onion skin
for dataset in diamonds california_housing boston iris diabetes digits linnerud wine breast_cancer olivetti_faces 20newsgroups_vectorized lfw_pairs seeds='1 2 3'
for dataset in boston iris
do do
python code/compute_results.py --stage=1 --experiment_ids 1 2 3 4 5 6 --dataset_name=$dataset --models_dir=models/$dataset/stage1 python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --save_experiment_configuration 4 none_with_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=1 --models_dir=models/$dataset/stage4 --subsets_used train+dev,train+dev
python code/compute_results.py --stage=2 --experiment_ids 1 2 3 4 --dataset_name=$dataset --models_dir=models/$dataset/stage2 python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --save_experiment_configuration 4 random_with_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=2 --models_dir=models/$dataset/stage4 --subsets_used train+dev,train+dev
python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=$dataset --models_dir=models/$dataset/stage3 python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=omp --save_experiment_configuration 4 omp_with_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=3 --models_dir=models/$dataset/stage4 --subsets_used train+dev,train+dev
python code/compute_results.py --stage=4 --experiment_ids 1 2 3 --dataset_name=$dataset --models_dir=models/$dataset/stage4 python code/compute_results.py --stage=4 --experiment_ids 1 2 3 --dataset_name=$dataset --models_dir=models/$dataset/stage4
done done
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment