Commit 570cc719 authored by Léo Bouscarrat
Browse files

Add plot for without_weights

parent 1379c412
...@@ -68,10 +68,12 @@ class ModelRawResults(object): ...@@ -68,10 +68,12 @@ class ModelRawResults(object):
return self._base_score_metric return self._base_score_metric
def save(self, models_dir): def save(self, models_dir):
if not os.path.exists(models_dir):
os.mkdir(models_dir)
save_obj_to_pickle(models_dir + os.sep + 'model_raw_results.pickle', save_obj_to_pickle(models_dir + os.sep + 'model_raw_results.pickle',
self.__dict__) self.__dict__)
@staticmethod @staticmethod
def load(models_dir): def load(models_dir):
return load_obj_from_pickle(models_dir + os.sep + 'model_raw_results.pickle', return load_obj_from_pickle(models_dir + os.sep + 'model_raw_results.pickle',
ModelRawResults) ModelRawResults)
...@@ -24,7 +24,6 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta): ...@@ -24,7 +24,6 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):
return self._base_forest_estimator.score(X, y) return self._base_forest_estimator.score(X, y)
def _base_estimator_predictions(self, X): def _base_estimator_predictions(self, X):
# We need to use predict_proba to get the probabilities of each class
return np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_]).T return np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_]).T
@property @property
...@@ -123,3 +122,24 @@ class SingleOmpForest(OmpForest): ...@@ -123,3 +122,24 @@ class SingleOmpForest(OmpForest):
forest_predictions /= self._forest_norms forest_predictions /= self._forest_norms
return self._make_omp_weighted_prediction(forest_predictions, self._omp, self._models_parameters.normalize_weights) return self._make_omp_weighted_prediction(forest_predictions, self._omp, self._models_parameters.normalize_weights)
def predict_no_weights(self, X):
    """
    Predict with the trees selected by OMP, ignoring the OMP weights.

    Every base tree predicts X; the trees whose OMP coefficient is non-zero
    are kept and their predictions are averaged with equal weight.

    :param X: the samples to predict, passed as-is to each base tree
    :return: a np.array with one value per sample, the unweighted mean of
        the selected trees' predictions (NaN when no coefficient is non-zero,
        since the mean is then taken over an empty selection)
    """
    # One row per sample, one column per base tree
    # (_base_estimator_predictions already transposes the per-tree stack).
    forest_predictions = self._base_estimator_predictions(X)

    if self._models_parameters.normalize_D:
        # Normalise while the trees are still on the last axis so that a
        # per-tree norm vector lines up with the tree columns. Dividing
        # after transposing would broadcast the norms over samples.
        # NOTE(review): assumes self._forest_norms holds one norm per tree,
        # as in predict() -- confirm against where it is computed.
        # Out-of-place division also avoids a casting error on integer
        # predictions.
        forest_predictions = forest_predictions / self._forest_norms

    # Column indices of the trees OMP kept (non-zero coefficient).
    omp_trees_indices = np.nonzero(self._omp.coef_)[0]

    # Equal-weight average over the selected trees, one value per sample.
    return np.mean(forest_predictions[:, omp_trees_indices], axis=1)
...@@ -106,6 +106,36 @@ class OmpForestMulticlassClassifier(OmpForest): ...@@ -106,6 +106,36 @@ class OmpForestMulticlassClassifier(OmpForest):
max_preds = np.argmax(preds, axis=1) max_preds = np.argmax(preds, axis=1)
return np.array(label_names)[max_preds] return np.array(label_names)[max_preds]
def predict_no_weights(self, X):
    """
    Predict class labels with the OMP-selected trees, ignoring the weights.

    For each class, the trees whose coefficient in that class's OMP model
    is non-zero cast an unweighted vote: their class probabilities are
    rescaled from [0, 1] to [-1, 1] and summed. Each sample gets the label
    of the class with the highest summed vote.

    :param X: the samples to predict, passed as-is to each tree's predict_proba
    :return: a np.array with one predicted class label per sample
    """
    per_tree_probas = [tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]
    # After the transpose the first axis indexes classes.
    forest_predictions = np.array(per_tree_probas).T

    if self._models_parameters.normalize_D:
        forest_predictions /= self._forest_norms

    label_names = list(self._dct_class_omp.keys())
    class_votes = []
    for class_position, omp_class in enumerate(self._dct_class_omp.values()):
        selected_trees = np.nonzero(omp_class.coef_)
        # Rescale the probabilities from 0/1 to -1/1.
        atoms_binary = (forest_predictions[class_position].T - 0.5) * 2
        class_votes.append(np.sum(atoms_binary[selected_trees], axis=0))

    # One row per sample, one column per class.
    vote_matrix = np.array(class_votes).T
    winners = np.argmax(vote_matrix, axis=1)
    return np.array(label_names)[winners]
def score(self, X, y, metric=DEFAULT_SCORE_METRIC): def score(self, X, y, metric=DEFAULT_SCORE_METRIC):
predictions = self.predict(X) predictions = self.predict(X)
......
...@@ -95,12 +95,18 @@ class Trainer(object): ...@@ -95,12 +95,18 @@ class Trainer(object):
) )
self._end_time = time.time() self._end_time = time.time()
def __score_func(self, model, X, y_true): def __score_func(self, model, X, y_true, weights=True):
if type(model) in [OmpForestRegressor, RandomForestRegressor, SimilarityForestRegressor]: if type(model) in [OmpForestRegressor, RandomForestRegressor, SimilarityForestRegressor]:
y_pred = model.predict(X) if weights:
y_pred = model.predict(X)
else:
y_pred = model.predict_no_weights(X)
result = self._regression_score_metric(y_true, y_pred) result = self._regression_score_metric(y_true, y_pred)
elif type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier, RandomForestClassifier]: elif type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier, RandomForestClassifier]:
y_pred = model.predict(X) if weights:
y_pred = model.predict(X)
else:
y_pred = model.predict_no_weights(X)
if type(model) is OmpForestBinaryClassifier: if type(model) is OmpForestBinaryClassifier:
y_pred = y_pred.round() y_pred = y_pred.round()
result = self._classification_score_metric(y_true, y_pred) result = self._classification_score_metric(y_true, y_pred)
...@@ -148,3 +154,29 @@ class Trainer(object): ...@@ -148,3 +154,29 @@ class Trainer(object):
self._logger.info("Base performance on dev: {}".format(results.dev_score_base)) self._logger.info("Base performance on dev: {}".format(results.dev_score_base))
self._logger.info("Performance on dev: {}".format(results.dev_score)) self._logger.info("Performance on dev: {}".format(results.dev_score))
if type(model) not in [RandomForestRegressor, RandomForestClassifier]:
results = ModelRawResults(
model_object='',
training_time=self._end_time - self._begin_time,
datetime=datetime.datetime.now(),
train_score=self.__score_func(model, self._dataset.X_train, self._dataset.y_train, False),
dev_score=self.__score_func(model, self._dataset.X_dev, self._dataset.y_dev, False),
test_score=self.__score_func(model, self._dataset.X_test, self._dataset.y_test, False),
train_score_base=self.__score_func_base(model, self._dataset.X_train, self._dataset.y_train),
dev_score_base=self.__score_func_base(model, self._dataset.X_dev, self._dataset.y_dev),
test_score_base=self.__score_func_base(model, self._dataset.X_test, self._dataset.y_test),
score_metric=self._score_metric_name,
base_score_metric=self._base_score_metric_name
)
results.save(models_dir+'_no_weights')
self._logger.info("Base performance on test without weights: {}".format(results.test_score_base))
self._logger.info("Performance on test: {}".format(results.test_score))
self._logger.info("Base performance on train without weights: {}".format(results.train_score_base))
self._logger.info("Performance on train: {}".format(results.train_score))
self._logger.info("Base performance on dev without weights: {}".format(results.dev_score_base))
self._logger.info("Performance on dev: {}".format(results.dev_score))
...@@ -109,22 +109,23 @@ class Plotter(object): ...@@ -109,22 +109,23 @@ class Plotter(object):
fig, ax = plt.subplots() fig, ax = plt.subplots()
n = len(all_experiment_scores) nb_experiments = len(all_experiment_scores)
""" """
Get as many different colors from the specified cmap (here nipy_spectral) Get as many different colors from the specified cmap (here nipy_spectral)
as there are curve to plot. as there are curve to plot.
""" """
colors = Plotter.get_colors_from_cmap(n) colors = Plotter.get_colors_from_cmap(nb_experiments)
# For each curve to plot # For each curve to plot
for i in range(n): for i in range(nb_experiments):
# Retreive the scores in a list for each seed # Retreive the scores in a list for each seed
experiment_scores = list(all_experiment_scores[i].values()) experiment_scores = list(all_experiment_scores[i].values())
# Compute the mean and the std for the CI # Compute the mean and the std for the CI
mean_experiment_scores = np.average(experiment_scores, axis=0) mean_experiment_scores = np.average(experiment_scores, axis=0)
std_experiment_scores = np.std(experiment_scores, axis=0) std_experiment_scores = np.std(experiment_scores, axis=0)
# Plot the score curve with the CI # Plot the score curve with the CI
print(len(mean_experiment_scores))
Plotter.plot_mean_and_CI( Plotter.plot_mean_and_CI(
ax=ax, ax=ax,
mean=mean_experiment_scores, mean=mean_experiment_scores,
......
...@@ -17,7 +17,7 @@ def retreive_extracted_forest_sizes_number(models_dir, experiment_id): ...@@ -17,7 +17,7 @@ def retreive_extracted_forest_sizes_number(models_dir, experiment_id):
extracted_forest_sizes_root_path = experiment_seed_path + os.sep + 'extracted_forest_sizes' extracted_forest_sizes_root_path = experiment_seed_path + os.sep + 'extracted_forest_sizes'
return len(os.listdir(extracted_forest_sizes_root_path)) return len(os.listdir(extracted_forest_sizes_root_path))
def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_dir, experiment_id): def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_dir, experiment_id, weights=True):
experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id} experiment_id_path = models_dir + os.sep + str(experiment_id) # models/{experiment_id}
experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds
...@@ -47,11 +47,15 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d ...@@ -47,11 +47,15 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_d
# List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_sizes # List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
extracted_forest_sizes = os.listdir(extracted_forest_sizes_root_path) extracted_forest_sizes = os.listdir(extracted_forest_sizes_root_path)
extracted_forest_sizes = [nb_tree for nb_tree in extracted_forest_sizes if not 'no_weights' in nb_tree ]
extracted_forest_sizes.sort(key=int) extracted_forest_sizes.sort(key=int)
all_extracted_forest_sizes.append(list(map(int, extracted_forest_sizes))) all_extracted_forest_sizes.append(list(map(int, extracted_forest_sizes)))
for extracted_forest_size in extracted_forest_sizes: for extracted_forest_size in extracted_forest_sizes:
# models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size} # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}
extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size if weights:
extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size
else:
extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size + '_no_weights'
# Load models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}/model_raw_results.pickle file # Load models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}/model_raw_results.pickle file
model_raw_results = ModelRawResults.load(extracted_forest_size_path) model_raw_results = ModelRawResults.load(extracted_forest_size_path)
# Save the scores # Save the scores
...@@ -350,6 +354,11 @@ if __name__ == "__main__": ...@@ -350,6 +354,11 @@ if __name__ == "__main__":
omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \ omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes( omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[2]) args.models_dir, args.results_dir, args.experiment_ids[2])
#omp_with_params_without_weights
logger.info('Loading omp_with_params experiment scores...')
omp_with_params_without_weights_train_scores, omp_with_params_without_weights_dev_scores, omp_with_params_without_weights_test_scores, _, \
omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
args.models_dir, args.results_dir, args.experiment_ids[2], weights=False)
"""# base_with_params """# base_with_params
logger.info('Loading base_with_params experiment scores 2...') logger.info('Loading base_with_params experiment scores 2...')
...@@ -374,8 +383,9 @@ if __name__ == "__main__": ...@@ -374,8 +383,9 @@ if __name__ == "__main__":
Plotter.plot_stage2_losses( Plotter.plot_stage2_losses(
file_path=output_path + os.sep + 'losses.png', file_path=output_path + os.sep + 'losses.png',
all_experiment_scores=[base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores], all_experiment_scores=[base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores,
all_labels=['base', 'random', 'omp'], omp_with_params_without_weights_test_scores],
all_labels=['base', 'random', 'omp', 'omp_without_weights'],
x_value=with_params_extracted_forest_sizes, x_value=with_params_extracted_forest_sizes,
xlabel='Number of trees extracted', xlabel='Number of trees extracted',
ylabel=experiments_score_metric, ylabel=experiments_score_metric,
......
...@@ -13,9 +13,11 @@ ...@@ -13,9 +13,11 @@
"test_size": 0.2, "test_size": 0.2,
"random_seed_number": 1, "random_seed_number": 1,
"seeds": [ "seeds": [
58, 1,
43535, 2,
234234 3,
4,
5
], ],
"subsets_used": "train,dev", "subsets_used": "train,dev",
"normalize_weights": false, "normalize_weights": false,
......
...@@ -13,9 +13,11 @@ ...@@ -13,9 +13,11 @@
"test_size": 0.2, "test_size": 0.2,
"random_seed_number": 1, "random_seed_number": 1,
"seeds": [ "seeds": [
58, 1,
43535, 2,
234234 3,
4,
5
], ],
"subsets_used": "train,dev", "subsets_used": "train,dev",
"normalize_weights": false, "normalize_weights": false,
......
results/boston/stage4/losses.png

43.7 KB | W: | H:

results/boston/stage4/losses.png

110 KB | W: | H:

results/boston/stage4/losses.png
results/boston/stage4/losses.png
results/boston/stage4/losses.png
results/boston/stage4/losses.png
  • 2-up
  • Swipe
  • Onion skin
results/iris/stage1/losses.png

64.7 KB | W: | H:

results/iris/stage1/losses.png

66.1 KB | W: | H:

results/iris/stage1/losses.png
results/iris/stage1/losses.png
results/iris/stage1/losses.png
results/iris/stage1/losses.png
  • 2-up
  • Swipe
  • Onion skin
for dataset in diamonds california_housing boston iris diabetes digits linnerud wine breast_cancer olivetti_faces 20newsgroups_vectorized lfw_pairs seeds='1 2 3'
for dataset in boston iris
do do
python code/compute_results.py --stage=1 --experiment_ids 1 2 3 4 5 6 --dataset_name=$dataset --models_dir=models/$dataset/stage1 python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --save_experiment_configuration 4 none_with_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=1 --models_dir=models/$dataset/stage4 --subsets_used train+dev,train+dev
python code/compute_results.py --stage=2 --experiment_ids 1 2 3 4 --dataset_name=$dataset --models_dir=models/$dataset/stage2 python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --save_experiment_configuration 4 random_with_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=2 --models_dir=models/$dataset/stage4 --subsets_used train+dev,train+dev
python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=$dataset --models_dir=models/$dataset/stage3 python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=omp --save_experiment_configuration 4 omp_with_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=3 --models_dir=models/$dataset/stage4 --subsets_used train+dev,train+dev
python code/compute_results.py --stage=4 --experiment_ids 1 2 3 --dataset_name=$dataset --models_dir=models/$dataset/stage4 python code/compute_results.py --stage=4 --experiment_ids 1 2 3 --dataset_name=$dataset --models_dir=models/$dataset/stage4
done done
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment