diff --git a/.gitignore b/.gitignore
index be95874710f5ee55a4e0104c6f78ff8a21abb705..be20546afc6e7213216d7be700119cef0ee9d32b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -370,3 +370,6 @@ TSWLatexianTemp*
 *.lpz
 
 reports/*.pdf
+
+# Image
+*.png
diff --git a/code/bolsonaro/trainer.py b/code/bolsonaro/trainer.py
index 01d0a036c8058cb55f947c0d80645b5619da0cdb..dcc16e0b81f79bc203a2f547459b009b5ef44d4d 100644
--- a/code/bolsonaro/trainer.py
+++ b/code/bolsonaro/trainer.py
@@ -30,6 +30,8 @@ class Trainer(object):
             y_omp = y_forest
             self._logger.debug('Fitting both the forest and OMP on train+dev subsets.')
 
+        # TODO: add an option to train forest to train+dev and OMP to dev
+
         model.fit(
             X_forest=X_forest,
             y_forest=y_forest,
diff --git a/code/bolsonaro/visualization/plotter.py b/code/bolsonaro/visualization/plotter.py
index 9d7058732970fb5981ef04ce7a56e022ee68d5a9..86a906daa1576943f98edc9b73b965d2b2bac608 100644
--- a/code/bolsonaro/visualization/plotter.py
+++ b/code/bolsonaro/visualization/plotter.py
@@ -1,26 +1,31 @@
 import matplotlib.pyplot as plt
 import numpy as np
-from sklearn.neighbors.kde import KernelDensity
+import pandas as pd
 
 
 class Plotter(object):
 
     @staticmethod
-    def weight_density(weights, X, file_path):
-        X_plot = [np.exp(elem) for elem in weights]
-        fig, ax = plt.subplots()
+    def weight_density(all_experiment_weights, file_path):
+        '''
+        Function that creates the figure with the density of the weights
+        :param all_experiment_weights: The weights for the different experiments
+        :param file_path: str, path where the figure will be saved
+        '''
 
-        for kernel in ['gaussian', 'tophat', 'epanechnikov']:
-            kde = KernelDensity(kernel=kernel, bandwidth=0.5).fit(X_plot)
-            log_dens = kde.score_samples(X_plot)
-            ax.plot(X_plot, np.exp(log_dens), '-',
-                    label="kernel = '{0}'".format(kernel))
+        # Iterate over the per-experiment lists directly: wrapping them in np.array()
+        # is fragile when they do not all hold weight vectors of the same shape
+        all_experiment_weights = list(all_experiment_weights.values())
+        n = len(all_experiment_weights)
+        colors = Plotter.get_colors_from_cmap(n)
 
-        ax.legend(loc='upper left')
-        ax.plot(X[:, 0], -0.005 - 0.01 * np.random.random(X.shape[0]), '+k')
+        fig, ax = plt.subplots()
+        for i, experiment_weights in enumerate(all_experiment_weights):
+            for weights in experiment_weights:
+                pd.Series([weight for weight in weights if weight != 0]).plot.kde(
+                    figsize=(15, 10), ax=ax, color=colors[i])
 
-        ax.set_xlim(-4, 9)
-        ax.set_ylim(-0.02, 0.4)
+        ax.set_title('Density weights of the OMP')
         fig.savefig(file_path, dpi=fig.dpi)
         plt.close(fig)
 
diff --git a/code/compute_results.py b/code/compute_results.py
index fb09e42d3fc6829358b622ad4291bd064a9c65a3..78d30272fcf56620249208f356c3d04bb9177f9d 100644
--- a/code/compute_results.py
+++ b/code/compute_results.py
@@ -59,6 +59,8 @@ if __name__ == "__main__":
         experiment_dev_scores = dict()
         experiment_test_scores = dict()
 
+        experiment_weights = dict()
+
         # Used to check if all losses were computed using the same metric (it should be the case)
         experiment_score_metrics = list()
 
@@ -74,6 +76,8 @@ if __name__ == "__main__":
             experiment_dev_scores[seed] = list()
             experiment_test_scores[seed] = list()
 
+            experiment_weights[seed] = list()
+
             # List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_size
             extracted_forest_sizes = os.listdir(extracted_forest_size_root_path)
             for extracted_forest_size in extracted_forest_sizes:
@@ -84,9 +88,13 @@ if __name__ == "__main__":
                 # Load [...]/model_parameters.json file and build the model using these parameters and the weights and forest from model_raw_results.pickle
                 model = ModelFactory.load(dataset.task, extracted_forest_size_path, experiment_id, model_raw_results)
                 # Save temporarly some raw results (TODO: to complete to retreive more results)
+                # Save the scores
                 experiment_train_scores[seed].append(model_raw_results.train_score)
                 experiment_dev_scores[seed].append(model_raw_results.dev_score)
                 experiment_test_scores[seed].append(model_raw_results.test_score)
+                # Save the weights
+                experiment_weights[seed].append(model_raw_results.weights)
+                # Save the metric
                 experiment_score_metrics.append(model_raw_results.score_metric)
 
         if len(set(experiment_score_metrics)) > 1:
@@ -107,3 +115,9 @@ if __name__ == "__main__":
             all_labels=['train', 'dev', 'test'],
             title='Loss values of the trained model'
         )
+
+        # Plot the density of the weights
+        Plotter.weight_density(
+            file_path=args.results_dir + os.sep + experiment_id + os.sep + 'density_weight.png',
+            all_experiment_weights=experiment_weights
+        )
diff --git a/requirements.txt b/requirements.txt
index 437c0178801cca15b39561318f6e244c15b34533..e203595adf44b2ff9a37f4976276762e4a1c8130 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,4 +11,5 @@ python-dotenv>=0.5.1
 scikit-learn
 python-dotenv
 tqdm
-matplotlib
\ No newline at end of file
+matplotlib
+pandas
\ No newline at end of file