From fc19c39e20ab76afa38a7055cc192247f374d4c5 Mon Sep 17 00:00:00 2001
From: Charly LAMOTHE <lamothe.c@intlocal.univ-amu.fr>
Date: Tue, 5 Nov 2019 15:24:47 +0100
Subject: [PATCH] Add comments to compute_results.py

---
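Note: the comments added below document the on-disk layout that
compute_results.py walks. As a quick reference, here is a minimal
standalone sketch of that traversal and of the score dictionaries it
fills. The paths come from the new comments; the loading of
model_raw_results.pickle and model_parameters.json is elided, so the
appended placeholder score is purely illustrative, not the script itself:

    import os

    models_dir = 'models'
    for experiment_id in os.listdir(models_dir):
        # models/{experiment_id}/seeds
        seeds_path = os.path.join(models_dir, experiment_id, 'seeds')
        scores = {}  # {seed: [score_1, ..., score_m]}
        for seed in os.listdir(seeds_path):
            # models/{experiment_id}/seeds/{seed}/extracted_forest_size
            sizes_path = os.path.join(seeds_path, seed,
                                      'extracted_forest_size')
            scores[seed] = []
            for extracted_forest_size in os.listdir(sizes_path):
                # each size directory holds model_raw_results.pickle and
                # model_parameters.json; one score is appended per size
                scores[seed].append(0.0)  # placeholder for a loaded score
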
 code/compute_results.py | 44 +++++++++++++++++++++++++++++++++++------
 1 file changed, 38 insertions(+), 6 deletions(-)
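
For the plotting step at the end of the second hunk, here is a sketch of
the kind of train/dev/test loss curve with confidence intervals that the
comment above Plotter.plot_losses describes. Plotter.plot_losses is the
project's own function and its internals are not shown in this patch; the
standalone matplotlib version below, including the 95% normal-approximation
interval, is an assumption for illustration only:

    import numpy as np
    import matplotlib.pyplot as plt

    def plot_losses_with_ci(all_scores, labels, forest_sizes, file_path):
        # all_scores: one {seed: [score_1, ..., score_m]} dict per subset
        # (train, dev, test), as built by compute_results.py
        for scores_by_seed, label in zip(all_scores, labels):
            scores = np.array(list(scores_by_seed.values()))  # (n_seeds, m)
            mean = scores.mean(axis=0)
            # ~95% confidence interval under a normal approximation
            ci = 1.96 * scores.std(axis=0) / np.sqrt(scores.shape[0])
            plt.plot(forest_sizes, mean, label=label)
            plt.fill_between(forest_sizes, mean - ci, mean + ci, alpha=0.2)
        plt.xlabel('extracted forest size')
        plt.ylabel('score')
        plt.legend()
        plt.savefig(file_path)
        plt.close()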

diff --git a/code/compute_results.py b/code/compute_results.py
index 383b89b..fb09e42 100644
--- a/code/compute_results.py
+++ b/code/compute_results.py
@@ -24,40 +24,66 @@ if __name__ == "__main__":
     parser.add_argument('--experiment_ids', nargs='+', type=int, default=DEFAULT_EXPERIMENT_IDS, help='Compute the results of the specified experiment id(s)')
     args = parser.parse_args()
 
+    # Recursively create the results directory tree
     pathlib.Path(args.results_dir).mkdir(parents=True, exist_ok=True)
 
+    """
+    Use specified list of experiments ids if availabe.
+    Otherwise, list all existing experiment ids from
+    the specified models directory.
+    """
     experiments_ids = [str(experiment_id) for experiment_id in args.experiment_ids] \
         if args.experiment_ids is not None \
         else os.listdir(args.models_dir)
 
+    """
+    Raise an error if there's no experiments ids found both
+    in parameter or in models directory.
+    """
     if experiments_ids is None or len(experiments_ids) == 0:
         raise ValueError("No experiment id was found or specified.")
 
+    # Compute the plots for each experiment id
     for experiment_id in experiments_ids:
-        experiment_id_path = args.models_dir + os.sep + experiment_id
+        experiment_id_path = args.models_dir + os.sep + experiment_id # models/{experiment_id}
+        # Recursively create the results/{experiment_id} directory tree
         pathlib.Path(args.results_dir + os.sep + experiment_id).mkdir(parents=True, exist_ok=True)
-        experiment_seed_root_path = experiment_id_path + os.sep + 'seeds'
+        experiment_seed_root_path = experiment_id_path + os.sep + 'seeds' # models/{experiment_id}/seeds
 
+        """
+        Dictionaries to temporarly store the scalar results with the following structure:
+        {seed_1: [score_1, ..., score_m], ... seed_n: [score_1, ..., score_k]}
+        TODO: to complete to retreive more results
+        """
         experiment_train_scores = dict()
         experiment_dev_scores = dict()
         experiment_test_scores = dict()
+
+        # Used to check that all losses were computed using the same metric (which should be the case)
         experiment_score_metrics = list()
 
+        # For each seed whose results are stored in models/{experiment_id}/seeds
         for seed in os.listdir(experiment_seed_root_path):
-            experiment_seed_path = experiment_seed_root_path + os.sep + seed
-            dataset_parameters = DatasetParameters.load(experiment_seed_path, experiment_id)
-            dataset = DatasetLoader.load(dataset_parameters)
-            extracted_forest_size_root_path = experiment_seed_path + os.sep + 'extracted_forest_size'
+            experiment_seed_path = experiment_seed_root_path + os.sep + seed # models/{experiment_id}/seeds/{seed}
+            dataset_parameters = DatasetParameters.load(experiment_seed_path, experiment_id) # Load the dataset parameters of this experiment, with this specific seed
+            dataset = DatasetLoader.load(dataset_parameters) # Load the dataset using the previously loaded dataset parameters
+            extracted_forest_size_root_path = experiment_seed_path + os.sep + 'extracted_forest_size' # models/{experiment_id}/seeds/{seed}/extracted_forest_size
 
+            # Initialize this seed's entry in each score dict: {seed: []}
             experiment_train_scores[seed] = list()
             experiment_dev_scores[seed] = list()
             experiment_test_scores[seed] = list()
 
+            # List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_size
             extracted_forest_sizes = os.listdir(extracted_forest_size_root_path)
             for extracted_forest_size in extracted_forest_sizes:
+                # models/{experiment_id}/seeds/{seed}/extracted_forest_size/{extracted_forest_size}
                 extracted_forest_size_path = extracted_forest_size_root_path + os.sep + extracted_forest_size
+                # Load models/{experiment_id}/seeds/{seed}/extracted_forest_size/{extracted_forest_size}/model_raw_results.pickle file
                 model_raw_results = ModelRawResults.load(extracted_forest_size_path)
+                # Load [...]/model_parameters.json file and build the model using these parameters and the weights and forest from model_raw_results.pickle
                 model = ModelFactory.load(dataset.task, extracted_forest_size_path, experiment_id, model_raw_results)
+                # Temporarily store some raw results (TODO: extend to retrieve more results)
                 experiment_train_scores[seed].append(model_raw_results.train_score)
                 experiment_dev_scores[seed].append(model_raw_results.dev_score)
                 experiment_test_scores[seed].append(model_raw_results.test_score)
@@ -66,6 +92,12 @@ if __name__ == "__main__":
         if len(set(experiment_score_metrics)) > 1:
             raise ValueError("The metrics used to compute the dev score aren't the same everytime")
 
+        """
+        Example of plot that just plots the losses computed
+        on the train, dev and test subsets using a trained
+        model, with the CI, and depending on the extracted
+        forest size.
+        """
         Plotter.plot_losses(
             file_path=args.results_dir + os.sep + experiment_id + os.sep + 'losses.png',
             all_experiment_scores=[experiment_train_scores, experiment_dev_scores, experiment_test_scores],
-- 
GitLab