Change results processing (figures, tables, CSV export)

f41d9087 · Luc Giffon · 5e3368e9 · f41d9087 · f41d9087 · f41d9087
Commit f41d9087 authored 5 years ago by Luc Giffon
--- a/code/playground/nn_omp.py
+++ b/code/playground/nn_omp.py
--- a/code/vizualisation/csv_to_figure.py
+++ b/code/vizualisation/csv_to_figure.py
@@ -9,15 +9,15 @@ import plotly.io as pio

 lst_skip_strategy = ["None", "OMP Distillation", "OMP Distillation w/o weights"]
 # lst_skip_subset = ["train/dev"]
-lst_skip_subset = []
+lst_task_train_dev = ["coherence", "correlation"]

 tasks = [
    # "train_score",
    # "dev_score",
    # "test_score",
-    # "coherence",
-    # "correlation",
-    "negative-percentage"
+    "coherence",
+    "correlation",
+    # "negative-percentage"
 ]

 dct_score_metric_fancy = {
@@ -98,10 +98,6 @@ if __name__ == "__main__":
            df_data = df_results[df_results["dataset"] == data_name]
            score_metric_name = df_data["score_metric"].values[0]

-            for subset_name in subsets:
-                if subset_name in lst_skip_subset:
-                    continue
-                df_subset = df_data[df_data["subset"] == subset_name]
            fig = go.Figure()

            ##################
@@ -110,36 +106,32 @@ if __name__ == "__main__":
            for strat in strategies:
                if strat in lst_skip_strategy:
                    continue
-                    df_strat = df_subset[df_subset["strategy"] == strat]
+                df_strat = df_data[df_data["strategy"] == strat]
+                df_strat = df_strat[df_strat["subset"] == "train+dev/train+dev"]

                if "OMP" in strat:
                    ###########################
                    # traitement avec weights #
                    ###########################
                    df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False]
-                        if data_name == "Boston" and subset_name == "train+dev/train+dev":
+                    if data_name == "Boston":
                        df_strat_wo_weights = df_strat_wo_weights[df_strat_wo_weights["forest_size"] < 400]
                    add_trace_from_df(df_strat_wo_weights, fig)

-                    if "OMP" in strat and subset_name == "train/dev":
-                        continue
-                    elif "Random" not in strat and subset_name == "train/dev":
-                        continue
-
                #################################
                # traitement general wo_weights #
                #################################
-                    if "Random" in strat:
-                        df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False]
-                    else:
+                if "OMP" in strat:
                    df_strat_wo_weights = df_strat[df_strat["wo_weights"] == True]
+                else:
+                    df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False]

                if "OMP" in strat:
                    strat = "{} w/o weights".format(strat)

                add_trace_from_df(df_strat_wo_weights, fig)

-                title = "{} {} {}".format(task, data_name, subset_name)
+            title = "{} {}".format(task, data_name)
            yaxis_title = "% negative weights" if task == "negative-percentage" else dct_score_metric_fancy[score_metric_name]
            fig.update_layout(barmode='group',
                              title=title,
@@ -173,7 +165,7 @@ if __name__ == "__main__":
            # fig.show()
            sanitize = lambda x: x.replace(" ", "_").replace("/", "_").replace("+", "_")
            filename = sanitize(title)
-                output_dir = out_dir / sanitize(subset_name) / sanitize(task)
+            output_dir = out_dir / sanitize(task)
            output_dir.mkdir(parents=True, exist_ok=True)
            fig.write_image(str((output_dir / filename).absolute()) + ".png")


--- a/code/vizualisation/csv_to_table.py
+++ b/code/vizualisation/csv_to_table.py
@@ -77,7 +77,7 @@ dct_data_metric = {


 def get_max_from_df(df, best_fct):
-    nb_to_consider = 30
+    nb_to_consider = 10
    df.sort_values(by="forest_size", inplace=True)
    df_groupby_forest_size = df.groupby(['forest_size'])
    forest_sizes = list(df_groupby_forest_size["forest_size"].mean().values)[:nb_to_consider]
@@ -292,6 +292,8 @@ if __name__ == "__main__":
                print("\\midrule")
            if idx_lin == 6:
                print("\\midrule")
+            if lst_data_ordered[idx_lin-1] == "Diamonds":
+                print("%", end="")
            line_print = " ".join(list(lin))
            line_print = line_print.rstrip(" &") + "\\\\"
            print(line_print)

--- a/code/vizualisation/results_to_csv.py
+++ b/code/vizualisation/results_to_csv.py
@@ -77,8 +77,10 @@ if __name__ == "__main__":
                continue
            path_dir = Path(root)
            path_file = path_dir / file_str
+            print(path_file)
            try:
-                obj_results = pickle.load(open(path_file, 'rb'))
+                with open(path_file, 'rb') as pickle_file:
+                    obj_results = pickle.load(pickle_file)
            except:
                print("problem loading pickle file {}".format(path_file))

@@ -118,6 +120,7 @@ if __name__ == "__main__":
                        percentage_lt_zero = nb_lt_zero / (nb_gt_zero + nb_lt_zero)
                        dct_results["negative-percentage"].append(percentage_lt_zero)
                if val_result == "":
+                    # print(key_result, val_result)
                    val_result = None
                if key_result == "coherence" and val_result is None:
                    set_no_coherence.add(id_xp)