From a0408dc1a6eef82d3fff226e56fe6c3085b7250e Mon Sep 17 00:00:00 2001
From: Luc Giffon <luc.giffon@lis-lab.fr>
Date: Sun, 29 Mar 2020 10:49:52 +0200
Subject: [PATCH] show negative correlation between test score and percentage of negative weights

---
 code/vizualisation/csv_to_figure.py | 40 +++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/code/vizualisation/csv_to_figure.py b/code/vizualisation/csv_to_figure.py
index bfe1f5e..c57a98d 100644
--- a/code/vizualisation/csv_to_figure.py
+++ b/code/vizualisation/csv_to_figure.py
@@ -19,7 +19,8 @@ tasks = [
     # "correlation",
     # "negative-percentage",
     "dev_strength",
-    "test_strength"
+    "test_strength",
+    "negative-percentage-test-score"
 ]
 
 dct_score_metric_fancy = {
@@ -108,6 +109,40 @@ if __name__ == "__main__":
             for strat in strategies:
                 if strat in lst_skip_strategy:
                     continue
+
+                if task == "negative-percentage-test-score":
+                    if strat == "OMP":
+                        df_strat = df_data[df_data["strategy"] == strat]
+                        df_strat = df_strat[df_strat["subset"] == "train+dev/train+dev"]
+                        df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False]
+
+                        df_groupby_forest_size = df_strat_wo_weights.groupby(['forest_size'])
+
+                        forest_sizes = df_groupby_forest_size["forest_size"].mean().values
+                        x_values = df_groupby_forest_size["negative-percentage"].mean().values
+                        y_values = df_groupby_forest_size["test_score"].mean().values
+                        # print(df_strat)
+                        fig.add_trace(go.Scatter(x=x_values, y=y_values,
+                                                 mode='markers',
+                                                 name=strat,
+                                                 # color=forest_sizes,
+                                                 marker=dict(
+        # size=16,
+        # cmax=39,
+        # cmin=0,
+        color=forest_sizes,
+        colorbar=dict(
+            title="Forest Size"
+        ),
+        # colorscale="Viridis"
+    ),
+                                                 # marker=dict(color="rgb{}".format(dct_color_by_strategy[strat]))
+                                                 ))
+
+                    continue
+
+
+
                 df_strat = df_data[df_data["strategy"] == strat]
                 df_strat = df_strat[df_strat["subset"] == "train+dev/train+dev"]
 
@@ -135,9 +170,10 @@ if __name__ == "__main__":
 
             title = "{} {}".format(task, data_name)
             yaxis_title = "% negative weights" if task == "negative-percentage" else dct_score_metric_fancy[score_metric_name]
+            xaxis_title = "% negative weights" if task == "negative-percentage-test-score" else "# Selected Trees"
             fig.update_layout(barmode='group',
                               # title=title,
-                              xaxis_title="# Selected Trees",
+                              xaxis_title=xaxis_title,
                               yaxis_title=yaxis_title,
                               font=dict(
                                   # family="Courier New, monospace",
-- 
GitLab