Newer
Older
from dotenv import load_dotenv, find_dotenv
from pathlib import Path
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.io as pio
# Strategy names that the main plotting loop skips (it `continue`s on any
# strategy found in this list).
lst_skip_strategy = ["None", "OMP Distillation", "OMP Distillation w/o weights"]
# lst_skip_subset = ["train/dev"]
"coherence",
"correlation",
# "negative-percentage"
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
]
# Display label for each raw "score_metric" value; the main script uses it as
# the y-axis title of the generated figures.
dct_score_metric_fancy = {
    "accuracy_score": "% Accuracy",
    "mean_squared_error": "MSE",
}
# Make "plotly_white" the default template for figures created from here on.
pio.templates.default = "plotly_white"
# RGB colour of each strategy's curve. Values must stay tuples: the plotting
# helper concatenates an alpha component onto them to build the rgba fill colour.
dct_color_by_strategy = {
    "OMP": (255, 0, 0),                           # red
    "OMP Distillation": (255, 0, 0),              # red
    "OMP Distillation w/o weights": (255, 128, 0),  # orange
    "OMP w/o weights": (255, 128, 0),             # orange
    "Random": (0, 0, 0),                          # black
    "Zhang Similarities": (255, 255, 0),          # yellow
    "Zhang Predictions": (128, 0, 128),           # purple
    "Ensemble": (0, 0, 255),                      # blue
    "Kmeans": (0, 255, 0),                        # green
}
# Line dash style of each strategy's curve, passed as the `dash` entry of the
# trace's line settings; None leaves the dash style unset.
dct_dash_by_strategy = {
    "OMP": None,
    "OMP Distillation": "dash",
    "OMP Distillation w/o weights": "dash",
    "OMP w/o weights": None,
    "Random": "dot",
    "Zhang Similarities": "dash",
    "Zhang Predictions": "dash",
    "Ensemble": "dash",
    "Kmeans": "dash",
}
def add_trace_from_df(df, fig):
    """Add one strategy's mean curve and its +/- one-std band to ``fig``.

    Rows of ``df`` are grouped by ``forest_size``; for every size the mean and
    the standard deviation of the column named by the module-level ``task``
    are computed. Two traces are then appended to ``fig``:

    * a line through the per-size means, named after the module-level
      ``strat`` and styled via ``dct_dash_by_strategy`` /
      ``dct_color_by_strategy``;
    * a filled, legend-less polygon running along ``mean + std`` and back
      along ``mean - std``, coloured with the strategy colour plus
      ``tpl_transparency`` as alpha.

    Args:
        df: DataFrame holding a ``forest_size`` column and a column named
            after the current ``task``. It is left untouched.
        fig: Object exposing ``add_trace`` (a plotly figure in this script);
            the two traces are added to it in place.
    """
    # groupby() orders the group keys itself, so no prior sort is required.
    # The former in-place sort mutated the caller's frame, which is a filtered
    # slice and therefore triggered pandas' SettingWithCopyWarning.
    grouped_task = df.groupby("forest_size")[task]
    mean_series = grouped_task.mean()

    # The group index already holds the distinct forest sizes in ascending order.
    forest_sizes = mean_series.index.tolist()
    mean_value = mean_series.values
    std_value = grouped_task.std().values
    std_value_upper = list(mean_value + std_value)
    std_value_lower = list(mean_value - std_value)

    fig.add_trace(go.Scatter(
        x=forest_sizes,
        y=mean_value,
        mode='lines',
        name=strat,
        line=dict(
            dash=dct_dash_by_strategy[strat],
            color="rgb{}".format(dct_color_by_strategy[strat]),
        ),
    ))

    # Closed polygon: left-to-right along the upper bound, then right-to-left
    # along the lower bound, so that 'toself' fills the area between them.
    fig.add_trace(go.Scatter(
        x=forest_sizes + forest_sizes[::-1],
        y=std_value_upper + std_value_lower[::-1],
        fill='toself',
        showlegend=False,
        fillcolor='rgba{}'.format(dct_color_by_strategy[strat] + tpl_transparency),
        line_color='rgba(255,255,255,0)',
        name=strat,
    ))
# Alpha component, kept as a 1-tuple so it can be concatenated onto a
# strategy's RGB tuple to form the rgba fill colour of the std band.
tpl_transparency = (0.1,)
# Script entry point: read the results CSV and, for every task and dataset,
# add one curve per strategy to a figure and save it as a PNG.
# NOTE(review): indentation and some lines of this block were lost in
# extraction; only what is visible is documented here.
if __name__ == "__main__":
load_dotenv(find_dotenv('.env'))
dir_name = "bolsonaro_models_25-03-20"
# Input and output directories are both rooted at the "project_dir"
# environment variable.
dir_path = Path(os.environ["project_dir"]) / "results" / dir_name
out_dir = Path(os.environ["project_dir"]) / "reports/figures" / dir_name
input_dir_file = dir_path / "results.csv"
# NOTE(review): the handle returned by open() is never closed here;
# pd.read_csv also accepts the path itself.
df_results = pd.read_csv(open(input_dir_file, 'rb'))
# Distinct values found in the results file.
datasets = set(df_results["dataset"].values)
strategies = set(df_results["strategy"].values)
# NOTE(review): `subsets` is not used in the visible part of the script.
subsets = set(df_results["subset"].values)
# NOTE(review): `tasks` is not defined in the visible lines — presumably the
# list whose entries include "coherence" and "correlation"; confirm.
for task in tasks:
for data_name in datasets:
df_data = df_results[df_results["dataset"] == data_name]
# The metric name of the dataset's first row is used for the y-axis label.
score_metric_name = df_data["score_metric"].values[0]
##################
# all techniques #
##################
for strat in strategies:
if strat in lst_skip_strategy:
continue
df_strat = df_data[df_data["strategy"] == strat]
# Only rows of the "train+dev/train+dev" subset are plotted.
df_strat = df_strat[df_strat["subset"] == "train+dev/train+dev"]
if "OMP" in strat:
###########################
# processing with weights #
###########################
# Despite the variable name, this selects the rows WITH weights
# (wo_weights == False).
df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False]
# For the Boston dataset only forest sizes below 400 are kept.
if data_name == "Boston":
df_strat_wo_weights = df_strat_wo_weights[df_strat_wo_weights["forest_size"] < 400]
# NOTE(review): the following bare numbers (121-154) look like line-number
# residue left by extraction; the statements they replaced are not visible,
# including wherever `fig` is created.
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#################################
# general processing wo_weights #
#################################
# OMP strategies use the rows without weights here; every other strategy
# uses its rows where wo_weights is False.
if "OMP" in strat:
df_strat_wo_weights = df_strat[df_strat["wo_weights"] == True]
else:
df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False]
# add_trace_from_df reads the module-level `strat` for the trace name, colour
# and dash style, so the name is switched to its " w/o weights" variant first.
if "OMP" in strat:
strat = "{} w/o weights".format(strat)
add_trace_from_df(df_strat_wo_weights, fig)
title = "{} {}".format(task, data_name)
yaxis_title = "% negative weights" if task == "negative-percentage" else dct_score_metric_fancy[score_metric_name]
fig.update_layout(barmode='group',
title=title,
xaxis_title="# Selected Trees",
yaxis_title=yaxis_title,
font=dict(
# family="Courier New, monospace",
size=24,
color="black"
),
showlegend = False,
margin = dict(
l=1,
r=1,
b=1,
t=1,
# pad=4
),
legend=dict(
traceorder="normal",
# bgcolor="LightSteelBlue",
# bordercolor="Black",
borderwidth=1,
)
)
# fig.show()
# Build a file-system-friendly name: spaces, "/" and "+" all become "_".
sanitize = lambda x: x.replace(" ", "_").replace("/", "_").replace("+", "_")
filename = sanitize(title)
# One output sub-directory per task, created on demand.
output_dir = out_dir / sanitize(task)
output_dir.mkdir(parents=True, exist_ok=True)
fig.write_image(str((output_dir / filename).absolute()) + ".png")
# exit()