diff --git a/code/playground/nn_omp.py b/code/playground/nn_omp.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/code/vizualisation/csv_to_figure.py b/code/vizualisation/csv_to_figure.py index 73b990f363b8e65a97e7c6a91c6b304a199e3034..244314ba9fcbb49ab48762c09dd63e4d69df9cf5 100644 --- a/code/vizualisation/csv_to_figure.py +++ b/code/vizualisation/csv_to_figure.py @@ -9,15 +9,15 @@ import plotly.io as pio lst_skip_strategy = ["None", "OMP Distillation", "OMP Distillation w/o weights"] # lst_skip_subset = ["train/dev"] -lst_skip_subset = [] +lst_task_train_dev = ["coherence", "correlation"] tasks = [ # "train_score", # "dev_score", # "test_score", - # "coherence", - # "correlation", - "negative-percentage" + "coherence", + "correlation", + # "negative-percentage" ] dct_score_metric_fancy = { @@ -98,83 +98,75 @@ if __name__ == "__main__": df_data = df_results[df_results["dataset"] == data_name] score_metric_name = df_data["score_metric"].values[0] - for subset_name in subsets: - if subset_name in lst_skip_subset: - continue - df_subset = df_data[df_data["subset"] == subset_name] - fig = go.Figure() - - ################## - # all techniques # - ################## - for strat in strategies: - if strat in lst_skip_strategy: - continue - df_strat = df_subset[df_subset["strategy"] == strat] - - if "OMP" in strat: - ########################### - # traitement avec weights # - ########################### - df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False] - if data_name == "Boston" and subset_name == "train+dev/train+dev": - df_strat_wo_weights = df_strat_wo_weights[df_strat_wo_weights["forest_size"] < 400] - add_trace_from_df(df_strat_wo_weights, fig) - - if "OMP" in strat and subset_name == "train/dev": - continue - elif "Random" not in strat and subset_name == "train/dev": - continue - - ################################# - # traitement general wo_weights # - ################################# - if "Random" in strat: - df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False] - else: - df_strat_wo_weights = df_strat[df_strat["wo_weights"] == True] - - if "OMP" in strat: - strat = "{} w/o weights".format(strat) + fig = go.Figure() + ################## + # all techniques # + ################## + for strat in strategies: + if strat in lst_skip_strategy: + continue + df_strat = df_data[df_data["strategy"] == strat] + df_strat = df_strat[df_strat["subset"] == "train+dev/train+dev"] + + if "OMP" in strat: + ########################### + # traitement avec weights # + ########################### + df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False] + if data_name == "Boston": + df_strat_wo_weights = df_strat_wo_weights[df_strat_wo_weights["forest_size"] < 400] add_trace_from_df(df_strat_wo_weights, fig) - title = "{} {} {}".format(task, data_name, subset_name) - yaxis_title = "% negative weights" if task == "negative-percentage" else dct_score_metric_fancy[score_metric_name] - fig.update_layout(barmode='group', - title=title, - xaxis_title="# Selected Trees", - yaxis_title=yaxis_title, + ################################# + # traitement general wo_weights # + ################################# + if "OMP" in strat: + df_strat_wo_weights = df_strat[df_strat["wo_weights"] == True] + else: + df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False] + + if "OMP" in strat: + strat = "{} w/o weights".format(strat) + + add_trace_from_df(df_strat_wo_weights, fig) + + title = "{} {}".format(task, data_name) + yaxis_title = "% negative weights" if task == "negative-percentage" else dct_score_metric_fancy[score_metric_name] + fig.update_layout(barmode='group', + title=title, + xaxis_title="# Selected Trees", + yaxis_title=yaxis_title, + font=dict( + # family="Courier New, monospace", + size=24, + color="black" + ), + showlegend = False, + margin = dict( + l=1, + r=1, + b=1, + t=1, + # pad=4 + ), + legend=dict( + traceorder="normal", font=dict( - # family="Courier New, monospace", + family="sans-serif", size=24, color="black" ), - showlegend = False, - margin = dict( - l=1, - r=1, - b=1, - t=1, - # pad=4 - ), - legend=dict( - traceorder="normal", - font=dict( - family="sans-serif", - size=24, - color="black" - ), - # bgcolor="LightSteelBlue", - # bordercolor="Black", - borderwidth=1, - ) - ) - # fig.show() - sanitize = lambda x: x.replace(" ", "_").replace("/", "_").replace("+", "_") - filename = sanitize(title) - output_dir = out_dir / sanitize(subset_name) / sanitize(task) - output_dir.mkdir(parents=True, exist_ok=True) - fig.write_image(str((output_dir / filename).absolute()) + ".png") - - # exit() + # bgcolor="LightSteelBlue", + # bordercolor="Black", + borderwidth=1, + ) + ) + # fig.show() + sanitize = lambda x: x.replace(" ", "_").replace("/", "_").replace("+", "_") + filename = sanitize(title) + output_dir = out_dir / sanitize(task) + output_dir.mkdir(parents=True, exist_ok=True) + fig.write_image(str((output_dir / filename).absolute()) + ".png") + + # exit() diff --git a/code/vizualisation/csv_to_table.py b/code/vizualisation/csv_to_table.py index 9547361f311ea1f0c9bb61b9f2454e26a2f8650a..440e5fc8454732e40af3cce667a04d4677d032af 100644 --- a/code/vizualisation/csv_to_table.py +++ b/code/vizualisation/csv_to_table.py @@ -77,7 +77,7 @@ dct_data_metric = { def get_max_from_df(df, best_fct): - nb_to_consider = 30 + nb_to_consider = 10 df.sort_values(by="forest_size", inplace=True) df_groupby_forest_size = df.groupby(['forest_size']) forest_sizes = list(df_groupby_forest_size["forest_size"].mean().values)[:nb_to_consider] @@ -292,6 +292,8 @@ if __name__ == "__main__": print("\\midrule") if idx_lin == 6: print("\\midrule") + if lst_data_ordered[idx_lin-1] == "Diamonds": + print("%", end="") line_print = " ".join(list(lin)) line_print = line_print.rstrip(" &") + "\\\\" print(line_print) diff --git a/code/vizualisation/results_to_csv.py b/code/vizualisation/results_to_csv.py index 1def6e62935debd9405900c721a1b29b45a2b8cb..669451b1f812f7f83584670790196601f1a5f40e 100644 --- a/code/vizualisation/results_to_csv.py +++ b/code/vizualisation/results_to_csv.py @@ -77,8 +77,10 @@ if __name__ == "__main__": continue path_dir = Path(root) path_file = path_dir / file_str + print(path_file) try: - obj_results = pickle.load(open(path_file, 'rb')) + with open(path_file, 'rb') as pickle_file: + obj_results = pickle.load(pickle_file) except: print("problem loading pickle file {}".format(path_file)) @@ -118,6 +120,7 @@ if __name__ == "__main__": percentage_lt_zero = nb_lt_zero / (nb_gt_zero + nb_lt_zero) dct_results["negative-percentage"].append(percentage_lt_zero) if val_result == "": + # print(key_result, val_result) val_result = None if key_result == "coherence" and val_result is None: set_no_coherence.add(id_xp)