# Recovered from the patch "results_to_csv script" (commit bbad0e5), which
# creates code/vizualisation/results_to_csv.py.
"""Flatten a tree of pickled experiment results into a single CSV file.

Each result file is expected to sit in a directory whose trailing path
components look like::

    <dataset>/<experiment id>/<*>/<seed>/<*>/<forest size>[_...]

The experiment id is mapped to a data subset and a pruning strategy through
the lookup tables below, and the dataset key is mapped to a display name.
"""
from pathlib import Path
import os
import pandas as pd
from pprint import pprint
import pickle
from collections import defaultdict


_TECHNIQUES = [
    'None',
    'Random',
    'OMP',
    'OMP Distillation',
    'Kmeans',
    'Zhang Similarities',
    'Zhang Predictions',
    'Ensemble',
]

# Experiment ids "1".."8" use one subset, "9".."16" the other.
dct_experiment_id_subset = {
    str(idx): "train+dev/train+dev" if idx <= 8 else "train/dev"
    for idx in range(1, 17)
}

# The same eight techniques are listed for ids "1".."8" and "9".."16".
dct_experiment_id_technique = {
    str(idx): _TECHNIQUES[(idx - 1) % len(_TECHNIQUES)]
    for idx in range(1, 17)
}

# Dataset directory name -> name written to the "dataset" column.
dct_dataset_fancy = {
    "boston": "Boston",
    "breast_cancer": "Breast Cancer",
    "california_housing": "California Housing",
    "diabetes": "Diabetes",
    "diamonds": "Diamonds",
    "digits": "Digits",
    "iris": "Iris",
    "kin8nm": "Kin8nm",
    "kr-vs-kp": "KR-VS-KP",
    "olivetti_faces": "Olivetti Faces",
    "spambase": "Spambase",
    "steel-plates": "Steel Plates",
    "wine": "Wine",
    "gamma": "Gamma",
    "lfw_pairs": "LFW Pairs",
}

# Keys of a result object that are never copied into the CSV.
skip_attributes = ["datetime", "model_weights"]


def parse_result_path(path_file):
    """Extract the experiment coordinates encoded in a result file's path.

    Args:
        path_file: path of a result file; its parent directory must have at
            least six components.

    Returns:
        A dict with keys ``forest_size`` (int), ``seed`` (int), ``id_xp``
        (str), ``dataset`` (str) and ``wo_weights`` (bool).

    Raises:
        IndexError: if the parent directory has fewer than six components.
        ValueError: if the forest size or seed component is not an integer.
    """
    path_file = Path(path_file)
    # ``parts`` splits on the platform separator, unlike ``str.split("/")``.
    parts = path_file.parent.parts

    # The marker is searched in the whole path, file name included.
    wo_weights = "no_weights" in str(path_file)
    if wo_weights:
        forest_size = int(parts[-1].split("_")[0])
    else:
        forest_size = int(parts[-1])

    return {
        "forest_size": forest_size,
        "seed": int(parts[-3]),
        "id_xp": str(parts[-5]),
        "dataset": str(parts[-6]),
        "wo_weights": wo_weights,
    }


def collect_results(dir_path, output_file=None):
    """Walk ``dir_path`` and build one row (dict) per result file found.

    Args:
        dir_path: root directory to scan recursively.
        output_file: optional path that is skipped while scanning. Pass the
            CSV destination when it lives under ``dir_path`` so that a
            previous run's output is not mistaken for a pickle.

    Returns:
        A list of dicts, one per result file, in ``os.walk`` order. A result
        key that matches a metadata column overwrites that column.
    """
    output_file = None if output_file is None else Path(output_file)
    rows = []

    for root, _dirs, files in os.walk(dir_path, topdown=False):
        for file_str in files:
            path_file = Path(root) / file_str
            if output_file is not None and path_file == output_file:
                continue

            # SECURITY: unpickling runs arbitrary code; only point this
            # script at result directories you produced yourself.
            with open(path_file, 'rb') as result_stream:
                obj_results = pickle.load(result_stream)

            info = parse_result_path(path_file)
            id_xp = info["id_xp"]
            row = {
                "forest_size": info["forest_size"],
                "seed": info["seed"],
                "dataset": dct_dataset_fancy[info["dataset"]],
                "subset": dct_experiment_id_subset[id_xp],
                "strategy": dct_experiment_id_technique[id_xp],
                "wo_weights": info["wo_weights"],
            }

            for key_result, val_result in obj_results.items():
                if key_result in skip_attributes:
                    continue
                if val_result == "":
                    val_result = None
                row[key_result] = val_result

            rows.append(row)
            print(path_file)

    return rows


def main():
    """Read the project's result directory and write ``results.csv`` in it."""
    # Imported here so the tables and helpers above can be imported without
    # python-dotenv being installed.
    from dotenv import load_dotenv, find_dotenv

    load_dotenv(find_dotenv('.env'))
    dir_name = "results/bolsonaro_models_25-03-20"
    dir_path = Path(os.environ["project_dir"]) / dir_name

    output_dir_file = dir_path / "results.csv"

    # Row-wise construction keeps columns aligned even when result files do
    # not all expose the same keys (missing cells are left empty).
    final_df = pd.DataFrame(collect_results(dir_path, output_dir_file))
    final_df.to_csv(output_dir_file)
    print(final_df)


if __name__ == "__main__":
    main()