Skip to content
Snippets Groups Projects
results_to_csv.py 5.63 KiB
Newer Older
  • Learn to ignore specific revisions
  • Luc Giffon's avatar
    Luc Giffon committed
    from pathlib import Path
    import os
    import pandas as pd
    from pprint import pprint
    import pickle
    from collections import defaultdict
    
    Luc Giffon's avatar
    Luc Giffon committed
    import numpy as np
    
    Luc Giffon's avatar
    Luc Giffon committed
    
    from dotenv import load_dotenv, find_dotenv
    
    
    # Map each experiment id (as a string) to the label written in the "subset"
    # column: ids "1".."8" -> "train+dev/train+dev", ids "9".."16" -> "train/dev".
    dct_experiment_id_subset = {
        **{str(xp_id): "train+dev/train+dev" for xp_id in range(1, 9)},
        **{str(xp_id): "train/dev" for xp_id in range(9, 17)},
    }
    
    
    Luc Giffon's avatar
    Luc Giffon committed
    # Display names of the strategies; these are the values written to the
    # "strategy" column of the output table.
    NONE = 'None'
    Random = 'Random'
    OMP = 'OMP'
    OMP_Distillation = 'OMP Distillation'
    Kmeans = 'Kmeans'
    Zhang_Similarities = 'Zhang Similarities'
    Zhang_Predictions = 'Zhang Predictions'
    Ensemble = 'Ensemble'

    # Map each experiment id (as a string) to its strategy name. Ids "9".."16"
    # repeat the eight strategies of ids "1".."8" in the same order.
    dct_experiment_id_technique = {"1": NONE,
                                   "2": Random,
                                   "3": OMP,
                                   "4": OMP_Distillation,
                                   "5": Kmeans,
                                   "6": Zhang_Similarities,
                                   "7": Zhang_Predictions,
                                   "8": Ensemble,
                                   "9": NONE,
                                   "10": Random,
                                   "11": OMP,
                                   "12": OMP_Distillation,
                                   "13": Kmeans,
                                   "14": Zhang_Similarities,
                                   "15": Zhang_Predictions,
                                   "16": Ensemble,
                                   }
    
    
    # Map a dataset identifier, as it appears as a directory name in the results
    # tree, to the human-readable name written to the "dataset" column.
    dct_dataset_fancy = {
        "boston": "Boston",
        "breast_cancer": "Breast Cancer",
        "california_housing": "California Housing",
        "diabetes": "Diabetes",
        "diamonds": "Diamonds",
        "digits": "Digits",
        "iris": "Iris",
        "kin8nm": "Kin8nm",
        "kr-vs-kp": "KR-VS-KP",
        "olivetti_faces": "Olivetti Faces",
        "spambase": "Spambase",
        "steel-plates": "Steel Plates",
        "wine": "Wine",
        "gamma": "Gamma",
        "lfw_pairs": "LFW Pairs"
    }
    
    
    Luc Giffon's avatar
    Luc Giffon committed
    # Keys of a pickled result dict that are not copied into the output table.
    skip_attributes = ["datetime"]
    
    Luc Giffon's avatar
    Luc Giffon committed
    # Experiment ids for which at least one result file had no "coherence"
    # (resp. "correlation") value; filled while the result files are scanned
    # and printed once the scan is over.
    set_no_coherence = set()
    set_no_corr = set()
    
    Luc Giffon's avatar
    Luc Giffon committed
    
    def _negative_weight_ratio(weights):
        """Return the share of strictly negative entries among the non-zero ones.

        Returns ``nan`` when *weights* has no non-zero entry, rather than
        dividing zero by zero.
        """
        nb_lt_zero = np.sum(weights < 0)
        nb_gt_zero = np.sum(weights > 0)
        nb_non_zero = nb_gt_zero + nb_lt_zero
        if nb_non_zero == 0:
            return float("nan")
        return nb_lt_zero / nb_non_zero


    def _parse_run_location(path_dir, path_file):
        """Read the run settings encoded in the location of a result file.

        The settings are taken from fixed positions counted from the end of
        *path_dir*: the last component is the forest size (followed by
        ``_<suffix>`` when "no_weights" appears in *path_file*), the third from
        the end is the seed, the fifth the experiment id and the sixth the
        dataset identifier.

        Returns a tuple ``(forest_size, seed, id_xp, dataset, wo_weights)``.
        Raises ``ValueError`` / ``IndexError`` if the path does not follow
        that layout.
        """
        # Path.parts is separator-agnostic, unlike splitting str(path) on "/".
        parts = path_dir.parts
        wo_weights = "no_weights" in str(path_file)

        leaf = parts[-1]
        forest_size = int(leaf.split("_")[0]) if wo_weights else int(leaf)

        seed = int(parts[-3])
        id_xp = str(parts[-5])
        dataset = str(parts[-6])
        return forest_size, seed, id_xp, dataset, wo_weights


    if __name__ == "__main__":
        # Walk the results directory, load every pickled result dict and gather
        # them, together with the settings encoded in each file's path, into a
        # single table saved as "results.csv" at the root of that directory.

        load_dotenv(find_dotenv('.env'))
        dir_name = "results/bolsonaro_models_25-03-20"
        dir_path = Path(os.environ["project_dir"]) / dir_name

        output_dir_file = dir_path / "results.csv"

        # One list per output column; every processed file appends one value
        # to each column it provides.
        dct_results = defaultdict(list)

        for root, _dirs, files in os.walk(dir_path, topdown=False):
            path_dir = Path(root)

            for file_str in files:
                # Do not try to read back the table written by a previous run.
                if file_str == "results.csv":
                    continue

                path_file = path_dir / file_str
                print(path_file)

                # NOTE(security): pickle.load executes arbitrary code from the
                # file; only run this script on result directories you trust.
                try:
                    with open(path_file, 'rb') as pickle_file:
                        obj_results = pickle.load(pickle_file)
                except Exception as err:
                    # Unpickling can fail in many ways (truncated file, missing
                    # class, not a pickle at all). Skip the file: carrying on
                    # would either hit an undefined `obj_results` or silently
                    # re-use the results of the previously loaded file.
                    print("problem loading pickle file {}".format(path_file), repr(err))
                    continue

                forest_size, seed, id_xp, dataset, bool_wo_weights = \
                    _parse_run_location(path_dir, path_file)

                dct_results["forest_size"].append(forest_size)
                dct_results["seed"].append(seed)
                dct_results["dataset"].append(dct_dataset_fancy[dataset])
                dct_results["subset"].append(dct_experiment_id_subset[id_xp])
                dct_results["strategy"].append(dct_experiment_id_technique[id_xp])
                dct_results["wo_weights"].append(bool_wo_weights)

                # Example of one unpickled result dict:
                # {'model_weights': '',
                #  'training_time': 0.0032033920288085938,
                #  'datetime': datetime.datetime(2020, 3, 25, 0, 28, 34, 938400),
                #  'train_score': 1.0,
                #  'dev_score': 0.978021978021978,
                #  'test_score': 0.9736842105263158,
                #  'train_score_base': 1.0,
                #  'dev_score_base': 0.978021978021978,
                #  'test_score_base': 0.9736842105263158,
                #  'score_metric': 'accuracy_score',
                #  'base_score_metric': 'accuracy_score',
                #  'coherence': 0.9892031711775613,
                #  'correlation': 0.9510700193340448}
                for key_result, val_result in obj_results.items():
                    if key_result in skip_attributes:
                        continue

                    # An empty string marks a missing value. The isinstance
                    # guard keeps NumPy arrays out of the `== ""` comparison,
                    # which is elementwise and has no single truth value.
                    if isinstance(val_result, str) and val_result == "":
                        val_result = None

                    if key_result == "model_weights":
                        if val_result is None:
                            dct_results["negative-percentage"].append(None)
                        else:
                            dct_results["negative-percentage"].append(
                                _negative_weight_ratio(val_result))

                    # Remember which experiments lack these two measures.
                    if key_result == "coherence" and val_result is None:
                        set_no_coherence.add(id_xp)
                    if key_result == "correlation" and val_result is None:
                        set_no_corr.add(id_xp)

                    dct_results[key_result].append(val_result)

        print("coh", set_no_coherence, len(set_no_coherence))
        print("cor", set_no_corr, len(set_no_corr))

        final_df = pd.DataFrame.from_dict(dct_results)
        final_df.to_csv(output_dir_file)
        print(final_df)