diff --git a/code/bolsonaro/models/omp_forest.py b/code/bolsonaro/models/omp_forest.py index 6c9a3f91587ac6b34cf17f47c6b46d5bb8ce9c54..0e2a109a9745b3d94021eb523ad7fc3129fb8793 100644 --- a/code/bolsonaro/models/omp_forest.py +++ b/code/bolsonaro/models/omp_forest.py @@ -136,9 +136,9 @@ class SingleOmpForest(OmpForest): Make all the base tree predictions :param X: a Forest - :return: a np.array of the predictions of the entire forest + :return: a np.array of the predictions of the trees selected by OMP without applying the weight """ - forest_predictions = self._base_estimator_predictions(X).T + forest_predictions = np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_]) if self._models_parameters.normalize_D: forest_predictions = forest_predictions.T @@ -146,7 +146,5 @@ class SingleOmpForest(OmpForest): forest_predictions = forest_predictions.T weights = self._omp.coef_ - omp_trees_indices = np.nonzero(weights)[0] - - select_trees = np.mean(forest_predictions[omp_trees_indices], axis=0) + select_trees = np.mean(forest_predictions[weights != 0], axis=0) return select_trees diff --git a/code/bolsonaro/models/omp_forest_classifier.py b/code/bolsonaro/models/omp_forest_classifier.py index 3cb250de23870a707199048eb6842179399a3de6..9dfabb7d9c2836b116ccce97aa090a67cd134403 100644 --- a/code/bolsonaro/models/omp_forest_classifier.py +++ b/code/bolsonaro/models/omp_forest_classifier.py @@ -42,9 +42,7 @@ class OmpForestBinaryClassifier(SingleOmpForest): forest_predictions = forest_predictions.T weights = self._omp.coef_ - omp_trees_indices = np.nonzero(weights) - - omp_trees_predictions = forest_predictions[omp_trees_indices].T[1] + omp_trees_predictions = forest_predictions[weights != 0].T[1] # Here forest_pred is the probability of being class 1. 
diff --git a/code/compute_results.py b/code/compute_results.py index a6eb2f5fb3e6a9326a126fb609614c9d8a8bffcd..81f4ff94bc243e4e20d5251534f5201db8123e8c 100644 --- a/code/compute_results.py +++ b/code/compute_results.py @@ -366,7 +366,7 @@ if __name__ == "__main__": omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes( args.models_dir, args.results_dir, int(args.experiment_ids[2])) #omp_with_params_without_weights - logger.info('Loading omp_with_params experiment scores...') + logger.info('Loading omp_no_weights experiment scores...') omp_with_params_without_weights_train_scores, omp_with_params_without_weights_dev_scores, omp_with_params_without_weights_test_scores, _, \ omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes( args.models_dir, args.results_dir, int(args.experiment_ids[2]), weights=False) diff --git a/scripts/run_compute_results.sh b/scripts/run_compute_results.sh index ccf3cc24c112269cd7c2dd3b235851c8c727e256..ef15c2a6a55d9d4d053c7428a063c4360e337b53 100755 --- a/scripts/run_compute_results.sh +++ b/scripts/run_compute_results.sh @@ -1,7 +1,9 @@ seeds='1 2 3' -for dataset in california_housing #kin8nm kr-vs-kp spambase steel-plates diabetes diamonds boston california_housing #lfw_pairs diamonds boston iris diabetes digits linnerud wine breast_cancer olivetti_faces 20newsgroups_vectorized california_housing +for dataset in kin8nm kr-vs-kp spambase steel-plates california_housing boston iris diabetes digits wine breast_cancer olivetti_faces diamonds do + python code/compute_results.py --stage=1 --experiment_ids 1 2 3 4 5 6 --dataset_name=$dataset --models_dir=models/$dataset/stage1 + python code/compute_results.py --stage=2 --experiment_ids 1 2 3 4 --dataset_name=$dataset --models_dir=models/$dataset/stage2 + python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=$dataset --models_dir=models/$dataset/stage3 python code/compute_results.py 
--stage=4 --experiment_ids 1 2 3 --dataset_name=$dataset --models_dir=models/$dataset/stage4 - #python code/compute_results.py --stage=5 --experiment_ids 1 2 3 kmeans=5 --dataset_name=$dataset --models_dir=models/$dataset/stage5 - #python code/compute_results.py --stage=5 --experiment_ids 1 2 3 ensemble=5 --dataset_name=$dataset --models_dir=models/$dataset/stage5_similarity + python code/compute_results.py --stage=5 --experiment_ids 1 2 3 similarity=4 kmeans=5 ensemble=6 --dataset_name=$dataset --models_dir=models/$dataset/stage5 done diff --git a/scripts/run_stage1_experiments.sh b/scripts/run_stage1_experiments.sh index 237c265a99d603528d4f69c19ff849cf87efe12b..83784619ab8932424cc8e0237fea2f28db123a50 100755 --- a/scripts/run_stage1_experiments.sh +++ b/scripts/run_stage1_experiments.sh @@ -5,10 +5,10 @@ seeds='1 2 3 4 5' for dataset in kin8nm kr-vs-kp spambase steel-plates california_housing boston iris diabetes digits wine breast_cancer olivetti_faces diamonds do - oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 1 none_with_params --extraction_strategy=none --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --experiment_id=1 --models_dir=models/$dataset/stage1" - oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 1 random_with_params --extraction_strategy=random --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --experiment_id=2 --models_dir=models/$dataset/stage1" - oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 1 omp_with_params --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --experiment_id=3 
--models_dir=models/$dataset/stage1" - oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 1 none_wo_params --extraction_strategy=none --skip_best_hyperparams --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --experiment_id=4 --models_dir=models/$dataset/stage1" - oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 1 random_wo_params --extraction_strategy=random --skip_best_hyperparams --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --experiment_id=5 --models_dir=models/$dataset/stage1" - oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 1 omp_wo_params --skip_best_hyperparams --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --experiment_id=6 --models_dir=models/$dataset/stage1" + oarsub -p "(gpu is null)" -l /core=$core_number,walltime=$walltime "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 1 none_with_params --extraction_strategy=none --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --experiment_id=1 --models_dir=models/$dataset/stage1" + oarsub -p "(gpu is null)" -l /core=$core_number,walltime=$walltime "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 1 random_with_params --extraction_strategy=random --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --experiment_id=2 --models_dir=models/$dataset/stage1" + oarsub -p "(gpu is null)" -l /core=$core_number,walltime=$walltime "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds 
$seeds --save_experiment_configuration 1 omp_with_params --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --experiment_id=3 --models_dir=models/$dataset/stage1" + oarsub -p "(gpu is null)" -l /core=$core_number,walltime=$walltime "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 1 none_wo_params --extraction_strategy=none --skip_best_hyperparams --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --experiment_id=4 --models_dir=models/$dataset/stage1" + oarsub -p "(gpu is null)" -l /core=$core_number,walltime=$walltime "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 1 random_wo_params --extraction_strategy=random --skip_best_hyperparams --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --experiment_id=5 --models_dir=models/$dataset/stage1" + oarsub -p "(gpu is null)" -l /core=$core_number,walltime=$walltime "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 1 omp_wo_params --skip_best_hyperparams --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --experiment_id=6 --models_dir=models/$dataset/stage1" done diff --git a/scripts/run_stage3_experiments.sh b/scripts/run_stage3_experiments.sh index 73e8c1a732afd49844f9739cab1f83527d1ce858..236d4fa760e4c3ef4d7b37d0bd2c81803da9acd2 100755 --- a/scripts/run_stage3_experiments.sh +++ b/scripts/run_stage3_experiments.sh @@ -1,11 +1,11 @@ #!/bin/bash core_number=5 -walltime=1:00 +walltime=$walltime seeds='1 2 3 4 5' for dataset in kin8nm kr-vs-kp spambase steel-plates california_housing boston iris diabetes digits wine breast_cancer olivetti_faces diamonds do - oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 3 train-dev_subset 
--extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --subsets_used=train,dev --experiment_id=1 --models_dir=models/$dataset/stage3" - oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 3 train-dev_train-dev_subset --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --subsets_used=train+dev,train+dev --experiment_id=2 --models_dir=models/$dataset/stage3" - oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 3 train-train-dev_subset --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --subsets_used=train,train+dev --experiment_id=3 --models_dir=models/$dataset/stage3" + oarsub -p "(gpu is null)" -l /core=$core_number,walltime=$walltime "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 3 train-dev_subset --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --subsets_used=train,dev --experiment_id=1 --models_dir=models/$dataset/stage3" + oarsub -p "(gpu is null)" -l /core=$core_number,walltime=$walltime "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 3 train-dev_train-dev_subset --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --subsets_used=train+dev,train+dev --experiment_id=2 --models_dir=models/$dataset/stage3" + oarsub -p "(gpu is null)" -l /core=$core_number,walltime=$walltime "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 3 train-train-dev_subset --extracted_forest_size_stop=1 --extracted_forest_size_samples=30 --subsets_used=train,train+dev --experiment_id=3 --models_dir=models/$dataset/stage3" done