Commit 1379c412 authored by Léo Bouscarrat

Change for multiclass

parent c80ddd61
2 merge requests: !11 Resolve "Correction of multiclass classif", !9 Resolve "Experiment pipeline"
Showing 88 additions and 58 deletions
@@ -24,6 +24,7 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):
         return self._base_forest_estimator.score(X, y)

     def _base_estimator_predictions(self, X):
+        # We need to use predict_proba to get the probabilities of each class
         return np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_]).T

     @property
@@ -66,7 +67,7 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):
         if normalize_weights:
             # we can normalize the weights (by their sum) so that they sum to 1
             # and can be interpreted as impact percentages for interpretability.
-            # this requires removing the (-) from the weights, i.e. moving it to the predictions (use unsigned_coef)
+            # this requires removing the (-) from the weights, i.e. moving it to the predictions (use unsigned_coef) --> I don't see why
             # question: I don't understand the thing with nonzero?
             # predictions = self._omp.predict(forest_predictions) * (1 / (np.sum(self._omp.coef_) / len(np.nonzero(self._omp.coef_))))
...
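The sign trick referenced in these comments can be checked in isolation. Below is a minimal sketch, where coef and intercept are illustrative stand-ins for the fitted OMP's coef_ and intercept_ attributes, showing that folding the coefficient signs into the predictions leaves the weighted prediction unchanged:

import numpy as np

rng = np.random.default_rng(0)
base_predictions = rng.normal(size=(4, 3))   # (n_samples, n_trees), toy data
coef = np.array([0.5, -1.5, 2.0])            # stand-in for omp_obj.coef_
intercept = 0.1                              # stand-in for omp_obj.intercept_

coef_signs = np.sign(coef)[np.newaxis, :]      # row vector, broadcasts over samples
unsigned_coef = (coef_signs * coef).squeeze()  # element-wise |coef|, now non-negative

# Fold the signs into the predictions, then apply the unsigned weights.
predictions = (base_predictions * coef_signs).dot(unsigned_coef) + intercept

# Identical to using the signed coefficients directly.
assert np.allclose(predictions, base_predictions.dot(coef) + intercept)

Since the non-negative unsigned_coef carries all the magnitude, it is the quantity that could be normalized to sum to 1 for interpretability.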
@@ -60,7 +60,7 @@ class OmpForestMulticlassClassifier(OmpForest):
         for class_label in possible_classes:
             atoms_binary = binarize_class_data(atoms, class_label, inplace=False)
             objective_binary = binarize_class_data(objective, class_label, inplace=False)
-            # todo: maybe consider that the forest size is global, so only a fraction is available for each OMP...
+            # TODO: maybe consider that the forest size is global, so only a fraction is available for each OMP...
             omp_class = OrthogonalMatchingPursuit(
                 n_nonzero_coefs=self.models_parameters.extracted_forest_size,
                 fit_intercept=True, normalize=False)
@@ -69,7 +69,9 @@ class OmpForestMulticlassClassifier(OmpForest):
         return self._dct_class_omp

     def predict(self, X):
-        forest_predictions = self._base_estimator_predictions(X)
+        '''forest_predictions = self._base_estimator_predictions(X)
+        print(forest_predictions.shape)

         if self._models_parameters.normalize_D:
             forest_predictions /= self._forest_norms
@@ -79,9 +81,26 @@ class OmpForestMulticlassClassifier(OmpForest):
         for class_label, omp_class in self._dct_class_omp.items():
             label_names.append(class_label)
             atoms_binary = binarize_class_data(forest_predictions, class_label, inplace=False)
+            print(atoms_binary.shape)
             preds.append(self._make_omp_weighted_prediction(atoms_binary, omp_class, self._models_parameters.normalize_weights))
-        # todo: check that this isn't buggy here
+        # TODO: check that this isn't buggy here
+        preds = np.array(preds).T'''
+
+        forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]).T
+
+        if self._models_parameters.normalize_D:
+            forest_predictions /= self._forest_norms
+
+        label_names = []
+        preds = []
+        num_class = 0
+        for class_label, omp_class in self._dct_class_omp.items():
+            label_names.append(class_label)
+            atoms_binary = (forest_predictions[num_class] - 0.5) * 2  # rescale from 0/1 to -1/+1
+            preds.append(self._make_omp_weighted_prediction(atoms_binary, omp_class, self._models_parameters.normalize_weights))
+            num_class += 1
+
         preds = np.array(preds).T
         max_preds = np.argmax(preds, axis=1)
@@ -97,6 +116,27 @@ class OmpForestMulticlassClassifier(OmpForest):
         return evaluation

+    @staticmethod
+    def _make_omp_weighted_prediction(base_predictions, omp_obj, normalize_weights=False):
+        if normalize_weights:
+            # we can normalize the weights (by their sum) so that they sum to 1
+            # and can be interpreted as impact percentages for interpretability.
+            # this requires removing the (-) from the weights, i.e. moving it to the predictions (use unsigned_coef) --> I don't see why
+            # question: I don't understand the thing with nonzero?
+            # predictions = self._omp.predict(forest_predictions) * (1 / (np.sum(self._omp.coef_) / len(np.nonzero(self._omp.coef_))))
+            coef_signs = np.sign(omp_obj.coef_)[np.newaxis, :]  # add an axis so it broadcasts row-wise (there could be ambiguity when forest_predictions is square)
+            unsigned_coef = (coef_signs * omp_obj.coef_).squeeze()
+            intercept = omp_obj.intercept_
+
+            adjusted_forest_predictions = base_predictions * coef_signs
+            predictions = adjusted_forest_predictions.dot(unsigned_coef) + intercept
+        else:
+            predictions = omp_obj.predict(base_predictions)
+
+        return predictions
+

 if __name__ == "__main__":
     forest = RandomForestClassifier(n_estimators=10)
...
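As a reference for the new predict path, a minimal runnable sketch assuming a fitted scikit-learn RandomForestClassifier on toy data: stacking per-tree predict_proba outputs and transposing yields a (n_classes, n_samples, n_trees) array, and each class slice is rescaled from [0, 1] to [-1, +1] to match the ±1 encoding the per-class OMPs were fitted against.

import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

X, y = load_iris(return_X_y=True)
forest = RandomForestClassifier(n_estimators=10).fit(X, y)

# Stack per-tree class probabilities: (n_trees, n_samples, n_classes),
# then transpose to (n_classes, n_samples, n_trees).
forest_predictions = np.array([tree.predict_proba(X) for tree in forest.estimators_]).T

for num_class in range(forest_predictions.shape[0]):
    # Rescale probabilities from [0, 1] to [-1, +1], mirroring the
    # ±1 encoding produced by binarize_class_data during fitting.
    atoms_binary = (forest_predictions[num_class] - 0.5) * 2
    assert atoms_binary.shape == (X.shape[0], forest.n_estimators)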
...@@ -60,7 +60,6 @@ def binarize_class_data(data, class_pos, inplace=True): ...@@ -60,7 +60,6 @@ def binarize_class_data(data, class_pos, inplace=True):
""" """
if not inplace: if not inplace:
data = deepcopy(data) data = deepcopy(data)
position_class_labels = (data == class_pos) position_class_labels = (data == class_pos)
data[~(position_class_labels)] = -1 data[~(position_class_labels)] = -1
data[(position_class_labels)] = +1 data[(position_class_labels)] = +1
......
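For clarity, a self-contained version of binarize_class_data as it reads after this change, with a small usage example; the trailing return data is assumed from the surrounding code, which the excerpt truncates:

import numpy as np
from copy import deepcopy

def binarize_class_data(data, class_pos, inplace=True):
    # Map entries equal to class_pos to +1 and every other entry to -1.
    if not inplace:
        data = deepcopy(data)
    position_class_labels = (data == class_pos)
    data[~(position_class_labels)] = -1
    data[(position_class_labels)] = +1
    return data

labels = np.array([0, 1, 2, 1, 0])
print(binarize_class_data(labels, class_pos=1, inplace=False))  # [-1  1 -1  1 -1]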
@@ -6,18 +6,15 @@
     "normalize_D": false,
     "dataset_normalizer": "standard",
     "forest_size": null,
-    "extracted_forest_size_samples": 5,
-    "extracted_forest_size_stop": 0.05,
+    "extracted_forest_size_samples": 10,
+    "extracted_forest_size_stop": 0.4,
     "models_dir": "models/boston/stage1",
     "dev_size": 0.2,
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        1,
-        2,
-        3,
-        4,
-        5
+        2078,
+        90
     ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
@@ -30,10 +27,15 @@
     "job_number": -1,
     "extraction_strategy": "none",
     "extracted_forest_size": [
-        8,
-        17,
-        25,
-        33,
-        42
+        36,
+        73,
+        109,
+        145,
+        182,
+        218,
+        255,
+        291,
+        327,
+        364
     ]
 }
\ No newline at end of file
@@ -13,11 +13,9 @@
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        1,
-        2,
-        3,
-        4,
-        5
+        58,
+        43535,
+        234234
     ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
...
@@ -13,11 +13,9 @@
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        1,
-        2,
-        3,
-        4,
-        5
+        58,
+        43535,
+        234234
    ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
...
@@ -13,11 +13,9 @@
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        1,
-        2,
-        3,
-        4,
-        5
+        58,
+        43535,
+        234234
     ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
...
@@ -13,11 +13,9 @@
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        1,
-        2,
-        3,
-        4,
-        5
+        58,
+        43535,
+        234234
     ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
...
@@ -13,11 +13,9 @@
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        1,
-        2,
-        3,
-        4,
-        5
+        58,
+        43535,
+        234234
     ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
...
@@ -13,11 +13,9 @@
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        1,
-        2,
-        3,
-        4,
-        5
+        58,
+        43535,
+        234234
     ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
...
Changed images:
results/iris/stage1/losses.png: 35.2 KiB → 64.7 KiB
results/iris/stage2/losses.png: 30.5 KiB
results/iris/stage3/losses.png: 30.6 KiB
results/iris/stage4/losses.png: 30.7 KiB
 python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=california_housing --models_dir=models/california_housing/stage3
 python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=boston --models_dir=models/boston/stage3
-python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=iris --models_dir=models/iris/stage3
+python code/compute_results.py --stage=1 --experiment_ids 1 2 3 4 5 6 --dataset_name=iris --models_dir=models/iris/stage1
 python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=diabetes --models_dir=models/diabetes/stage3
 python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=digits --models_dir=models/digits/stage3
 python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=linnerud --models_dir=models/linnerud/stage3
...
 #!/bin/bash
-core_number=10
+core_number=5
 walltime=1:00
-seeds='1 2 3'
+seeds='58 43535 234234'

-for dataset in diamonds
+for dataset in iris
 do
-    oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --save_experiment_configuration 1 none_with_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=1 --models_dir=models/$dataset/stage1"
+    python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --save_experiment_configuration 1 none_with_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=1 --models_dir=models/$dataset/stage1
-    oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --save_experiment_configuration 1 random_with_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=2 --models_dir=models/$dataset/stage1"
+    python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --save_experiment_configuration 1 random_with_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=2 --models_dir=models/$dataset/stage1
-    oarsub -p "(gpu is null)" -l /core=$core_number,walltime=5:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds 5 --save_experiment_configuration 1 omp_with_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=3 --models_dir=models/$dataset/stage1"
+    python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 1 omp_with_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=3 --models_dir=models/$dataset/stage1
-    oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --skip_best_hyperparams --save_experiment_configuration 1 none_wo_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=4 --models_dir=models/$dataset/stage1"
+    python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --skip_best_hyperparams --save_experiment_configuration 1 none_wo_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=4 --models_dir=models/$dataset/stage1
-    oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --skip_best_hyperparams --save_experiment_configuration 1 random_wo_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=5 --models_dir=models/$dataset/stage1"
+    python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --skip_best_hyperparams --save_experiment_configuration 1 random_wo_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=5 --models_dir=models/$dataset/stage1
-    oarsub -p "(gpu is null)" -l /core=$core_number,walltime=5:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=6 --models_dir=models/$dataset/stage1"
+    python code/train.py --dataset_name=$dataset --seeds $seeds --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=6 --models_dir=models/$dataset/stage1
 done