Commit 1379c412 authored by Léo Bouscarrat

Change for multiclass

parent c80ddd61
2 merge requests: !11 Resolve "Correction of multiclass classif", !9 Resolve "Experiment pipeline"
Showing with 88 additions and 58 deletions
@@ -24,6 +24,7 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):
         return self._base_forest_estimator.score(X, y)

     def _base_estimator_predictions(self, X):
+        # We need to use predict_proba to get the probabilities of each class
         return np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_]).T

     @property
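The added comment points at predict_proba while the return statement still stacks hard predict outputs. A minimal sketch, outside the committed code, of the shape difference between the two (dataset and names are illustrative):

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier

    X, y = load_iris(return_X_y=True)
    forest = RandomForestClassifier(n_estimators=10).fit(X, y)

    # Hard labels: one column per tree, shape (n_samples, n_trees).
    hard = np.array([tree.predict(X) for tree in forest.estimators_]).T

    # Probabilities: each tree returns (n_samples, n_classes), so the stack
    # is (n_trees, n_samples, n_classes) before any transpose.
    proba = np.array([tree.predict_proba(X) for tree in forest.estimators_])
    print(hard.shape, proba.shape)  # (150, 10) (10, 150, 3)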
@@ -66,7 +67,7 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):
         if normalize_weights:
             # we can normalize weights (by their sum) so that they sum to 1
             # and they can be interpreted as impact percentages for interpretability.
-            # this requires removing the (-) in weights, i.e. moving it to the predictions (use unsigned_coef)
+            # this requires removing the (-) in weights, i.e. moving it to the predictions (use unsigned_coef) --> I don't see why
             # question: I don't understand the thing with nonzero?
             # predictions = self._omp.predict(forest_predictions) * (1 / (np.sum(self._omp.coef_) / len(np.nonzero(self._omp.coef_))))
...
@@ -60,7 +60,7 @@ class OmpForestMulticlassClassifier(OmpForest):
         for class_label in possible_classes:
             atoms_binary = binarize_class_data(atoms, class_label, inplace=False)
             objective_binary = binarize_class_data(objective, class_label, inplace=False)
-            # todo maybe consider that the forest size is global, so only a fraction is available for each OMP...
+            # TODO: maybe consider that the forest size is global, so only a fraction is available for each OMP...
             omp_class = OrthogonalMatchingPursuit(
                 n_nonzero_coefs=self.models_parameters.extracted_forest_size,
                 fit_intercept=True, normalize=False)
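For context, this loop implements tree selection via OMP: it regresses the binarized objective on the matrix of per-tree outputs and keeps at most n_nonzero_coefs trees. A hedged, self-contained sketch with synthetic data (the real code passes extracted_forest_size, plus the normalize=False flag from older scikit-learn versions):

    import numpy as np
    from sklearn.linear_model import OrthogonalMatchingPursuit

    rng = np.random.RandomState(0)
    atoms_binary = rng.choice([-1.0, 1.0], size=(100, 20))  # per-tree outputs, one column per tree
    objective_binary = rng.choice([-1.0, 1.0], size=100)    # one-vs-all target in {-1, +1}

    omp_class = OrthogonalMatchingPursuit(n_nonzero_coefs=5, fit_intercept=True)
    omp_class.fit(atoms_binary, objective_binary)
    selected_trees = np.flatnonzero(omp_class.coef_)        # indices of the trees kept
    print(selected_trees)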
@@ -69,7 +69,9 @@ class OmpForestMulticlassClassifier(OmpForest):
         return self._dct_class_omp

     def predict(self, X):
-        forest_predictions = self._base_estimator_predictions(X)
+        '''forest_predictions = self._base_estimator_predictions(X)
+        print(forest_predictions.shape)

         if self._models_parameters.normalize_D:
             forest_predictions /= self._forest_norms
@@ -79,9 +81,26 @@ class OmpForestMulticlassClassifier(OmpForest):
         for class_label, omp_class in self._dct_class_omp.items():
             label_names.append(class_label)
             atoms_binary = binarize_class_data(forest_predictions, class_label, inplace=False)
+            print(atoms_binary.shape)
             preds.append(self._make_omp_weighted_prediction(atoms_binary, omp_class, self._models_parameters.normalize_weights))

-        # todo check that this is not buggy here
-        preds = np.array(preds).T
+        # TODO: check that this is not buggy here
+        preds = np.array(preds).T'''
+
+        forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]).T
+
+        if self._models_parameters.normalize_D:
+            forest_predictions /= self._forest_norms
+
+        label_names = []
+        preds = []
+        num_class = 0
+        for class_label, omp_class in self._dct_class_omp.items():
+            label_names.append(class_label)
+            atoms_binary = (forest_predictions[num_class] - 0.5) * 2  # rescale from 0/1 to -1/+1
+            preds.append(self._make_omp_weighted_prediction(atoms_binary, omp_class, self._models_parameters.normalize_weights))
+            num_class += 1
+
+        preds = np.array(preds).T

         max_preds = np.argmax(preds, axis=1)
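Note the shape logic in the new branch: transposing the stacked predict_proba outputs puts the class axis first, so forest_predictions[num_class] is an (n_samples, n_trees) probability matrix for that class, and (p - 0.5) * 2 maps it from [0, 1] onto [-1, +1]. A small sketch with stand-in data; it also shows why indexing by num_class implicitly assumes the class-axis order matches the iteration order of _dct_class_omp:

    import numpy as np

    n_trees, n_samples, n_classes = 10, 150, 3
    stacked = np.random.rand(n_trees, n_samples, n_classes)  # stand-in for per-tree predict_proba
    forest_predictions = stacked.T                           # (n_classes, n_samples, n_trees)

    atoms = forest_predictions[0]                            # class 0: (n_samples, n_trees)
    atoms_binary = (atoms - 0.5) * 2                         # probabilities rescaled to [-1, +1]
    print(forest_predictions.shape, atoms_binary.shape)      # (3, 150, 10) (150, 10)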
@@ -97,10 +116,31 @@
         return evaluation

+    @staticmethod
+    def _make_omp_weighted_prediction(base_predictions, omp_obj, normalize_weights=False):
+        if normalize_weights:
+            # we can normalize weights (by their sum) so that they sum to 1
+            # and they can be interpreted as impact percentages for interpretability.
+            # this requires removing the (-) in weights, i.e. moving it to the predictions (use unsigned_coef) --> I don't see why
+            # question: I don't understand the thing with nonzero?
+            # predictions = self._omp.predict(forest_predictions) * (1 / (np.sum(self._omp.coef_) / len(np.nonzero(self._omp.coef_))))
+            coef_signs = np.sign(omp_obj.coef_)[np.newaxis, :]  # add axis to make sure it will be broadcast line-wise (there might be a confusion when forest_predictions is square)
+            unsigned_coef = (coef_signs * omp_obj.coef_).squeeze()
+            intercept = omp_obj.intercept_
+
+            adjusted_forest_predictions = base_predictions * coef_signs
+            predictions = adjusted_forest_predictions.dot(unsigned_coef) + intercept
+        else:
+            predictions = omp_obj.predict(base_predictions)
+
+        return predictions
+
 if __name__ == "__main__":
     forest = RandomForestClassifier(n_estimators=10)
     X = np.random.rand(10, 5)
     y = np.random.choice([-1, +1], 10)

     forest.fit(X, y)
     print(forest.predict(np.random.rand(10, 5)))
\ No newline at end of file
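Why moving the sign into the predictions is safe in _make_omp_weighted_prediction: w·p = (sign(w) * p)·|w|, so stripping signs from the weights and applying them to the predictions leaves the dot product unchanged, after which the unsigned weights can be normalized by their sum. A small numeric check with illustrative values:

    import numpy as np

    coef = np.array([0.5, -1.5, 0.0, 2.0])      # OMP coefficients, mixed signs
    preds = np.array([[1.0, -1.0, 1.0, -1.0]])  # one sample of per-tree predictions

    signs = np.sign(coef)[np.newaxis, :]
    unsigned = (signs * coef).squeeze()          # |coef|; zero coefficients stay zero
    assert np.allclose(preds.dot(coef), (preds * signs).dot(unsigned))

    weights = unsigned / unsigned.sum()          # sums to 1: impact percentages
    print(weights)                               # [0.125 0.375 0.    0.5  ]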
@@ -60,7 +60,6 @@ def binarize_class_data(data, class_pos, inplace=True):
     """
     if not inplace:
         data = deepcopy(data)
-
     position_class_labels = (data == class_pos)
     data[~(position_class_labels)] = -1
     data[(position_class_labels)] = +1
...
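binarize_class_data above is the one-vs-all label mapping used both at fit time and in the commented-out predict path. An equivalent per-class one-liner, assuming a plain integer label array (illustrative data):

    import numpy as np

    labels = np.array([0, 1, 2, 1, 0])
    for class_label in np.unique(labels):
        binary = np.where(labels == class_label, +1, -1)  # same effect as inplace=False
        print(class_label, binary)
    # 0 [ 1 -1 -1 -1  1]
    # 1 [-1  1 -1  1 -1]
    # 2 [-1 -1  1 -1 -1]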
@@ -6,18 +6,15 @@
     "normalize_D": false,
     "dataset_normalizer": "standard",
     "forest_size": null,
-    "extracted_forest_size_samples": 5,
-    "extracted_forest_size_stop": 0.05,
+    "extracted_forest_size_samples": 10,
+    "extracted_forest_size_stop": 0.4,
     "models_dir": "models/boston/stage1",
     "dev_size": 0.2,
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        1,
-        2,
-        3,
-        4,
-        5
+        2078,
+        90
     ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
@@ -30,10 +27,15 @@
     "job_number": -1,
     "extraction_strategy": "none",
     "extracted_forest_size": [
-        8,
-        17,
-        25,
-        33,
-        42
+        36,
+        73,
+        109,
+        145,
+        182,
+        218,
+        255,
+        291,
+        327,
+        364
     ]
 }
\ No newline at end of file
@@ -13,11 +13,9 @@
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        1,
-        2,
-        3,
-        4,
-        5
+        58,
+        43535,
+        234234
     ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
...
@@ -13,11 +13,9 @@
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        1,
-        2,
-        3,
-        4,
-        5
+        58,
+        43535,
+        234234
     ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
...
@@ -13,11 +13,9 @@
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        1,
-        2,
-        3,
-        4,
-        5
+        58,
+        43535,
+        234234
     ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
...
@@ -13,11 +13,9 @@
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        1,
-        2,
-        3,
-        4,
-        5
+        58,
+        43535,
+        234234
     ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
...
@@ -13,11 +13,9 @@
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        1,
-        2,
-        3,
-        4,
-        5
+        58,
+        43535,
+        234234
     ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
...
@@ -13,11 +13,9 @@
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        1,
-        2,
-        3,
-        4,
-        5
+        58,
+        43535,
+        234234
     ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
...
results/iris/stage1/losses.png: image updated (35.2 KiB → 64.7 KiB)
results/iris/stage2/losses.png: new image (30.5 KiB)
results/iris/stage3/losses.png: new image (30.6 KiB)
results/iris/stage4/losses.png: new image (30.7 KiB)
 python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=california_housing --models_dir=models/california_housing/stage3
 python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=boston --models_dir=models/boston/stage3
-python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=iris --models_dir=models/iris/stage3
+python code/compute_results.py --stage=1 --experiment_ids 1 2 3 4 5 6 --dataset_name=iris --models_dir=models/iris/stage1
 python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=diabetes --models_dir=models/diabetes/stage3
 python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=digits --models_dir=models/digits/stage3
 python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=linnerud --models_dir=models/linnerud/stage3
...
 #!/bin/bash

-core_number=10
+core_number=5
 walltime=1:00
-seeds='1 2 3'
+seeds='58 43535 234234'

-for dataset in diamonds
+for dataset in iris
 do
-    oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --save_experiment_configuration 1 none_with_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=1 --models_dir=models/$dataset/stage1"
-    oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --save_experiment_configuration 1 random_with_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=2 --models_dir=models/$dataset/stage1"
-    oarsub -p "(gpu is null)" -l /core=$core_number,walltime=5:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds 5 --save_experiment_configuration 1 omp_with_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=3 --models_dir=models/$dataset/stage1"
-    oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --skip_best_hyperparams --save_experiment_configuration 1 none_wo_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=4 --models_dir=models/$dataset/stage1"
-    oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --skip_best_hyperparams --save_experiment_configuration 1 random_wo_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=5 --models_dir=models/$dataset/stage1"
-    oarsub -p "(gpu is null)" -l /core=$core_number,walltime=5:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=6 --models_dir=models/$dataset/stage1"
+    python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --save_experiment_configuration 1 none_with_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=1 --models_dir=models/$dataset/stage1
+    python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --save_experiment_configuration 1 random_with_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=2 --models_dir=models/$dataset/stage1
+    python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 1 omp_with_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=3 --models_dir=models/$dataset/stage1
+    python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --skip_best_hyperparams --save_experiment_configuration 1 none_wo_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=4 --models_dir=models/$dataset/stage1
+    python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --skip_best_hyperparams --save_experiment_configuration 1 random_wo_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=5 --models_dir=models/$dataset/stage1
+    python code/train.py --dataset_name=$dataset --seeds $seeds --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=6 --models_dir=models/$dataset/stage1
 done