Commit 1379c412 authored by Léo Bouscarrat

Change for multiclass

parent c80ddd61
Merge requests: !11 "Resolve 'Correction of multiclass classif'", !9 "Resolve 'Experiment pipeline'"
Showing 88 additions and 58 deletions
......@@ -24,6 +24,7 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):
return self._base_forest_estimator.score(X, y)
def _base_estimator_predictions(self, X):
# We need to use predict_proba to get the probabilities of each class; predict only returns hard labels
return np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_]).T
@property
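For context on the comment above, here is a shape sketch using scikit-learn's standard estimator API (not code from this repository): predict returns one hard label per sample per tree, while predict_proba adds a class dimension, which is what the multiclass predict path below relies on.

import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

X, y = load_iris(return_X_y=True)
forest = RandomForestClassifier(n_estimators=10).fit(X, y)

# hard labels: shape (n_samples, n_trees) after the transpose
hard = np.array([tree.predict(X) for tree in forest.estimators_]).T

# per-class probabilities: shape (n_classes, n_samples, n_trees) after the transpose
proba = np.array([tree.predict_proba(X) for tree in forest.estimators_]).T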
......@@ -66,7 +67,7 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):
if normalize_weights:
# we can normalize weights (by their sum) so that they sum to 1
# and they can be interpreted as impact percentages for interpretability.
# this requires removing the (-) sign from the weights, i.e. moving it to the predictions (use unsigned_coef)
# this requires removing the (-) sign from the weights, i.e. moving it to the predictions (use unsigned_coef) --> I don't see why
# question: I don't understand the nonzero part?
# predictions = self._omp.predict(forest_predictions) * (1 / (np.sum(self._omp.coef_) / len(np.nonzero(self._omp.coef_))))
......
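A worked illustration of the normalization described in the comment above (an interpretation sketch, not code from the commit): folding the sign of each OMP coefficient into the predictions leaves non-negative weights that can be normalized to sum to 1 and read as per-tree impact percentages.

import numpy as np

coef = np.array([1.0, -0.6, 0.0, 0.4])   # OMP weights, possibly negative
signs = np.sign(coef)                     # the (-) moves onto the predictions
unsigned = signs * coef                   # non-negative weights: [1.0, 0.6, 0.0, 0.4]
normalized = unsigned / unsigned.sum()    # sums to 1: [0.5, 0.3, 0.0, 0.2]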
......@@ -60,7 +60,7 @@ class OmpForestMulticlassClassifier(OmpForest):
for class_label in possible_classes:
atoms_binary = binarize_class_data(atoms, class_label, inplace=False)
objective_binary = binarize_class_data(objective, class_label, inplace=False)
# todo: maybe consider that the forest size is global, so only a fraction of it is available to each OMP...
# TODO: maybe consider that the forest size is global, so only a fraction of it is available to each OMP...
omp_class = OrthogonalMatchingPursuit(
n_nonzero_coefs=self.models_parameters.extracted_forest_size,
fit_intercept=True, normalize=False)
......@@ -69,7 +69,9 @@ class OmpForestMulticlassClassifier(OmpForest):
return self._dct_class_omp
def predict(self, X):
forest_predictions = self._base_estimator_predictions(X)
'''forest_predictions = self._base_estimator_predictions(X)
print(forest_predictions.shape)
if self._models_parameters.normalize_D:
forest_predictions /= self._forest_norms
......@@ -79,9 +81,26 @@ class OmpForestMulticlassClassifier(OmpForest):
for class_label, omp_class in self._dct_class_omp.items():
label_names.append(class_label)
atoms_binary = binarize_class_data(forest_predictions, class_label, inplace=False)
print(atoms_binary.shape)
preds.append(self._make_omp_weighted_prediction(atoms_binary, omp_class, self._models_parameters.normalize_weights))
# todo: check that there is no bug here
# TODO: check that there is no bug here
preds = np.array(preds).T'''
forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]).T
if self._models_parameters.normalize_D:
forest_predictions /= self._forest_norms
label_names = []
preds = []
num_class = 0
for class_label, omp_class in self._dct_class_omp.items():
label_names.append(class_label)
atoms_binary = (forest_predictions[num_class] - 0.5) * 2  # rescale tree probabilities from [0, 1] to [-1, +1]
preds.append(self._make_omp_weighted_prediction(atoms_binary, omp_class, self._models_parameters.normalize_weights))
num_class += 1
preds = np.array(preds).T
max_preds = np.argmax(preds, axis=1)
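In outline, the new predict path is a one-vs-all decision rule. The following standalone sketch assumes the per-class probabilities come out with shape (n_classes, n_samples, n_trees), as above, and uses plain omp.predict in place of _make_omp_weighted_prediction; the names are hypothetical.

import numpy as np

def one_vs_all_predict(per_class_tree_probas, omps, label_names):
    # per_class_tree_probas: (n_classes, n_samples, n_trees), values in [0, 1]
    scores = []
    for k, omp in enumerate(omps):
        atoms = (per_class_tree_probas[k] - 0.5) * 2  # map [0, 1] to [-1, +1]
        scores.append(omp.predict(atoms))             # real-valued class score
    scores = np.array(scores).T                       # (n_samples, n_classes)
    return np.array(label_names)[np.argmax(scores, axis=1)]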
......@@ -97,6 +116,27 @@ class OmpForestMulticlassClassifier(OmpForest):
return evaluation
@staticmethod
def _make_omp_weighted_prediction(base_predictions, omp_obj, normalize_weights=False):
if normalize_weights:
# we can normalize weights (by their sum) so that they sum to 1
# and they can be interpreted as impact percentages for interpretability.
# this requires removing the (-) sign from the weights, i.e. moving it to the predictions (use unsigned_coef) --> I don't see why
# question: I don't understand the nonzero part?
# predictions = self._omp.predict(forest_predictions) * (1 / (np.sum(self._omp.coef_) / len(np.nonzero(self._omp.coef_))))
coef_signs = np.sign(omp_obj.coef_)[np.newaxis, :]  # add an axis so broadcasting is row-wise (avoids ambiguity when base_predictions is square)
unsigned_coef = (coef_signs * omp_obj.coef_).squeeze()
intercept = omp_obj.intercept_
adjusted_forest_predictions = base_predictions * coef_signs
predictions = adjusted_forest_predictions.dot(unsigned_coef) + intercept
else:
predictions = omp_obj.predict(base_predictions)
return predictions
if __name__ == "__main__":
forest = RandomForestClassifier(n_estimators=10)
......
......@@ -60,7 +60,6 @@ def binarize_class_data(data, class_pos, inplace=True):
"""
if not inplace:
data = deepcopy(data)
position_class_labels = (data == class_pos)
data[~(position_class_labels)] = -1
data[(position_class_labels)] = +1
......
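For reference, the effect of binarize_class_data on a label vector (a usage sketch, assuming data is a NumPy array):

import numpy as np

labels = np.array([0, 2, 1, 2, 0])
# binarize_class_data(labels, class_pos=2, inplace=False) yields:
# entries equal to class_pos become +1, everything else becomes -1
binary = np.where(labels == 2, 1, -1)   # array([-1,  1, -1,  1, -1])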
......@@ -6,18 +6,15 @@
"normalize_D": false,
"dataset_normalizer": "standard",
"forest_size": null,
"extracted_forest_size_samples": 5,
"extracted_forest_size_stop": 0.05,
"extracted_forest_size_samples": 10,
"extracted_forest_size_stop": 0.4,
"models_dir": "models/boston/stage1",
"dev_size": 0.2,
"test_size": 0.2,
"random_seed_number": 1,
"seeds": [
1,
2,
3,
4,
5
2078,
90
],
"subsets_used": "train,dev",
"normalize_weights": false,
......@@ -30,10 +27,15 @@
"job_number": -1,
"extraction_strategy": "none",
"extracted_forest_size": [
8,
17,
25,
33,
42
36,
73,
109,
145,
182,
218,
255,
291,
327,
364
]
}
\ No newline at end of file
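The new extracted_forest_size list is consistent with taking 10 evenly spaced fractions of the full forest size up to extracted_forest_size_stop = 0.4 and rounding (a reconstruction; the base size of 909 trees is an assumption, since forest_size is null in this file):

import numpy as np

forest_size = 909                      # assumed full forest size (not stored here)
stop, samples = 0.4, 10
fractions = np.linspace(stop / samples, stop, samples)   # 0.04, 0.08, ..., 0.40
sizes = np.rint(forest_size * fractions).astype(int)
# -> [ 36  73 109 145 182 218 255 291 327 364], matching the list above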
......@@ -13,11 +13,9 @@
"test_size": 0.2,
"random_seed_number": 1,
"seeds": [
1,
2,
3,
4,
5
58,
43535,
234234
],
"subsets_used": "train,dev",
"normalize_weights": false,
......
......@@ -13,11 +13,9 @@
"test_size": 0.2,
"random_seed_number": 1,
"seeds": [
1,
2,
3,
4,
5
58,
43535,
234234
],
"subsets_used": "train,dev",
"normalize_weights": false,
......
......@@ -13,11 +13,9 @@
"test_size": 0.2,
"random_seed_number": 1,
"seeds": [
1,
2,
3,
4,
5
58,
43535,
234234
],
"subsets_used": "train,dev",
"normalize_weights": false,
......
......@@ -13,11 +13,9 @@
"test_size": 0.2,
"random_seed_number": 1,
"seeds": [
1,
2,
3,
4,
5
58,
43535,
234234
],
"subsets_used": "train,dev",
"normalize_weights": false,
......
......@@ -13,11 +13,9 @@
"test_size": 0.2,
"random_seed_number": 1,
"seeds": [
1,
2,
3,
4,
5
58,
43535,
234234
],
"subsets_used": "train,dev",
"normalize_weights": false,
......
......@@ -13,11 +13,9 @@
"test_size": 0.2,
"random_seed_number": 1,
"seeds": [
1,
2,
3,
4,
5
58,
43535,
234234
],
"subsets_used": "train,dev",
"normalize_weights": false,
......
results/iris/stage1/losses.png (replaced: 35.2 KiB → 64.7 KiB)
results/iris/stage2/losses.png (30.5 KiB)
results/iris/stage3/losses.png (30.6 KiB)
results/iris/stage4/losses.png (30.7 KiB)
python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=california_housing --models_dir=models/california_housing/stage3
python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=boston --models_dir=models/boston/stage3
python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=iris --models_dir=models/iris/stage3
python code/compute_results.py --stage=1 --experiment_ids 1 2 3 4 5 6 --dataset_name=iris --models_dir=models/iris/stage1
python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=diabetes --models_dir=models/diabetes/stage3
python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=digits --models_dir=models/digits/stage3
python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=linnerud --models_dir=models/linnerud/stage3
......
#!/bin/bash
core_number=10
core_number=5
walltime=1:00
seeds='1 2 3'
seeds='58 43535 234234'
for dataset in diamonds
for dataset in iris
do
oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --save_experiment_configuration 1 none_with_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=1 --models_dir=models/$dataset/stage1"
oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --save_experiment_configuration 1 random_with_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=2 --models_dir=models/$dataset/stage1"
oarsub -p "(gpu is null)" -l /core=$core_number,walltime=5:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds 5 --save_experiment_configuration 1 omp_with_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=3 --models_dir=models/$dataset/stage1"
oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --skip_best_hyperparams --save_experiment_configuration 1 none_wo_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=4 --models_dir=models/$dataset/stage1"
oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --skip_best_hyperparams --save_experiment_configuration 1 random_wo_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=5 --models_dir=models/$dataset/stage1"
oarsub -p "(gpu is null)" -l /core=$core_number,walltime=5:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=10 --experiment_id=6 --models_dir=models/$dataset/stage1"
python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --save_experiment_configuration 1 none_with_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=1 --models_dir=models/$dataset/stage1
python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --save_experiment_configuration 1 random_with_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=2 --models_dir=models/$dataset/stage1
python code/train.py --dataset_name=$dataset --seeds $seeds --save_experiment_configuration 1 omp_with_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=3 --models_dir=models/$dataset/stage1
python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --skip_best_hyperparams --save_experiment_configuration 1 none_wo_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=4 --models_dir=models/$dataset/stage1
python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --skip_best_hyperparams --save_experiment_configuration 1 random_wo_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=5 --models_dir=models/$dataset/stage1
python code/train.py --dataset_name=$dataset --seeds $seeds --skip_best_hyperparams --save_experiment_configuration 1 omp_wo_params --extracted_forest_size_stop=0.05 --extracted_forest_size_samples=5 --experiment_id=6 --models_dir=models/$dataset/stage1
done