Compare revisions — luc.giffon/bolsonaro

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (10)
@@ -6,12 +6,12 @@ import datetime

 class ModelRawResults(object):

-    def __init__(self, model_object, training_time,
+    def __init__(self, model_weights, training_time,
                  datetime, train_score, dev_score, test_score,
                  train_score_base, dev_score_base,
                  test_score_base, score_metric, base_score_metric):
-        self._model_object = model_object
+        self._model_weights = model_weights
         self._training_time = training_time
         self._datetime = datetime
         self._train_score = train_score
@@ -24,8 +24,8 @@ class ModelRawResults(object):
         self._base_score_metric = base_score_metric

     @property
-    def model_object(self):
-        return self.model_object
+    def model_weights(self):
+        return self.model_weights

     @property
     def training_time(self):
@@ -68,6 +68,8 @@ class ModelRawResults(object):
         return self._base_score_metric

     def save(self, models_dir):
+        if not os.path.exists(models_dir):
+            os.mkdir(models_dir)
         save_obj_to_pickle(models_dir + os.sep + 'model_raw_results.pickle',
                            self.__dict__)
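The extended `save` above first makes sure the target directory exists, then dumps the object's `__dict__` through `save_obj_to_pickle`. A minimal standalone sketch of that save path, assuming a plain pickle-based helper (the helper body and the example path are illustrative, not the repository's actual code):

```python
import os
import pickle

def save_obj_to_pickle(path, obj):
    # Serialize any picklable object (here: the results __dict__) to disk.
    with open(path, 'wb') as output_file:
        pickle.dump(obj, output_file)

# Mirror of ModelRawResults.save(): create the directory on demand, then write
# model_raw_results.pickle inside it. The '_no_weights' suffix is the one the
# Trainer uses further below for the unweighted results.
models_dir = 'models/boston/stage4/seeds/1/extracted_forest_sizes/10_no_weights'  # example path
if not os.path.exists(models_dir):
    os.makedirs(models_dir)  # os.mkdir in the diff only creates the last level; makedirs also creates parents
save_obj_to_pickle(models_dir + os.sep + 'model_raw_results.pickle', {'train_score': 0.0})
```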
@@ -8,6 +8,7 @@ from sklearn.base import BaseEstimator

 class OmpForest(BaseEstimator, metaclass=ABCMeta):

     def __init__(self, models_parameters, base_forest_estimator):
         self._base_forest_estimator = base_forest_estimator
         self._models_parameters = models_parameters
@@ -24,7 +25,6 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):
         return self._base_forest_estimator.score(X, y)

     def _base_estimator_predictions(self, X):
-        # We need to use predict_proba to get the probabilities of each class
         return np.array([tree.predict(X) for tree in self._base_forest_estimator.estimators_]).T

     @property
@@ -96,6 +96,7 @@ class OmpForest(BaseEstimator, metaclass=ABCMeta):
         pass

 class SingleOmpForest(OmpForest):

     def __init__(self, models_parameters, base_forest_estimator):
         # fit_intercept shouldn't be set to False as the data isn't necessarily centered here
         # normalization is handled outsite OMP
@@ -123,3 +124,24 @@ class SingleOmpForest(OmpForest):
             forest_predictions /= self._forest_norms

         return self._make_omp_weighted_prediction(forest_predictions, self._omp, self._models_parameters.normalize_weights)

+    def predict_no_weights(self, X):
+        """
+        Apply the SingleOmpForest to X without using the weights.
+
+        Make all the base tree predictions
+
+        :param X: a Forest
+        :return: a np.array of the predictions of the entire forest
+        """
+        forest_predictions = self._base_estimator_predictions(X).T
+
+        if self._models_parameters.normalize_D:
+            forest_predictions /= self._forest_norms
+
+        weights = self._omp.coef_
+        omp_trees_indices = np.nonzero(weights)
+
+        select_trees = np.mean(forest_predictions[omp_trees_indices], axis=0)
+        return select_trees
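The `predict_no_weights` method added above evaluates the OMP tree selection while discarding the learned coefficients: only trees whose OMP coefficient is nonzero are kept, and their raw predictions are averaged uniformly. A minimal standalone sketch of that idea on synthetic data with plain scikit-learn (the variable names are illustrative, not the repository's API):

```python
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import OrthogonalMatchingPursuit

X, y = make_regression(n_samples=200, n_features=10, random_state=0)
forest = RandomForestRegressor(n_estimators=100, random_state=0).fit(X, y)

# D: one column per tree (n_samples, n_trees) -- the "dictionary" OMP selects from.
D = np.array([tree.predict(X) for tree in forest.estimators_]).T
omp = OrthogonalMatchingPursuit(n_nonzero_coefs=10, fit_intercept=True).fit(D, y)

# Weighted prediction: the usual OMP-weighted combination of tree outputs.
y_weighted = omp.predict(D)

# "No weights" prediction: keep the selected trees but ignore their coefficients,
# simply averaging their outputs (the behaviour of predict_no_weights).
selected = np.nonzero(omp.coef_)[0]
y_unweighted = D[:, selected].mean(axis=1)
```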
@@ -106,6 +106,36 @@ class OmpForestMulticlassClassifier(OmpForest):
         max_preds = np.argmax(preds, axis=1)
         return np.array(label_names)[max_preds]

+    def predict_no_weights(self, X):
+        """
+        Apply the OmpForestMulticlassClassifier to X without using the weights.
+
+        Make all the base tree predictions
+
+        :param X: a Forest
+        :return: a np.array of the predictions of the entire forest
+        """
+        forest_predictions = np.array([tree.predict_proba(X) for tree in self._base_forest_estimator.estimators_]).T
+
+        if self._models_parameters.normalize_D:
+            forest_predictions /= self._forest_norms
+
+        label_names = []
+        preds = []
+        num_class = 0
+        for class_label, omp_class in self._dct_class_omp.items():
+            weights = omp_class.coef_
+            omp_trees_indices = np.nonzero(weights)
+            label_names.append(class_label)
+            atoms_binary = (forest_predictions[num_class].T - 0.5) * 2  # rescale the 0/1 tree outputs to -1/+1
+            preds.append(np.sum(atoms_binary[omp_trees_indices], axis=0))
+            num_class += 1
+        preds = np.array(preds).T
+        max_preds = np.argmax(preds, axis=1)
+        return np.array(label_names)[max_preds]

     def score(self, X, y, metric=DEFAULT_SCORE_METRIC):
         predictions = self.predict(X)
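In the multiclass variant, each class has its own OMP problem; the unweighted prediction rescales each selected tree's per-class output from 0/1 to -1/+1, sums these votes per class, and picks the class with the largest total. A small numerical illustration of that voting rule (toy numbers, not taken from the repository):

```python
import numpy as np

# Per-class outputs of 3 trees for one sample, one row per class, one column per tree.
class_probas = np.array([
    [1.0, 0.0, 1.0],   # class 'a'
    [0.0, 1.0, 0.0],   # class 'b'
])
# Indices of the trees kept by each per-class OMP (nonzero coefficients).
selected = {'a': np.array([0, 2]), 'b': np.array([1])}

votes = []
for row, (label, idx) in zip(class_probas, selected.items()):
    atoms_binary = (row - 0.5) * 2           # rescale 0/1 outputs to -1/+1 votes
    votes.append(atoms_binary[idx].sum())    # sum the votes of the selected trees only
print(max(zip(votes, selected.keys()))[1])   # -> 'a'
```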
@@ -95,12 +95,18 @@ class Trainer(object):
         )
         self._end_time = time.time()

-    def __score_func(self, model, X, y_true):
+    def __score_func(self, model, X, y_true, weights=True):
         if type(model) in [OmpForestRegressor, RandomForestRegressor, SimilarityForestRegressor]:
+            if weights:
                 y_pred = model.predict(X)
+            else:
+                y_pred = model.predict_no_weights(X)
             result = self._regression_score_metric(y_true, y_pred)
         elif type(model) in [OmpForestBinaryClassifier, OmpForestMulticlassClassifier, RandomForestClassifier]:
+            if weights:
                 y_pred = model.predict(X)
+            else:
+                y_pred = model.predict_no_weights(X)
             if type(model) is OmpForestBinaryClassifier:
                 y_pred = y_pred.round()
             result = self._classification_score_metric(y_true, y_pred)
@@ -126,8 +132,17 @@ class Trainer(object):
         :param model: Object with
         :param models_dir: Where the results will be saved
         """
+        model_weights = ''
+        if type(model) in [OmpForestRegressor, OmpForestBinaryClassifier]:
+            model_weights = model._omp.coef_
+        elif type(model) == OmpForestMulticlassClassifier:
+            model_weights = model._dct_class_omp
+        elif type(model) == OmpForestBinaryClassifier:
+            model_weights = model._omp
+
         results = ModelRawResults(
-            model_object='',
+            model_weights=model_weights,
             training_time=self._end_time - self._begin_time,
             datetime=datetime.datetime.now(),
             train_score=self.__score_func(model, self._dataset.X_train, self._dataset.y_train),
@@ -148,3 +163,29 @@ class Trainer(object):
         self._logger.info("Base performance on dev: {}".format(results.dev_score_base))
         self._logger.info("Performance on dev: {}".format(results.dev_score))

+        if type(model) not in [RandomForestRegressor, RandomForestClassifier]:
+            results = ModelRawResults(
+                model_weights='',
+                training_time=self._end_time - self._begin_time,
+                datetime=datetime.datetime.now(),
+                train_score=self.__score_func(model, self._dataset.X_train, self._dataset.y_train, False),
+                dev_score=self.__score_func(model, self._dataset.X_dev, self._dataset.y_dev, False),
+                test_score=self.__score_func(model, self._dataset.X_test, self._dataset.y_test, False),
+                train_score_base=self.__score_func_base(model, self._dataset.X_train, self._dataset.y_train),
+                dev_score_base=self.__score_func_base(model, self._dataset.X_dev, self._dataset.y_dev),
+                test_score_base=self.__score_func_base(model, self._dataset.X_test, self._dataset.y_test),
+                score_metric=self._score_metric_name,
+                base_score_metric=self._base_score_metric_name
+            )
+            results.save(models_dir + '_no_weights')
+            self._logger.info("Base performance on test without weights: {}".format(results.test_score_base))
+            self._logger.info("Performance on test: {}".format(results.test_score))
+            self._logger.info("Base performance on train without weights: {}".format(results.train_score_base))
+            self._logger.info("Performance on train: {}".format(results.train_score))
+            self._logger.info("Base performance on dev without weights: {}".format(results.dev_score_base))
+            self._logger.info("Performance on dev: {}".format(results.dev_score))
@@ -109,16 +109,16 @@ class Plotter(object):
         fig, ax = plt.subplots()

-        n = len(all_experiment_scores)
+        nb_experiments = len(all_experiment_scores)

         """
         Get as many different colors from the specified cmap (here nipy_spectral)
         as there are curve to plot.
         """
-        colors = Plotter.get_colors_from_cmap(n)
+        colors = Plotter.get_colors_from_cmap(nb_experiments)

         # For each curve to plot
-        for i in range(n):
+        for i in range(nb_experiments):
             # Retreive the scores in a list for each seed
             experiment_scores = list(all_experiment_scores[i].values())
             # Compute the mean and the std for the CI
@@ -17,7 +17,7 @@ def retreive_extracted_forest_sizes_number(models_dir, experiment_id):
     extracted_forest_sizes_root_path = experiment_seed_path + os.sep + 'extracted_forest_sizes'
     return len(os.listdir(extracted_forest_sizes_root_path))

-def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_dir, experiment_id):
+def extract_scores_across_seeds_and_extracted_forest_sizes(models_dir, results_dir, experiment_id, weights=True):
     experiment_id_path = models_dir + os.sep + str(experiment_id)  # models/{experiment_id}
     experiment_seed_root_path = experiment_id_path + os.sep + 'seeds'  # models/{experiment_id}/seeds
@@ -28,6 +28,7 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(...):
     experiment_train_scores = dict()
     experiment_dev_scores = dict()
     experiment_test_scores = dict()
+    experiment_weights = dict()
     all_extracted_forest_sizes = list()

     # Used to check if all losses were computed using the same metric (it should be the case)
@@ -44,14 +45,19 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(...):
         experiment_train_scores[seed] = list()
         experiment_dev_scores[seed] = list()
         experiment_test_scores[seed] = list()
+        experiment_weights[seed] = list()

         # List the forest sizes in models/{experiment_id}/seeds/{seed}/extracted_forest_sizes
         extracted_forest_sizes = os.listdir(extracted_forest_sizes_root_path)
+        extracted_forest_sizes = [nb_tree for nb_tree in extracted_forest_sizes if not 'no_weights' in nb_tree]
         extracted_forest_sizes.sort(key=int)
         all_extracted_forest_sizes.append(list(map(int, extracted_forest_sizes)))
         for extracted_forest_size in extracted_forest_sizes:
             # models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}
+            if weights:
                 extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size
+            else:
+                extracted_forest_size_path = extracted_forest_sizes_root_path + os.sep + extracted_forest_size + '_no_weights'
             # Load models/{experiment_id}/seeds/{seed}/extracted_forest_sizes/{extracted_forest_size}/model_raw_results.pickle file
             model_raw_results = ModelRawResults.load(extracted_forest_size_path)
             # Save the scores
@@ -60,6 +66,8 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(...):
             experiment_test_scores[seed].append(model_raw_results.test_score)
             # Save the metric
             experiment_score_metrics.append(model_raw_results.score_metric)
+            # Save the weights
+            #experiment_weights[seed].append(model_raw_results.model_weights)

     # Sanity checks
     if len(set(experiment_score_metrics)) > 1:
@@ -67,7 +75,8 @@ def extract_scores_across_seeds_and_extracted_forest_sizes(...):
     if len(set([sum(extracted_forest_sizes) for extracted_forest_sizes in all_extracted_forest_sizes])) != 1:
         raise ValueError("The extracted forest sizes aren't the sames across seeds.")

-    return experiment_train_scores, experiment_dev_scores, experiment_test_scores, all_extracted_forest_sizes[0], experiment_score_metrics[0]
+    return experiment_train_scores, experiment_dev_scores, experiment_test_scores, \
+        all_extracted_forest_sizes[0], experiment_score_metrics[0]  #, experiment_weights
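With the new `weights` flag, the same extraction routine can be pointed either at the standard result directories or at their `_no_weights` counterparts written by the Trainer. A usage sketch (the experiment id and directory paths are placeholder examples):

```python
# Scores of the OMP-weighted models.
train_w, dev_w, test_w, forest_sizes, metric = \
    extract_scores_across_seeds_and_extracted_forest_sizes('models/boston/stage4', 'results', 3)

# Scores of the same models evaluated without weights
# (reads .../{extracted_forest_size}_no_weights/model_raw_results.pickle instead).
train_nw, dev_nw, test_nw, _, _ = \
    extract_scores_across_seeds_and_extracted_forest_sizes('models/boston/stage4', 'results', 3, weights=False)
```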
 def extract_scores_across_seeds_and_forest_size(models_dir, results_dir, experiment_id, extracted_forest_sizes_number):
     experiment_id_path = models_dir + os.sep + str(experiment_id)  # models/{experiment_id}
@@ -120,6 +129,7 @@ if __name__ == "__main__":
     DEFAULT_RESULTS_DIR = os.environ["project_dir"] + os.sep + 'results'
     DEFAULT_MODELS_DIR = os.environ["project_dir"] + os.sep + 'models'
+    DEFAULT_PLOT_WEIGHT_DENSITY = False

     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('--stage', nargs='?', type=int, required=True, help='Specify the stage number among [1, 5].')
@@ -130,6 +140,7 @@ if __name__ == "__main__":
     parser.add_argument('--dataset_name', nargs='?', type=str, required=True, help='Specify the dataset name. TODO: read it from models dir directly.')
     parser.add_argument('--results_dir', nargs='?', type=str, default=DEFAULT_RESULTS_DIR, help='The output directory of the results.')
     parser.add_argument('--models_dir', nargs='?', type=str, default=DEFAULT_MODELS_DIR, help='The output directory of the trained models.')
+    parser.add_argument('--plot_weight_density', action='store_true', default=DEFAULT_PLOT_WEIGHT_DENSITY, help='Plot the weight density. Only working for regressor models for now.')
     args = parser.parse_args()

     if args.stage not in list(range(1, 6)):
@@ -347,9 +358,17 @@ if __name__ == "__main__":
             extract_scores_across_seeds_and_extracted_forest_sizes(args.models_dir, args.results_dir, args.experiment_ids[1])
         # omp_with_params
         logger.info('Loading omp_with_params experiment scores...')
+        """omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
+            omp_with_params_experiment_score_metric, experiment_weights = extract_scores_across_seeds_and_extracted_forest_sizes(
+            args.models_dir, args.results_dir, args.experiment_ids[2])"""
         omp_with_params_train_scores, omp_with_params_dev_scores, omp_with_params_test_scores, _, \
             omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
                 args.models_dir, args.results_dir, args.experiment_ids[2])
+        # omp_with_params_without_weights
+        logger.info('Loading omp_with_params experiment scores...')
+        omp_with_params_without_weights_train_scores, omp_with_params_without_weights_dev_scores, omp_with_params_without_weights_test_scores, _, \
+            omp_with_params_experiment_score_metric = extract_scores_across_seeds_and_extracted_forest_sizes(
+                args.models_dir, args.results_dir, args.experiment_ids[2], weights=False)

         """# base_with_params
         logger.info('Loading base_with_params experiment scores 2...')
@@ -369,17 +388,21 @@ if __name__ == "__main__":
             raise ValueError('Score metrics of all experiments must be the same.')
         experiments_score_metric = base_with_params_experiment_score_metric

-        output_path = os.path.join(args.results_dir, args.dataset_name, 'stage4')
+        output_path = os.path.join(args.results_dir, args.dataset_name, 'stage4_fix')
         pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)

         Plotter.plot_stage2_losses(
             file_path=output_path + os.sep + 'losses.png',
-            all_experiment_scores=[base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores],
-            all_labels=['base', 'random', 'omp'],
+            all_experiment_scores=[base_with_params_test_scores, random_with_params_test_scores, omp_with_params_test_scores,
+                omp_with_params_without_weights_test_scores],
+            all_labels=['base', 'random', 'omp', 'omp_without_weights'],
             x_value=with_params_extracted_forest_sizes,
             xlabel='Number of trees extracted',
             ylabel=experiments_score_metric,
             title='Loss values of {}\nusing best params of previous stages'.format(args.dataset_name))
+        # experiment_weights
+        #Plotter.weight_density(experiment_weights, output_path + os.sep + 'weight_density.png')
     else:
         raise ValueError('This stage number is not supported yet, but it will be!')
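The new `--plot_weight_density` flag and the commented-out `Plotter.weight_density` call point to a density plot of the saved OMP coefficients. A minimal sketch of what such a helper could look like (a hypothetical implementation, not the repository's; assumes matplotlib and the per-seed weights dictionary that the extraction routine would return):

```python
import matplotlib.pyplot as plt
import numpy as np

def weight_density(experiment_weights, file_path):
    # experiment_weights: {seed: [coef_ arrays, one per extracted forest size]}
    fig, ax = plt.subplots()
    for seed, weights_per_size in experiment_weights.items():
        for coefs in weights_per_size:
            coefs = np.asarray(coefs)
            nonzero = coefs[coefs != 0]          # only the trees actually selected by OMP
            if nonzero.size:
                ax.hist(nonzero, bins=30, density=True, alpha=0.3)
    ax.set_xlabel('OMP coefficient value')
    ax.set_ylabel('density')
    fig.savefig(file_path)
    plt.close(fig)
```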
@@ -13,9 +13,11 @@
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        58,
-        43535,
-        234234
+        1,
+        2,
+        3,
+        4,
+        5
     ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
The same seed change is applied in a second experiment configuration file:

@@ -13,9 +13,11 @@
     "test_size": 0.2,
     "random_seed_number": 1,
     "seeds": [
-        58,
-        43535,
-        234234
+        1,
+        2,
+        3,
+        4,
+        5
     ],
     "subsets_used": "train,dev",
     "normalize_weights": false,
results/boston/stage4/losses.png: image replaced (43.7 KiB → 110 KiB)
results/iris/stage1/losses.png: image replaced (64.7 KiB → 66.1 KiB)
-for dataset in diamonds california_housing boston iris diabetes digits linnerud wine breast_cancer olivetti_faces 20newsgroups_vectorized lfw_pairs
+seeds='1 2 3'
+for dataset in boston iris diabetes digits linnerud wine breast_cancer olivetti_faces 20newsgroups_vectorized lfw_pairs california_housing diamonds
 do
-    python code/compute_results.py --stage=1 --experiment_ids 1 2 3 4 5 6 --dataset_name=$dataset --models_dir=models/$dataset/stage1
-    python code/compute_results.py --stage=2 --experiment_ids 1 2 3 4 --dataset_name=$dataset --models_dir=models/$dataset/stage2
-    python code/compute_results.py --stage=3 --experiment_ids 1 2 3 --dataset_name=$dataset --models_dir=models/$dataset/stage3
     python code/compute_results.py --stage=4 --experiment_ids 1 2 3 --dataset_name=$dataset --models_dir=models/$dataset/stage4
 done
#!/bin/bash
core_number=5
walltime=1:00
seeds='1 2 3'
for dataset in diabetes #diamonds california_housing boston linnerud
do
oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=none --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=1 --models_dir=models/$dataset/stage5 --subsets_used train+dev,train+dev"
oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=random --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=2 --models_dir=models/$dataset/stage5 --subsets_used train+dev,train+dev"
oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=omp --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=3 --models_dir=models/$dataset/stage5 --subsets_used train+dev,train+dev"
oarsub -p "(gpu is null)" -l /core=$core_number,walltime=1:00 "conda activate test_env && python code/train.py --dataset_name=$dataset --seeds $seeds --extraction_strategy=similarity --extracted_forest_size_stop=0.40 --extracted_forest_size_samples=30 --experiment_id=4 --models_dir=models/$dataset/stage5 --subsets_used train+dev,train+dev"
done
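The new stage-5 script above submits one `code/train.py` run per extraction strategy through the OAR scheduler (`oarsub`). For a quick local check without a cluster, the four wrapped commands can be launched sequentially; a sketch using Python's subprocess module (parameters copied from the script, paths assumed relative to the repository root):

```python
import subprocess

dataset = 'diabetes'
strategies = ['none', 'random', 'omp', 'similarity']  # experiment ids 1..4 in the script above

for experiment_id, strategy in enumerate(strategies, start=1):
    subprocess.run([
        'python', 'code/train.py',
        '--dataset_name={}'.format(dataset),
        '--seeds', '1', '2', '3',
        '--extraction_strategy={}'.format(strategy),
        '--extracted_forest_size_stop=0.40',
        '--extracted_forest_size_samples=30',
        '--experiment_id={}'.format(experiment_id),
        '--models_dir=models/{}/stage5'.format(dataset),
        '--subsets_used', 'train+dev,train+dev',
    ], check=True)
```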