Skip to content
Snippets Groups Projects
Commit a0f7c96f authored by Charly Lamothe
Browse files

- Split train function into three distinct functions;

- Update TODO list.
parent f866e30d
No related branches found
No related tags found
1 merge request: !3 clean scripts
* Trouver des jeux de données pertinents * Fix pickle loading of ModelRawResults, because saving the model_object leads to import issues.
* Entraîner et tester des forêts de différentes tailles * Fix ModelFactory.load function.
* Entraîner et tester en regression et classification * Fix model results loading in compute_results.py.
* Entraîner et tester sur différentes modalités (pas seulement des datasets d'images) * Check that the OMP multiclass classifier is working as expected.
* Entraîner avec différents hyperparamètres (d, profondeur, epsilon) * In the Bayesian search computation, output a different file name depending on the task of the trained model.
* Appliquer OMP avec différentes valeurs de k (notamment un petit k) * Check the best params scores of the regressors (neg_mean_squared_error leads to huge negative values).
* Faire des figures * Prepare the json experiment files to run.
* Implémenter et comparer les systèmes concurrents \ No newline at end of file
\ No newline at end of file
...@@ -20,48 +20,51 @@ class Trainer(object): ...@@ -20,48 +20,51 @@ class Trainer(object):
self._dataset = dataset self._dataset = dataset
self._logger = LoggerFactory.create(LOG_PATH, __name__) self._logger = LoggerFactory.create(LOG_PATH, __name__)
def train(self, model, models_dir): def init(self, model):
"""
:param model: Object with
:param models_dir: Where the results will be saved
:return:
"""
# todo cette fonction ne fait pas que "train", elle choisit le jeu de données, train et evalue le modèle -> nom à changer
self._logger.debug('Training model using train set...')
begin_time = time.time()
if model.models_parameters.subsets_used == 'train,dev': if model.models_parameters.subsets_used == 'train,dev':
X_forest = self._dataset.X_train self._X_forest = self._dataset.X_train
y_forest = self._dataset.y_train self._y_forest = self._dataset.y_train
X_omp = self._dataset.X_dev self._X_omp = self._dataset.X_dev
y_omp = self._dataset.y_dev self._y_omp = self._dataset.y_dev
self._logger.debug('Fitting the forest on train subset and OMP on dev subset.') self._logger.debug('Fitting the forest on train subset and OMP on dev subset.')
elif model.models_parameters.subsets_used == 'train+dev,train+dev': elif model.models_parameters.subsets_used == 'train+dev,train+dev':
X_forest = np.concatenate([self._dataset.X_train, self._dataset.X_dev]) self._X_forest = np.concatenate([self._dataset.X_train, self._dataset.X_dev])
X_omp = X_forest self._X_omp = self._X_forest
y_forest = np.concatenate([self._dataset.y_train, self._dataset.y_dev]) self._y_forest = np.concatenate([self._dataset.y_train, self._dataset.y_dev])
y_omp = y_forest self._y_omp = self._y_forest
self._logger.debug('Fitting both the forest and OMP on train+dev subsets.') self._logger.debug('Fitting both the forest and OMP on train+dev subsets.')
elif model.models_parameters.subsets_used == 'train,train+dev': elif model.models_parameters.subsets_used == 'train,train+dev':
X_forest = self._dataset.X_train self._X_forest = self._dataset.X_train
y_forest = self._dataset.y_train self._y_forest = self._dataset.y_train
X_omp = np.concatenate([self._dataset.X_train, self._dataset.X_dev]) self._X_omp = np.concatenate([self._dataset.X_train, self._dataset.X_dev])
y_omp = np.concatenate([self._dataset.y_train, self._dataset.y_dev]) self._y_omp = np.concatenate([self._dataset.y_train, self._dataset.y_dev])
else: else:
raise ValueError("Unknown specified subsets_used parameter '{}'".format(model.models_parameters.subsets_used)) raise ValueError("Unknown specified subsets_used parameter '{}'".format(model.models_parameters.subsets_used))
def train(self, model):
"""
:param model: Object with
:return:
"""
self._logger.debug('Training model using train set...')
self._begin_time = time.time()
model.fit( model.fit(
X_forest=X_forest, X_forest=self._X_forest,
y_forest=y_forest, y_forest=self._y_forest,
X_omp=X_omp, X_omp=self._X_omp,
y_omp=y_omp y_omp=self._y_omp
) )
end_time = time.time() self._end_time = time.time()
def compute_results(self, model, models_dir):
"""
:param model: Object with
:param models_dir: Where the results will be saved
"""
results = ModelRawResults( results = ModelRawResults(
model_object=model, model_object=model,
training_time=end_time - begin_time, training_time=self._end_time - self._begin_time,
datetime=datetime.datetime.now(), datetime=datetime.datetime.now(),
train_score=model.score(self._dataset.X_train, self._dataset.y_train), train_score=model.score(self._dataset.X_train, self._dataset.y_train),
dev_score=model.score(self._dataset.X_dev, self._dataset.y_dev), dev_score=model.score(self._dataset.X_dev, self._dataset.y_dev),
......
...@@ -69,7 +69,9 @@ def process_job(seed, parameters, experiment_id, hyperparameters): ...@@ -69,7 +69,9 @@ def process_job(seed, parameters, experiment_id, hyperparameters):
model = ModelFactory.build(dataset.task, model_parameters) model = ModelFactory.build(dataset.task, model_parameters)
trainer.train(model, sub_models_dir) trainer.init(model)
trainer.train(model)
trainer.compute_results(model, sub_models_dir)
logger.info('Training done') logger.info('Training done')
if __name__ == "__main__": if __name__ == "__main__":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment