Skip to content
Snippets Groups Projects
Commit a0f7c96f authored by Charly Lamothe's avatar Charly Lamothe
Browse files

- Split the train function into three distinct functions;

- Update TODO list.
parent f866e30d
No related branches found
No related tags found
1 merge request: !3 clean scripts
* Trouver des jeux de données pertinents
* Entraîner et tester des forêts de différentes tailles
* Entraîner et tester en régression et classification
* Entraîner et tester sur différentes modalités (pas seulement des datasets d'images)
* Entraîner avec différents hyperparamètres (d, profondeur, epsilon)
* Appliquer OMP avec différentes valeurs de k (notamment un petit k)
* Faire des figures
* Implémenter et comparer les systèmes concurrents
\ No newline at end of file
* Fix pickle loading of ModelRawResults, because saving the model_object leads to import issues.
* Fix ModelFactory.load function.
* Fix model results loading in compute_results.py.
* Check that the OMP multiclass classifier is working as expected.
* In the Bayesian search computation, output a different file name depending on the task of the trained model.
* Check the best params scores of the regressors (neg_mean_squared_error leads to huge negative values).
* Prepare the json experiment files to run.
\ No newline at end of file
......@@ -20,48 +20,51 @@ class Trainer(object):
self._dataset = dataset
self._logger = LoggerFactory.create(LOG_PATH, __name__)
def train(self, model, models_dir):
"""
:param model: Object with
:param models_dir: Where the results will be saved
:return:
"""
# todo cette fonction ne fait pas que "train", elle choisit le jeu de données, train et evalue le modèle -> nom à changer
self._logger.debug('Training model using train set...')
begin_time = time.time()
def init(self, model):
if model.models_parameters.subsets_used == 'train,dev':
X_forest = self._dataset.X_train
y_forest = self._dataset.y_train
X_omp = self._dataset.X_dev
y_omp = self._dataset.y_dev
self._X_forest = self._dataset.X_train
self._y_forest = self._dataset.y_train
self._X_omp = self._dataset.X_dev
self._y_omp = self._dataset.y_dev
self._logger.debug('Fitting the forest on train subset and OMP on dev subset.')
elif model.models_parameters.subsets_used == 'train+dev,train+dev':
X_forest = np.concatenate([self._dataset.X_train, self._dataset.X_dev])
X_omp = X_forest
y_forest = np.concatenate([self._dataset.y_train, self._dataset.y_dev])
y_omp = y_forest
self._X_forest = np.concatenate([self._dataset.X_train, self._dataset.X_dev])
self._X_omp = self._X_forest
self._y_forest = np.concatenate([self._dataset.y_train, self._dataset.y_dev])
self._y_omp = self._y_forest
self._logger.debug('Fitting both the forest and OMP on train+dev subsets.')
elif model.models_parameters.subsets_used == 'train,train+dev':
X_forest = self._dataset.X_train
y_forest = self._dataset.y_train
X_omp = np.concatenate([self._dataset.X_train, self._dataset.X_dev])
y_omp = np.concatenate([self._dataset.y_train, self._dataset.y_dev])
self._X_forest = self._dataset.X_train
self._y_forest = self._dataset.y_train
self._X_omp = np.concatenate([self._dataset.X_train, self._dataset.X_dev])
self._y_omp = np.concatenate([self._dataset.y_train, self._dataset.y_dev])
else:
raise ValueError("Unknown specified subsets_used parameter '{}'".format(model.models_parameters.subsets_used))
def train(self, model):
"""
:param model: Object with
:return:
"""
self._logger.debug('Training model using train set...')
self._begin_time = time.time()
model.fit(
X_forest=X_forest,
y_forest=y_forest,
X_omp=X_omp,
y_omp=y_omp
X_forest=self._X_forest,
y_forest=self._y_forest,
X_omp=self._X_omp,
y_omp=self._y_omp
)
end_time = time.time()
self._end_time = time.time()
def compute_results(self, model, models_dir):
"""
:param model: Object with
:param models_dir: Where the results will be saved
"""
results = ModelRawResults(
model_object=model,
training_time=end_time - begin_time,
training_time=self._end_time - self._begin_time,
datetime=datetime.datetime.now(),
train_score=model.score(self._dataset.X_train, self._dataset.y_train),
dev_score=model.score(self._dataset.X_dev, self._dataset.y_dev),
......
......@@ -69,7 +69,9 @@ def process_job(seed, parameters, experiment_id, hyperparameters):
model = ModelFactory.build(dataset.task, model_parameters)
trainer.train(model, sub_models_dir)
trainer.init(model)
trainer.train(model)
trainer.compute_results(model, sub_models_dir)
logger.info('Training done')
if __name__ == "__main__":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment