Something went wrong on our end
Select Git revision
Trainer.hpp
-
Franck Dary authored
Trainer.hpp 5.30 KiB
/// @file Trainer.hpp
/// @author Franck Dary
/// @version 1.0
/// @date 2018-08-03

#ifndef TRAINER_HPP
#define TRAINER_HPP

// Standard headers this interface uses directly (include-what-you-use;
// previously pulled in transitively through the project headers below).
#include <cstdio>
#include <functional>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include "TapeMachine.hpp"
#include "BD.hpp"
#include "Config.hpp"

/// @brief An object capable of training a TapeMachine given a BD initialized
/// with training examples.
class Trainer
{
  public :

  /// @brief The absolute path in which this experience (training) is taking place.
  std::string expPath;

  private :

  /// @brief The TapeMachine that will be trained.
  TapeMachine & tm;
  /// @brief The BD initialized with training examples.
  BD & trainBD;
  /// @brief The configuration of the TapeMachine while processing trainBD.
  Config & trainConfig;
  /// @brief The BD initialized with dev examples.
  /// Can be nullptr if dev is not used in this training.
  BD * devBD;
  /// @brief The configuration of the TapeMachine while processing devBD.
  /// Can be nullptr if dev is not used in this training.
  Config * devConfig;

  public :

  /// @brief Shorthand for a feature description produced by the FeatureModel.
  using FD = FeatureModel::FeatureDescription;
  /// @brief A training example: an expected class paired with its features.
  using Example = std::pair<int, FD>;
  /// @brief Iterator over a sequence of training examples.
  using ExamplesIter = std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator;

  private :

  /// @brief Train the TapeMachine using batches of examples.
  ///
  /// For each epoch all the Classifier of the TapeMachine are fed all the
  /// training examples; at the end of the epoch each Classifier is evaluated on
  /// the devBD if available, and each Classifier will be saved only if its score
  /// on the current epoch is its all time best.\n
  /// When a Classifier is saved that way, all the Dict involved are also saved.
  /// @param nbIter The number of epochs.
  /// @param batchSize The size of each batch (in number of examples).
  /// @param mustShuffle Will the examples be shuffled after every epoch ?
  void trainBatched(int nbIter, int batchSize, bool mustShuffle);

  /// @brief Extract training examples for all Classifier.
  ///
  /// @param examples The map that will be filled by this function.
  /// @param config The configuration from which the examples will be extracted.
  void getExamplesByClassifier(std::map<Classifier*, MLP::Examples> & examples, Config & config);

  /// @brief Make each Classifier go over every examples.
  ///
  /// Depending on getScoreOnBatch, it can update the parameters or not.
  /// @param examples Map each trainable Classifier with a set of examples.
  /// @param batchSize The batch size to use.
  /// @param nbExamples Map each trainable Classifier to a count of how many examples it has seen during this epoch and a count of how many of this examples it has correctly classified. This map is filled by this function.
  /// @param getScoreOnBatch The MLP function that must be called to get the score of a classifier on a certain batch.
  void processAllExamples(
    std::map<Classifier*, MLP::Examples> & examples,
    int batchSize, std::map< std::string, std::pair<int, int> > & nbExamples,
    std::function<int(Classifier *, MLP::Examples &, int, int)> getScoreOnBatch);

  /// @brief Print the score obtained by all Classifier on this epoch.
  ///
  /// @param output Where to print the output.
  /// @param nbExamplesTrain Map each trainable Classifier to a count of how many train examples it has seen during this epoch and a count of how many of this examples it has correctly classified.
  /// @param nbExamplesDev Map each trainable Classifier to a count of how many dev examples it has seen during this epoch and a count of how many of this examples it has correctly classified.
  /// @param trainScores The scores obtained by each Classifier on the train set.
  /// @param devScores The scores obtained by each Classifier on the dev set.
  /// @param bestIter Map each classifier to its best epoch. It is updated by this function.
  /// @param nbIter The total number of epoch of the training.
  /// @param curIter The current epoch of the training.
  void printIterationScores(FILE * output,
    std::map< std::string, std::pair<int, int> > & nbExamplesTrain,
    std::map< std::string, std::pair<int, int> > & nbExamplesDev,
    std::map< std::string, std::vector<float> > & trainScores,
    std::map< std::string, std::vector<float> > & devScores,
    std::map<std::string, int> & bestIter,
    int nbIter, int curIter);

  /// @brief For every Classifier, shuffle its training examples.
  ///
  /// @param examples Map each Classifier to a set of training examples.
  void shuffleAllExamples(std::map<Classifier*, MLP::Examples > & examples);

  public :

  /// @brief Construct a new Trainer without a dev set.
  ///
  /// @param tm The TapeMachine to use.
  /// @param bd The BD to use.
  /// @param config The config to use.
  Trainer(TapeMachine & tm, BD & bd, Config & config);

  /// @brief Construct a new Trainer with a dev set.
  ///
  /// @param tm The TapeMachine to use.
  /// @param bd The BD corresponding to the training dataset.
  /// @param config The Config corresponding to bd.
  /// @param devBD The BD corresponding to the dev dataset.
  /// @param devConfig The Config corresponding to devBD.
  Trainer(TapeMachine & tm, BD & bd, Config & config, BD * devBD, Config * devConfig);

  /// @brief Train the TapeMachine.
  ///
  /// @param nbIter The number of training epochs.
  /// @param batchSize The size of each batch.
  /// @param mustShuffle Will the examples be shuffled after every epoch ?
  void train(int nbIter, int batchSize, bool mustShuffle);
};

#endif // TRAINER_HPP