Skip to content
Snippets Groups Projects
Select Git revision
  • master default protected
  • fullUD
  • movementInAction
3 results

Trainer.hpp

Blame
  • Trainer.hpp 5.30 KiB
    /// @file Trainer.hpp
    /// @author Franck Dary
    /// @version 1.0
    /// @date 2018-08-03
    
    #ifndef TRAINER__H
    #define TRAINER__H
    
    #include "TapeMachine.hpp"
    #include "BD.hpp"
    #include "Config.hpp"
    
    /// @brief An object capable of training a TapeMachine given a BD initialized with training examples.
    class Trainer
    {
      public :

      /// @brief The absolute path in which this experience (training) is taking place
      std::string expPath;

      private :

      /// @brief The TapeMachine that will be trained.
      TapeMachine & tm;
      /// @brief The BD initialized with training examples.
      BD & trainBD;
      /// @brief The configuration of the TapeMachine while processing trainBD.
      Config & trainConfig;

      /// @brief The BD initialized with dev examples.
      /// Can be nullptr if dev is not used in this training.
      BD * devBD;
      /// @brief The configuration of the TapeMachine while processing devBD.
      /// Can be nullptr if dev is not used in this training.
      Config * devConfig;

      public :

      /// @brief Shorthand for the description of the features given to a Classifier.
      using FD = FeatureModel::FeatureDescription;
      /// @brief A single training example : an int paired with its FeatureDescription.
      /// NOTE(review): the int is presumably the gold class index expected by the
      /// Classifier — confirm against the Classifier / MLP implementation.
      using Example = std::pair<int, FD>;
      /// @brief Iterator over a vector of Example.
      /// The element type is spelled out in full here but is identical to Example.
      using ExamplesIter = std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator;

      private :

      /// @brief Train the TapeMachine using batches of examples.
      /// For each epoch all the Classifier of the TapeMachine are fed all the 
      /// training examples, at the end of the epoch Classifier are evaluated on 
      /// the devBD if available, and each Classifier will be saved only if its score
      /// on the current epoch is its all time best.\n
      /// When a Classifier is saved that way, all the Dict involved are also saved.
      /// @param nbIter The number of epochs.
      /// @param batchSize The size of each batch (in number of examples).
      /// @param mustShuffle Will the examples be shuffled after every epoch ?
      void trainBatched(int nbIter, int batchSize, bool mustShuffle);
      /// @brief Extract training examples for all Classifier
      ///
      /// @param examples The map that will be filled by this function.
      /// @param config The configuration from which the examples will be extracted.
      void getExamplesByClassifier(std::map<Classifier*, MLP::Examples> & examples, Config & config);

      /// @brief Make each Classifier go over every examples.
      /// Depending on getScoreOnBatch, it can update the parameters or not.
      /// @param examples Map each trainable Classifier with a set of examples.
      /// @param batchSize The batch size to use.
      /// @param nbExamples Map each trainable Classifier to a count of how many examples it has seen during this epoch and a count of how many of this examples it has correctly classified. This map is filled by this function.
      /// @param getScoreOnBatch The MLP function that must be called to get the score of a classifier on a certain batch.
      void processAllExamples(
        std::map<Classifier*, MLP::Examples> & examples,
        int batchSize, std::map< std::string, std::pair<int, int> > & nbExamples,
        std::function<int(Classifier *, MLP::Examples &, int, int)> getScoreOnBatch);

      /// @brief Print the score obtained by all Classifier on this epoch.
      ///
      /// @param output Where to print the output.
      /// @param nbExamplesTrain Map each trainable Classifier to a count of how many train examples it has seen during this epoch and a count of how many of this examples it has correctly classified.
      /// @param nbExamplesDev Map each trainable Classifier to a count of how many dev examples it has seen during this epoch and a count of how many of this examples it has correctly classified.
      /// @param trainScores The scores obtained by each Classifier on the train set.
      /// @param devScores The scores obtained by each Classifier on the dev set.
      /// @param bestIter Map each classifier to its best epoch. It is updated by this function.
      /// @param nbIter The total number of epoch of the training.
      /// @param curIter The current epoch of the training.
      void printIterationScores(FILE * output,
        std::map< std::string, std::pair<int, int> > & nbExamplesTrain,
        std::map< std::string, std::pair<int, int> > & nbExamplesDev,
        std::map< std::string, std::vector<float> > & trainScores,
        std::map< std::string, std::vector<float> > & devScores,
        std::map<std::string, int> & bestIter,
        int nbIter, int curIter);

      /// @brief For every Classifier, shuffle its training examples.
      ///
      /// @param examples Map each Classifier to a set of training examples.
      void shuffleAllExamples(std::map<Classifier*, MLP::Examples > & examples);

      public :

      /// @brief Construct a new Trainer without a dev set.
      /// devBD and devConfig are left as null pointers (no dev evaluation).
      /// @param tm The TapeMachine to use.
      /// @param bd The BD to use.
      /// @param config The config to use.
      Trainer(TapeMachine & tm, BD & bd, Config & config);
      /// @brief Construct a new Trainer with a dev set.
      ///
      /// @param tm The TapeMachine to use.
      /// @param bd The BD corresponding to the training dataset.
      /// @param config The Config corresponding to bd.
      /// @param devBD The BD corresponding to the dev dataset.
      /// @param devConfig The Config corresponding to devBD.
      Trainer(TapeMachine & tm, BD & bd, Config & config, BD * devBD, Config * devConfig);
      /// @brief Train the TapeMachine.
      ///
      /// @param nbIter The number of training epochs.
      /// @param batchSize The size of each batch.
      /// @param mustShuffle Will the examples be shuffled after every epoch ?
      void train(int nbIter, int batchSize, bool mustShuffle);
    };
    
    #endif