diff --git a/MLP/CMakeLists.txt b/MLP/CMakeLists.txt
index a88a15e14f646b63d43f03d5665ed094d47eef56..0d6f0be647741ce34e5e65bc814e0c3e9a2164c7 100644
--- a/MLP/CMakeLists.txt
+++ b/MLP/CMakeLists.txt
@@ -3,3 +3,4 @@ FILE(GLOB SOURCES src/*.cpp)
 #compiling library
 add_library(MLP STATIC ${SOURCES})
 target_link_libraries(MLP dynet)
+target_link_libraries(MLP transition_machine)
diff --git a/MLP/include/MLP.hpp b/MLP/include/MLP.hpp
index 7713815f3fbe8d6b9b31e7097f306a2fd41f1c75..974e0c52963bcb49b7bba28c583d385e50c669fe 100644
--- a/MLP/include/MLP.hpp
+++ b/MLP/include/MLP.hpp
@@ -12,6 +12,7 @@
 #include <dynet/timing.h>
 #include <dynet/expr.h>
 #include "FeatureModel.hpp"
+#include "TrainingExamples.hpp"
 
 /// @brief Multi Layer Perceptron.
 ///
@@ -21,9 +22,6 @@ class MLP
 {
   public :
 
-  /// @brief A sequence of training examples, accompanied by the order in which it has to be iterated over.
-  using Examples = std::pair< std::vector<int>, std::vector<std::pair<int, FeatureModel::FeatureDescription> > >;
-
   /// @brief Activation function for a MLP Layer.
   enum Activation
   {
@@ -194,24 +192,20 @@
   /// @return A vector containing one score per possible class.
   std::vector<float> predict(FeatureModel::FeatureDescription & fd);
 
-  /// @brief Train the MLP on a batch of training examples.
+  /// @brief Train the MLP on a batch of TrainingExamples.
   ///
-  /// The parameters will be updated by this function.
-  /// @param examples A set of training examples.
-  /// @param start The index of the first element of the batch.
-  /// @param end The index of the last element of the batch.
+  /// @param examples A batch of training examples.
   ///
-  /// @return The number of examples for which the class was correctly predicted by the MLP.
-  int trainOnBatch(Examples & examples, int start, int end);
-  /// @brief Get the score of the MLP on a batch of training examples.
+  /// @return The number of examples in the batch correctly classified by the MLP.
+  int trainOnBatch(TrainingExamples & examples);
+
+  /// @brief Predict the classes of a batch of training examples.
   ///
-  /// The parameters will not be updated by this function.
-  /// @param examples A set of training examples.
-  /// @param start The index of the first element of the batch.
-  /// @param end The index of the last element of the batch.
+  /// @param examples The training examples.
   ///
-  /// @return The number of examples for which the class was correctly predicted by the MLP.
-  int getScoreOnBatch(Examples & examples, int start, int end);
+  /// @return The number of examples in the batch correctly classified by the MLP.
+  int getScoreOnBatch(TrainingExamples & examples);
+
   /// @brief Save the MLP to a file.
   ///
   /// @param filename The file to write the MLP to.
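For illustration: the old (examples, start, end) triple disappears from this API because a TrainingExamples object now carries its own iteration order and batch cursor, so a caller simply drains it. A minimal usage sketch mirroring the Trainer loop further down in this patch — the `mlp` object, a populated `examples`, and the batch size of 64 are all assumptions, not part of the patch:

  // Hypothetical caller of the new batch API.
  examples.shuffle();                        // permutes the iteration order only
  while (true)
  {
    TrainingExamples batch = examples.getBatch(64);
    if (batch.size() == 0)                   // the example set is exhausted
      break;
    int nbCorrect = mlp.trainOnBatch(batch); // updates the MLP parameters
  }
  examples.reset();                          // rewind the cursor for the next epoch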
diff --git a/MLP/src/MLP.cpp b/MLP/src/MLP.cpp
index 0c1238595c3e3dc32b30f1987c9fda47c508ace5..6fe13b37278781222ec31baa0449102c4e12b4a3 100644
--- a/MLP/src/MLP.cpp
+++ b/MLP/src/MLP.cpp
@@ -300,7 +300,7 @@ void MLP::printParameters(FILE * output)
   fprintf(output, "Parameters : NOT IMPLEMENTED\n");
 }
 
-int MLP::trainOnBatch(Examples & examples, int start, int end)
+int MLP::trainOnBatch(TrainingExamples & examples)
 {
   dynet::ComputationGraph cg;
   std::vector<dynet::Expression> inputs;
@@ -308,25 +308,24 @@ int MLP::trainOnBatch(Examples & examples, int start, int end)
   int inputDim = 0;
   int outputDim = layers.back().output_dim;
 
-  for(int i = start; i < end; i++)
+  for(unsigned int i = 0; i < examples.size(); i++)
   {
-    auto & order = examples.first;
-    int exampleIndex = order[i];
-    auto & example = examples.second[exampleIndex];
+    int index = examples.order[i];
+    auto & example = examples.examples[index];
 
     std::vector<dynet::Expression> expressions;
     expressions.clear();
 
-    for (auto & featValue : example.second.values)
+    for (auto & featValue : example.values)
       expressions.emplace_back(featValue2Expression(cg, featValue));
 
     inputs.emplace_back(dynet::concatenate(expressions));
     inputDim = inputs.back().dim().rows();
-    goldClasses.emplace_back((unsigned)example.first);
+    goldClasses.emplace_back((unsigned)examples.classes[index]);
   }
 
   dynet::Expression concatenation = dynet::concatenate(inputs);
-  int batchSize = end - start;
+  int batchSize = examples.size();
 
   dynet::Expression batchedInput = reshape((concatenation),
     dynet::Dim({(unsigned)inputDim}, batchSize));
@@ -358,7 +357,7 @@ int MLP::trainOnBatch(Examples & examples, int start, int end)
   return nbCorrect;
 }
 
-int MLP::getScoreOnBatch(Examples & examples, int start, int end)
+int MLP::getScoreOnBatch(TrainingExamples & examples)
 {
   bool currentDropoutActive = dropoutActive;
   dropoutActive = false;
@@ -369,25 +368,24 @@
   int inputDim = 0;
   int outputDim = layers.back().output_dim;
 
-  for(int i = start; i < end; i++)
+  for(unsigned int i = 0; i < examples.size(); i++)
   {
-    auto & order = examples.first;
-    int exampleIndex = order[i];
-    auto & example = examples.second[exampleIndex];
+    int index = examples.order[i];
+    auto & example = examples.examples[index];
 
     std::vector<dynet::Expression> expressions;
     expressions.clear();
 
-    for (auto & featValue : example.second.values)
+    for (auto & featValue : example.values)
       expressions.emplace_back(featValue2Expression(cg, featValue));
 
     inputs.emplace_back(dynet::concatenate(expressions));
     inputDim = inputs.back().dim().rows();
-    goldClasses.emplace_back((unsigned)example.first);
+    goldClasses.emplace_back((unsigned)examples.classes[index]);
   }
 
   dynet::Expression concatenation = dynet::concatenate(inputs);
-  int batchSize = end - start;
+  int batchSize = examples.size();
 
   dynet::Expression batchedInput = reshape((concatenation),
     dynet::Dim({(unsigned)inputDim}, batchSize));
diff --git a/trainer/CMakeLists.txt b/trainer/CMakeLists.txt
index cf482827b9cd6612c6dfc99503bbdd5338bb6ae4..51bcc455c8b55c22a42eb65390eab0cd99b76e13 100644
--- a/trainer/CMakeLists.txt
+++ b/trainer/CMakeLists.txt
@@ -8,3 +8,4 @@ install(TARGETS macaon_train DESTINATION bin)
 
 #compiling library
 add_library(trainer STATIC ${SOURCES})
+target_link_libraries(trainer transition_machine)
diff --git a/trainer/include/Trainer.hpp b/trainer/include/Trainer.hpp
index f33e714e64414c837c832c2439babcb75da8f332..3a601e3e0289158e11e7b7ea9596603e65de542d 100644
--- a/trainer/include/Trainer.hpp
+++ b/trainer/include/Trainer.hpp
@@ -9,6 +9,7 @@
 #include "TransitionMachine.hpp"
 #include "BD.hpp"
 #include "Config.hpp"
+#include "TrainingExamples.hpp"
 
 /// @brief An object capable of training a TransitionMachine given a BD initialized with training examples.
 class Trainer
@@ -40,10 +41,6 @@
   /// @brief The FeatureDescritpion of a Config.
   using FD = FeatureModel::FeatureDescription;
 
-  /// @brief A training example, a pair of a class to predict and the FeatureDescription of the corresponding Config.
-  using Example = std::pair<int, FD>;
-  /// @brief Iterator type of a vector of training examples.
-  using ExamplesIter = std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator;
 
   private :
 
@@ -58,11 +55,13 @@
   /// @param batchSize The size of each batch (in number of examples).
   /// @param mustShuffle Will the examples be shuffled after every epoch ?
   void trainBatched(int nbIter, int batchSize, bool mustShuffle);
-  /// @brief Extract training examples for all Classifier
+
+  /// @brief Use the TransitionMachine and a Config to build the TrainingExamples used during training.
+  ///
+  /// @param config The Config to use.
   ///
-  /// @param examples The map that will be filled by this function.
-  /// @param config The configuration from which the examples will be extracted.
-  void getExamplesByClassifier(std::map<Classifier*, MLP::Examples> & examples, Config & config);
+  /// @return For each Classifier, a set of training examples.
+  std::map<Classifier*, TrainingExamples> getExamplesByClassifier(Config & config);
 
   /// @brief Make each Classifier go over every examples.
   ///
@@ -71,10 +70,10 @@
   /// @param batchSize The batch size to use.
   /// @param nbExamples Map each trainable Classifier to a count of how many examples it has seen during this epoch and a count of how many of this examples it has correctly classified. This map is filled by this function.
   /// @param getScoreOnBatch The MLP function that must be called to get the score of a classifier on a certain batch.
-  void processAllExamples(
-    std::map<Classifier*, MLP::Examples> & examples,
-    int batchSize, std::map< std::string, std::pair<int, int> > & nbExamples,
-    std::function<int(Classifier *, MLP::Examples &, int, int)> getScoreOnBatch);
+  void processAllExamples(
+    std::map<Classifier*, TrainingExamples> & examples,
+    int batchSize, std::map< std::string, std::pair<int, int> > & nbExamples,
+    std::function<int(Classifier *, TrainingExamples &)> getScoreOnBatch);
 
   /// @brief Print the score obtained by all Classifier on this epoch.
   ///
@@ -97,7 +96,7 @@
   /// @brief For every Classifier, shuffle its training examples.
   ///
   /// @param examples Map each Classifier to a set of training examples.
-  void shuffleAllExamples(std::map<Classifier*, MLP::Examples > & examples);
+  void shuffleAllExamples(std::map<Classifier*, TrainingExamples> & examples);
 
   public :
 
diff --git a/trainer/src/Trainer.cpp b/trainer/src/Trainer.cpp
index b3c1c49bf567a38dde8a3923592137ffbaf74323..223cb17af1f4826fc0edb794ef5cab064596bc4c 100644
--- a/trainer/src/Trainer.cpp
+++ b/trainer/src/Trainer.cpp
@@ -13,8 +13,10 @@ Trainer::Trainer(TransitionMachine & tm, BD & bd, Config & config, BD * devBD, C
 
 }
 
-void Trainer::getExamplesByClassifier(std::map<Classifier*, MLP::Examples> & examples, Config & config)
+std::map<Classifier*, TrainingExamples> Trainer::getExamplesByClassifier(Config & config)
 {
+  std::map<Classifier*, TrainingExamples> examples;
+
   while (!config.isFinal())
   {
     TransitionMachine::State * currentState = tm.getCurrentState();
@@ -22,52 +24,46 @@ void Trainer::getExamplesByClassifier(std::map<Classifier*, MLP::Examples> & exa
     config.setCurrentStateName(&currentState->name);
     classifier->initClassifier(config);
 
-    //config.printForDebug(stderr);
-    //fprintf(stderr, "State : %s\n", currentState->name.c_str());
-
     int neededActionIndex = classifier->getOracleActionIndex(config);
     std::string neededActionName = classifier->getActionName(neededActionIndex);
 
     if(classifier->needsTrain())
-    {
-      examples[classifier].second.emplace_back(Example(neededActionIndex, classifier->getFeatureDescription(config)));
-      examples[classifier].first.emplace_back(examples[classifier].first.size());
-    }
+      examples[classifier].add(classifier->getFeatureDescription(config), neededActionIndex);
 
     Action * action = classifier->getAction(neededActionName);
 
     if(!action->appliable(config))
       fprintf(stderr, "WARNING (%s) : action \'%s\' is not appliable.\n", ERRINFO, neededActionName.c_str());
 
    action->apply(config);
 
-    //fprintf(stderr, "Action : %s\n", neededActionName.c_str());
-
     TransitionMachine::Transition * transition = tm.getTransition(neededActionName);
     tm.takeTransition(transition);
     config.moveHead(transition->headMvt);
   }
+
+  return examples;
 }
 
 void Trainer::processAllExamples(
-  std::map<Classifier*, MLP::Examples> & examples,
+  std::map<Classifier*, TrainingExamples> & examples,
   int batchSize, std::map< std::string, std::pair<int, int> > & nbExamples,
-  std::function<int(Classifier *, MLP::Examples &, int, int)> getScoreOnBatch)
+  std::function<int(Classifier *, TrainingExamples &)> getScoreOnBatch)
 {
   for(auto & it : examples)
   {
-    int nbBatches = (it.second.second.size() / batchSize) + (it.second.second.size() % batchSize ? 1 : 0);
-
-    for(int numBatch = 0; numBatch < nbBatches; numBatch++)
+    while(true)
     {
-      int currentBatchSize = std::min<int>(batchSize, it.second.second.size() - (numBatch * batchSize));
+      TrainingExamples batch = it.second.getBatch(batchSize);
 
-      int batchStart = numBatch * batchSize;
-      int batchEnd = batchStart + currentBatchSize;
+      if (batch.size() == 0)
+        break;
 
-      int nbCorrect = getScoreOnBatch(it.first, examples[it.first], batchStart, batchEnd);
+      int nbCorrects = getScoreOnBatch(it.first, batch);
 
-      nbExamples[it.first->name].first += currentBatchSize;
-      nbExamples[it.first->name].second += nbCorrect;
+      nbExamples[it.first->name].first += batch.size();
+      nbExamples[it.first->name].second += nbCorrects;
     }
+
+    it.second.reset();
   }
 }
 
@@ -122,23 +118,23 @@
   printColumns(output, {names, acc, train, dev, saved});
 }
 
-void Trainer::shuffleAllExamples(std::map<Classifier*, MLP::Examples > & examples)
+void Trainer::shuffleAllExamples(std::map<Classifier*, TrainingExamples> & examples)
 {
   for (auto & it : examples)
-    std::random_shuffle(it.second.first.begin(), it.second.first.end());
+    it.second.shuffle();
 }
 
 void Trainer::trainBatched(int nbIter, int batchSize, bool mustShuffle)
 {
-  std::map<Classifier*, MLP::Examples > trainExamples;
-  std::map<Classifier*, MLP::Examples > devExamples;
+  std::map<Classifier*, TrainingExamples> trainExamples;
+  std::map<Classifier*, TrainingExamples> devExamples;
 
   fprintf(stderr, "Training of \'%s\' :\n", tm.name.c_str());
 
-  getExamplesByClassifier(trainExamples, trainConfig);
+  trainExamples = getExamplesByClassifier(trainConfig);
 
   if(devBD && devConfig)
-    getExamplesByClassifier(devExamples, *devConfig);
+    devExamples = getExamplesByClassifier(*devConfig);
 
   auto & classifiers = tm.getClassifiers();
   for(Classifier * cla : classifiers)
@@ -158,15 +154,15 @@ void Trainer::trainBatched(int nbIter, int batchSize, bool mustShuffle)
       shuffleAllExamples(trainExamples);
 
     processAllExamples(trainExamples, batchSize, nbExamplesTrain,
-      [](Classifier * c, MLP::Examples & ex, int s, int e)
+      [](Classifier * c, TrainingExamples & ex)
       {
-        return c->trainOnBatch(ex, s, e);
+        return c->trainOnBatch(ex);
       });
 
     processAllExamples(devExamples, batchSize, nbExamplesDev,
-      [](Classifier * c, MLP::Examples & ex, int s, int e)
+      [](Classifier * c, TrainingExamples & ex)
       {
-        return c->getScoreOnBatch(ex, s, e);
+        return c->getScoreOnBatch(ex);
       });
 
     printIterationScores(stderr, nbExamplesTrain, nbExamplesDev,
diff --git a/transition_machine/include/Classifier.hpp b/transition_machine/include/Classifier.hpp
index 0dd55fa9b13fd17618e2850a631f7d7c1805f11f..d21747857c7adadd5571c305da229de81a997740 100644
--- a/transition_machine/include/Classifier.hpp
+++ b/transition_machine/include/Classifier.hpp
@@ -106,23 +106,18 @@ class Classifier
   ///
   /// @return The index of the correct Action to take.
   int getOracleActionIndex(Config & config);
-  /// @brief Test this Classifier against a batch of training examples.
+  /// @brief Predict the classes of a batch of training examples.
   ///
   /// @param examples A set of training examples.
-  /// @param start The index of the start of the batch.
-  /// @param end The index of the end of the batch.
   ///
-  /// @return The number of examples correctly classified by this Classifier.
-  int getScoreOnBatch(MLP::Examples & examples, int start, int end);
-  /// @brief Train this Classifier with a batch of training examples.
+  /// @return The number of examples in the batch whose class was correctly predicted.
+  int getScoreOnBatch(TrainingExamples & examples);
+  /// @brief Train this Classifier on a batch of TrainingExamples.
   ///
-  /// This function is similar to getScoreOnBatch, except that it update the neural network parameters in order to fit the batch examples more.
-  /// @param examples A set of training examples.
-  /// @param start The index of the start of the batch.
-  /// @param end The index of the end of the batch.
+  /// @param examples A batch of training examples.
   ///
-  /// @return The number of examples correctly classified by this Classifier.
-  int trainOnBatch(MLP::Examples & examples, int start, int end);
+  /// @return The number of examples in the batch whose class was correctly predicted.
+  int trainOnBatch(TrainingExamples & examples);
   /// @brief Get the name of an Action from its index.
   ///
   /// The index of an Action can be seen as the index of the corresponding output neuron in the underlying neural network.
diff --git a/transition_machine/include/TrainingExamples.hpp b/transition_machine/include/TrainingExamples.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..eeb30314d2bdd3d1984ea036136899056e7a0d26
--- /dev/null
+++ b/transition_machine/include/TrainingExamples.hpp
@@ -0,0 +1,31 @@
+/// @file TrainingExamples.hpp
+/// @author Franck Dary
+/// @version 1.0
+/// @date 2018-08-09
+
+#ifndef TRAININGEXAMPLES__H
+#define TRAININGEXAMPLES__H
+
+#include <vector>
+#include "FeatureModel.hpp"
+
+class TrainingExamples
+{
+  public :
+
+  std::vector<unsigned int> order;
+  std::vector<FeatureModel::FeatureDescription> examples;
+  std::vector<int> classes;
+  int nextIndex;
+
+  public :
+
+  TrainingExamples();
+  void add(const FeatureModel::FeatureDescription & example, int gold);
+  unsigned int size();
+  TrainingExamples getBatch(unsigned int batchSize);
+  void reset();
+  void shuffle();
+};
+
+#endif
diff --git a/transition_machine/src/Classifier.cpp b/transition_machine/src/Classifier.cpp
index aff4621121a02563ab3d3c785fbc3273040683e1..c19033d4b4cbea2c8732abd1ec4e1b35a0b148c3 100644
--- a/transition_machine/src/Classifier.cpp
+++ b/transition_machine/src/Classifier.cpp
@@ -156,7 +156,7 @@ int Classifier::getOracleActionIndex(Config & config)
   return as->getActionIndex(oracle->getAction(config));
 }
 
-int Classifier::trainOnBatch(MLP::Examples & examples, int start, int end)
+int Classifier::trainOnBatch(TrainingExamples & examples)
 {
   if(type != Type::Prediction)
   {
@@ -164,10 +164,10 @@
     exit(1);
   }
 
-  return mlp->trainOnBatch(examples, start, end);
+  return mlp->trainOnBatch(examples);
 }
 
-int Classifier::getScoreOnBatch(MLP::Examples & examples, int start, int end)
+int Classifier::getScoreOnBatch(TrainingExamples & examples)
 {
   if(type != Type::Prediction)
   {
@@ -175,7 +175,7 @@ int Classifier::getScoreOnBatch(MLP::Examples & examples, int start, int end)
     exit(1);
   }
 
-  return mlp->getScoreOnBatch(examples, start, end);
+  return mlp->getScoreOnBatch(examples);
 }
 
 std::string Classifier::getActionName(int actionIndex)
diff --git a/transition_machine/src/TrainingExamples.cpp b/transition_machine/src/TrainingExamples.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..12ac7f104e196bd9d649c51c43545af97177c1bd
--- /dev/null
+++ b/transition_machine/src/TrainingExamples.cpp
@@ -0,0 +1,43 @@
+#include "TrainingExamples.hpp"
+#include <algorithm>
+
+TrainingExamples::TrainingExamples()
+{
+  nextIndex = 0;
+}
+
+void TrainingExamples::add(const FeatureModel::FeatureDescription & example, int gold)
+{
+  examples.emplace_back(example);
+  classes.emplace_back(gold);
+  order.emplace_back(order.size());
+}
+
+unsigned int TrainingExamples::size()
+{
+  return examples.size();
+}
+
+TrainingExamples TrainingExamples::getBatch(unsigned int batchSize)
+{
+  TrainingExamples batch;
+
+  for(unsigned int i = 0; i < batchSize && (unsigned)nextIndex < order.size(); i++)
+  {
+    batch.add(examples[order[nextIndex]], classes[order[nextIndex]]);
+    nextIndex++;
+  }
+
+  return batch;
+}
+
+void TrainingExamples::reset()
+{
+  nextIndex = 0;
+}
+
+void TrainingExamples::shuffle()
+{
+  std::random_shuffle(order.begin(), order.end());
+}
+
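Two details of TrainingExamples are worth spelling out. First, the loop bound in getBatch() has to be nextIndex < order.size(): with order.size()-1 the final example would never be batched, and on an empty set the unsigned subtraction would underflow. Second, std::random_shuffle is deprecated since C++14 and removed in C++17. The sketch below models the same order/nextIndex mechanics in isolation so both points can be checked by eye — FeatureDescription is stubbed as an int, std::shuffle stands in for std::random_shuffle, and all names plus the main() driver are illustrative only, not part of the patch:

  // Standalone model of the TrainingExamples batching mechanics (C++11+).
  #include <algorithm>
  #include <cstdio>
  #include <random>
  #include <vector>

  struct Examples
  {
    std::vector<unsigned int> order; // permutation actually iterated over
    std::vector<int> payload;        // stand-in for FeatureDescription
    std::vector<int> classes;
    unsigned int nextIndex = 0;

    void add(int example, int gold)
    {
      payload.push_back(example);
      classes.push_back(gold);
      order.push_back(order.size()); // new example is iterated last, for now
    }

    // Consumes up to batchSize examples; an empty result means "exhausted".
    Examples getBatch(unsigned int batchSize)
    {
      Examples batch;
      for (unsigned int i = 0; i < batchSize && nextIndex < order.size(); i++)
      {
        batch.add(payload[order[nextIndex]], classes[order[nextIndex]]);
        nextIndex++;
      }
      return batch;
    }

    void reset() { nextIndex = 0; }

    // Shuffling permutes `order` only; payload/classes stay aligned by index.
    void shuffle()
    {
      static std::mt19937 rng{std::random_device{}()};
      std::shuffle(order.begin(), order.end(), rng);
    }
  };

  int main()
  {
    Examples ex;
    for (int i = 0; i < 5; i++)
      ex.add(i * 10, i % 2);

    ex.shuffle();
    while (true)
    {
      Examples batch = ex.getBatch(2);
      if (batch.payload.empty())
        break;
      printf("batch of %zu\n", batch.payload.size());
    }
    ex.reset(); // ready for the next epoch
  }

Keeping the permutation in `order` while `examples` and `classes` stay in insertion order means a shuffle never moves the potentially large feature payloads, only the indices.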