diff --git a/MLP/include/MLP.hpp b/MLP/include/MLP.hpp index 5c419724bd908267a28cb7a684757ea368cc96f6..484d88100e481a5e53ab8d098c5578d7bd2c7946 100644 --- a/MLP/include/MLP.hpp +++ b/MLP/include/MLP.hpp @@ -12,7 +12,6 @@ #include <dynet/timing.h> #include <dynet/expr.h> #include "FeatureModel.hpp" -#include "TrainingExamples.hpp" /// @brief Multi Layer Perceptron. /// diff --git a/trainer/include/Trainer.hpp b/trainer/include/Trainer.hpp index 0f66aeb3b8b6025d01a428505ddabf9ddb453fd0..6efd1574028d7b19dbec7f93c9d221580d3f0c85 100644 --- a/trainer/include/Trainer.hpp +++ b/trainer/include/Trainer.hpp @@ -9,7 +9,6 @@ #include "TransitionMachine.hpp" #include "BD.hpp" #include "Config.hpp" -#include "TrainingExamples.hpp" /// @brief An object capable of training a TransitionMachine given a BD initialized with training examples. class Trainer @@ -68,17 +67,6 @@ class Trainer private : - /// @brief Train the TransitionMachine one example at a time. - /// - /// For each epoch all the Classifier of the TransitionMachine are fed all the - /// training examples, at the end of the epoch Classifier are evaluated on - /// the devBD if available, and each Classifier will be saved only if its score - /// on the current epoch is its all time best.\n - /// When a Classifier is saved that way, all the Dict involved are also saved. - /// @param nbIter The number of epochs. - /// @param mustShuffle Whether or not to shuffle examples between each epoch. - void trainUnbatched(int nbIter, bool mustShuffle); - /// @brief Compute and print scores for each Classifier on this epoch, and save the Classifier if they achieved their all time best score. void printScoresAndSave(FILE * output); @@ -107,13 +95,17 @@ class Trainer /// @param debugMode If true, infos will be printed on stderr. /// @param removeDuplicates If true, duplicates examples will be removed from the training set. Trainer(TransitionMachine & tm, BD & bd, Config & config, BD * devBD, Config * devConfig, bool debugMode, bool removeDuplicates); - /// @brief Train the TransitionMachine. + + /// @brief Train the TransitionMachine one example at a time. /// - /// @param nbIter The number of training epochs. - /// @param batchSize The size of each batch. - /// @param mustShuffle Will the examples be shuffled after every epoch ? - /// @param batched True if we feed the training algorithm with batches of examples - void train(int nbIter, int batchSize, bool mustShuffle, bool batched); + /// For each epoch all the Classifier of the TransitionMachine are fed all the + /// training examples, at the end of the epoch Classifier are evaluated on + /// the devBD if available, and each Classifier will be saved only if its score + /// on the current epoch is its all time best.\n + /// When a Classifier is saved that way, all the Dict involved are also saved. + /// @param nbIter The number of epochs. + /// @param mustShuffle Whether or not to shuffle examples between each epoch. + void train(int nbIter, bool mustShuffle); }; #endif diff --git a/trainer/src/Trainer.cpp b/trainer/src/Trainer.cpp index 1bc497b31f8fa3763c78f74958a5f9dd15b7115c..c541924e6826d064fdeff3ffcbaeb3ba5651f5da 100644 --- a/trainer/src/Trainer.cpp +++ b/trainer/src/Trainer.cpp @@ -26,6 +26,9 @@ std::map<std::string, float> Trainer::getScoreOnDev() std::map< std::string, std::pair<int, int> > counts; + if (debugMode) + fprintf(stderr, "Computing score on dev set\n"); + while (!devConfig->isFinal()) { TransitionMachine::State * currentState = tm.getCurrentState(); @@ -73,6 +76,12 @@ std::map<std::string, float> Trainer::getScoreOnDev() std::string actionName = pAction; Action * action = classifier->getAction(actionName); + if (debugMode) + { + devConfig->printForDebug(stderr); + fprintf(stderr, "pAction=<%s> action=<%s>\n", pAction.c_str(), actionName.c_str()); + } + action->apply(*devConfig); TransitionMachine::Transition * transition = tm.getTransition(actionName); tm.takeTransition(transition); @@ -87,7 +96,7 @@ std::map<std::string, float> Trainer::getScoreOnDev() return scores; } -void Trainer::trainUnbatched(int nbIter, bool mustShuffle) +void Trainer::train(int nbIter, bool mustShuffle) { this->nbIter = nbIter; @@ -98,6 +107,7 @@ void Trainer::trainUnbatched(int nbIter, bool mustShuffle) for (curIter = 0; curIter < nbIter; curIter++) { tm.reset(); + trainConfig.reset(); if(mustShuffle) @@ -144,6 +154,7 @@ void Trainer::trainUnbatched(int nbIter, bool mustShuffle) if (zeroCostActions.empty()) { fprintf(stderr, "ERROR (%s) : Unable to find any zero cost action. Aborting.\n", ERRINFO); + fprintf(stderr, "State : %s\n", currentState->name.c_str()); trainConfig.printForDebug(stderr); exit(1); } @@ -278,8 +289,3 @@ void Trainer::printScoresAndSave(FILE * output) printColumns(output, {names, acc, train, dev, savedStr}); } -void Trainer::train(int nbIter, int batchSize, bool mustShuffle, bool batched) -{ - trainUnbatched(nbIter, mustShuffle); -} - diff --git a/trainer/src/macaon_train.cpp b/trainer/src/macaon_train.cpp index 9a11afaf141e68f33107128cb2737da4a3e82e85..9c8e31d513c7d6bad15ce165a8301a875f14a2b5 100644 --- a/trainer/src/macaon_train.cpp +++ b/trainer/src/macaon_train.cpp @@ -43,14 +43,10 @@ po::options_description getOptionsDescription() "Language you are working with") ("nbiter,n", po::value<int>()->default_value(5), "Number of training epochs (iterations)") - ("batchsize,b", po::value<int>()->default_value(256), - "Size of each training batch (in number of examples)") ("seed,s", po::value<int>()->default_value(100), "The random seed that will initialize RNG") ("duplicates", po::value<bool>()->default_value(true), "Remove identical training examples") - ("batched", po::value<bool>()->default_value(true), - "Uses batch of training examples") ("shuffle", po::value<bool>()->default_value(true), "Shuffle examples after each iteration"); @@ -115,10 +111,8 @@ int main(int argc, char * argv[]) std::string expName = vm["expName"].as<std::string>(); std::string lang = vm["lang"].as<std::string>(); int nbIter = vm["nbiter"].as<int>(); - int batchSize = vm["batchsize"].as<int>(); int randomSeed = vm["seed"].as<int>(); bool mustShuffle = vm["shuffle"].as<bool>(); - bool batched = vm["batched"].as<bool>(); bool removeDuplicates = vm["duplicates"].as<bool>(); bool debugMode = vm.count("debug") == 0 ? false : true; @@ -159,7 +153,7 @@ int main(int argc, char * argv[]) } trainer->expPath = expPath; - trainer->train(nbIter, batchSize, mustShuffle, batched); + trainer->train(nbIter, mustShuffle); return 0; } diff --git a/transition_machine/include/TrainingExamples.hpp b/transition_machine/include/TrainingExamples.hpp deleted file mode 100644 index 4fd52a177419eca6c80a0a951748ea305e8dfa5e..0000000000000000000000000000000000000000 --- a/transition_machine/include/TrainingExamples.hpp +++ /dev/null @@ -1,33 +0,0 @@ -/// @file TrainingExamples.hpp -/// @author Franck Dary -/// @version 1.0 -/// @date 2018-08-09 - -#ifndef TRAININGEXAMPLES__H -#define TRAININGEXAMPLES__H - -#include <vector> -#include "FeatureModel.hpp" - -class TrainingExamples -{ - public : - - std::vector<unsigned int> order; - std::vector<FeatureModel::FeatureDescription> examples; - std::vector<int> classes; - int nextIndex; - - public : - - TrainingExamples(); - void add(const FeatureModel::FeatureDescription & example, int gold); - unsigned int size(); - TrainingExamples getBatch(unsigned int batchSize); - void reset(); - void shuffle(); - void remove(int index); - void removeDuplicates(); -}; - -#endif diff --git a/transition_machine/src/ActionBank.cpp b/transition_machine/src/ActionBank.cpp index 7877255e0fa20fc98bf80ffd89e318247256eab1..f1c28ee8285321524deec997ecc8d063443ea875 100644 --- a/transition_machine/src/ActionBank.cpp +++ b/transition_machine/src/ActionBank.cpp @@ -102,8 +102,8 @@ std::vector<Action::BasicAction> ActionBank::str2sequence(const std::string & na {c.stack.push_back(c.head);}; auto undo = [](Config & c, Action::BasicAction &) {c.stack.pop_back();}; - auto appliable = [](Config &, Action::BasicAction &) - {return true;}; + auto appliable = [](Config & c, Action::BasicAction &) + {return c.head < (int)c.tapes[0].ref.size()-1;}; Action::BasicAction basicAction = {Action::BasicAction::Type::Push, "", apply, undo, appliable}; diff --git a/transition_machine/src/Config.cpp b/transition_machine/src/Config.cpp index d90f4f6bd68e70a29d1f734e6e0ea0fe3b4597cf..badbbbada01dbc681968b8d02f8996de0cb4c82d 100644 --- a/transition_machine/src/Config.cpp +++ b/transition_machine/src/Config.cpp @@ -56,6 +56,9 @@ void Config::readInput(const std::string & filename) tape.ref.emplace_back(); tape.hyp.resize(tape.ref.size()); + + tape.ref.emplace_back("0"); + tape.hyp.emplace_back(""); } } @@ -134,23 +137,23 @@ void Config::printAsOutput(FILE * output) if(bd.mustPrintLine(j)) lastToPrint = j; - for (unsigned int i = 0; i < tapes[0].hyp.size(); i++) - { + for (unsigned int i = 0; i < tapes[0].hyp.size() - 1; i++) for (unsigned int j = 0; j < tapes.size(); j++) + { if(bd.mustPrintLine(j)) fprintf(output, "%s%s", tapes[j][i].empty() ? "0" : tapes[j][i].c_str(), j == lastToPrint ? "\n" : "\t"); - } + } } void Config::moveHead(int mvt) { - if (mvt + head < (int)getTapeByInputCol(0).hyp.size()) + if (head + mvt < (int)tapes[0].ref.size()) head += mvt; } bool Config::isFinal() { - return head >= (int)getTapeByInputCol(0).hyp.size() -1; + return head >= (int)getTapeByInputCol(0).hyp.size()-1 && stack.empty(); } void Config::reset() @@ -210,6 +213,8 @@ void Config::shuffle(const std::string & delimiterTape, const std::string & deli previousIndex = i+1; } + std::pair<unsigned int, unsigned int> suffix = {delimiters.back().second+1, tape.ref.size()-1}; + std::random_shuffle(delimiters.begin(), delimiters.end()); std::vector<Tape> newTapes = tapes; @@ -224,6 +229,12 @@ void Config::shuffle(const std::string & delimiterTape, const std::string & deli std::copy(tapes[tape].ref.begin()+delimiter.first, tapes[tape].ref.begin()+delimiter.second+1, std::back_inserter(newTapes[tape].ref)); std::copy(tapes[tape].hyp.begin()+delimiter.first, tapes[tape].hyp.begin()+delimiter.second+1, std::back_inserter(newTapes[tape].hyp)); } + + if (suffix.first <= suffix.second) + { + std::copy(tapes[tape].ref.begin()+suffix.first, tapes[tape].ref.begin()+suffix.second+1, std::back_inserter(newTapes[tape].ref)); + std::copy(tapes[tape].hyp.begin()+suffix.first, tapes[tape].hyp.begin()+suffix.second+1, std::back_inserter(newTapes[tape].hyp)); + } } tapes = newTapes; diff --git a/transition_machine/src/Oracle.cpp b/transition_machine/src/Oracle.cpp index 3e4bb79269bbec035aa5f1d4fb8305312676a9fb..bd262315a32f8dc3ea1402727926935e95877bee 100644 --- a/transition_machine/src/Oracle.cpp +++ b/transition_machine/src/Oracle.cpp @@ -86,7 +86,7 @@ void Oracle::createDatabase() }, [](Config & c, Oracle *, const std::string & action) { - return action == "WRITE 0 POS " + c.getTape("POS").ref[c.head]; + return action == "WRITE 0 POS " + c.getTape("POS").ref[c.head] || c.head >= (int)c.tapes[0].ref.size()-1; }))); str2oracle.emplace("morpho", std::unique_ptr<Oracle>(new Oracle( @@ -102,7 +102,7 @@ void Oracle::createDatabase() }, [](Config & c, Oracle *, const std::string & action) { - return action == "WRITE 0 MORPHO " + c.getTape("MORPHO").ref[c.head]; + return action == "WRITE 0 MORPHO " + c.getTape("MORPHO").ref[c.head] || c.head >= (int)c.tapes[0].ref.size()-1; }))); str2oracle.emplace("signature", std::unique_ptr<Oracle>(new Oracle( @@ -211,7 +211,7 @@ void Oracle::createDatabase() const std::string & lemma = c.getTape("LEMMA").ref[c.head]; std::string rule = getRule(form, lemma); - return action == std::string("RULE LEMMA ON FORM ") + rule; + return action == std::string("RULE LEMMA ON FORM ") + rule || c.head >= (int)c.tapes[0].ref.size()-1; }))); str2oracle.emplace("parser", std::unique_ptr<Oracle>(new Oracle( @@ -244,6 +244,8 @@ void Oracle::createDatabase() sentenceStart++; while(sentenceEnd < (int)eos.ref.size() && eos.ref[sentenceEnd] != "1") sentenceEnd++; + if (sentenceEnd == (int)eos.ref.size()) + sentenceEnd--; auto parts = split(action); diff --git a/transition_machine/src/TrainingExamples.cpp b/transition_machine/src/TrainingExamples.cpp deleted file mode 100644 index a887acafb0e1222e975fdeb0a4717003790cb621..0000000000000000000000000000000000000000 --- a/transition_machine/src/TrainingExamples.cpp +++ /dev/null @@ -1,72 +0,0 @@ -#include "TrainingExamples.hpp" -#include <algorithm> - -TrainingExamples::TrainingExamples() -{ - nextIndex = 0; -} - -void TrainingExamples::add(const FeatureModel::FeatureDescription & example, int gold) -{ - examples.emplace_back(example); - classes.emplace_back(gold); - order.emplace_back(order.size()); -} - -unsigned int TrainingExamples::size() -{ - return order.size(); -} - -TrainingExamples TrainingExamples::getBatch(unsigned int batchSize) -{ - TrainingExamples batch; - - for(unsigned int i = 0; i < batchSize && (unsigned)nextIndex < order.size()-1; i++) - { - batch.add(examples[order[nextIndex]], classes[order[nextIndex]]); - nextIndex++; - } - - return batch; -} - -void TrainingExamples::reset() -{ - nextIndex = 0; -} - -void TrainingExamples::shuffle() -{ - std::random_shuffle(order.begin(), order.end()); -} - -void TrainingExamples::removeDuplicates() -{ - std::map<std::string, int> lastIndex; - std::map<int, bool> toRemove; - - for (unsigned int i = 0; i < examples.size(); i++) - { - std::string example = examples[i].toString(); - - if (lastIndex.count(example)) - toRemove[i] = true; - else - lastIndex[example] = i; - } - - for (auto & it : toRemove) - remove(it.first); -} - -void TrainingExamples::remove(int index) -{ - for (unsigned int i = 0; i < order.size(); i++) - if ((int)order[i] == index) - { - order[i] = order.back(); - order.pop_back(); - } -} -