diff --git a/transition_machine/src/Oracle.cpp b/transition_machine/src/Oracle.cpp index 4577984dc2c0d22e235bdb77bf50aedca739f842..8d90e0766a7d5bf88c94f7eeb53254252065620f 100644 --- a/transition_machine/src/Oracle.cpp +++ b/transition_machine/src/Oracle.cpp @@ -379,6 +379,45 @@ void Oracle::createDatabase() return 0; }))); + str2oracle.emplace("strategy_tokenizer", std::unique_ptr<Oracle>(new Oracle( + [](Oracle *) + { + }, + [](Config & c, Oracle *) + { + if (c.pastActions.size() == 0) + return std::string("MOVE tokenizer 0"); + + std::string previousState = util::noAccentLower(c.pastActions.getElem(0).first); + std::string previousAction = util::noAccentLower(c.pastActions.getElem(0).second.name); + std::string newState; + int movement = 0; + + if (previousState == "signature") + { + newState = "tokenizer"; + movement = 1; + } + else if (previousState == "tokenizer") + { + if (util::split(previousAction, ' ')[0] == "splitword" || util::split(previousAction, ' ')[0] == "endword") + newState = "signature"; + else + newState = "tokenizer"; + + if (util::split(previousAction, ' ')[0] == "splitword") + { + int nbSplit = util::split(util::split(previousAction, ' ')[1], '@').size(); + movement = nbSplit-1; + } + } + return "MOVE " + newState + " " + std::to_string(movement); + }, + [](Config &, Oracle *, const std::string &) + { + return 0; + }))); + str2oracle.emplace("strategy_tokenizer,tagger", std::unique_ptr<Oracle>(new Oracle( [](Oracle *) {