From 89af3d30207ee69cced326e053d02917694b00c4 Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Fri, 18 Oct 2019 16:15:12 +0200 Subject: [PATCH] Added strategy for tokenizer --- transition_machine/src/Oracle.cpp | 39 +++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/transition_machine/src/Oracle.cpp b/transition_machine/src/Oracle.cpp index 4577984..8d90e07 100644 --- a/transition_machine/src/Oracle.cpp +++ b/transition_machine/src/Oracle.cpp @@ -379,6 +379,45 @@ void Oracle::createDatabase() return 0; }))); + str2oracle.emplace("strategy_tokenizer", std::unique_ptr<Oracle>(new Oracle( + [](Oracle *) + { + }, + [](Config & c, Oracle *) + { + if (c.pastActions.size() == 0) + return std::string("MOVE tokenizer 0"); + + std::string previousState = util::noAccentLower(c.pastActions.getElem(0).first); + std::string previousAction = util::noAccentLower(c.pastActions.getElem(0).second.name); + std::string newState; + int movement = 0; + + if (previousState == "signature") + { + newState = "tokenizer"; + movement = 1; + } + else if (previousState == "tokenizer") + { + if (util::split(previousAction, ' ')[0] == "splitword" || util::split(previousAction, ' ')[0] == "endword") + newState = "signature"; + else + newState = "tokenizer"; + + if (util::split(previousAction, ' ')[0] == "splitword") + { + int nbSplit = util::split(util::split(previousAction, ' ')[1], '@').size(); + movement = nbSplit-1; + } + } + return "MOVE " + newState + " " + std::to_string(movement); + }, + [](Config &, Oracle *, const std::string &) + { + return 0; + }))); + str2oracle.emplace("strategy_tokenizer,tagger", std::unique_ptr<Oracle>(new Oracle( [](Oracle *) { -- GitLab