From 9f359ef3c81551f2499e9c5e5e992e58a033a2dd Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Tue, 3 Dec 2019 19:52:28 +0100 Subject: [PATCH] Made sequential tokeparser oracle less verbose --- transition_machine/src/Oracle.cpp | 131 +++++++++--------------------- 1 file changed, 37 insertions(+), 94 deletions(-) diff --git a/transition_machine/src/Oracle.cpp b/transition_machine/src/Oracle.cpp index ec954b2..2a2a945 100644 --- a/transition_machine/src/Oracle.cpp +++ b/transition_machine/src/Oracle.cpp @@ -804,75 +804,59 @@ void Oracle::createDatabase() }, [](Config & c, Oracle *) { + static std::map<std::string, int> lastIndexDone; + if (c.pastActions.size() == 0) + { + lastIndexDone = {{"tokenizer",-1},{"tagger",-1},{"morpho",-1},{"lemmatizer_case",-1},{"parser",-1},{"segmenter",-1}}; return std::string("MOVE tokenizer 0"); + } std::string previousState = util::noAccentLower(c.pastActions.getElem(0).first); std::string previousAction = util::noAccentLower(c.pastActions.getElem(0).second.name); + std::string newState; int movement = 0; - static constexpr int lookahead = 2; - static std::map<std::string,int> done{{"tokenizer",0},{"tagger",0},{"morpho",0},{"lemmatizer_case",0},{"parser",0}}; - static std::map<std::string,int> lastIndexDone{{"tokenizer",-1},{"tagger",-1},{"morpho",-1},{"lemmatizer_case",-1},{"parser",-1}}; - static std::map<std::string,int> todo{{"tokenizer",4*lookahead+1},{"tagger",3*lookahead+1},{"morpho",2*lookahead+1},{"lemmatizer_case",lookahead+1}}; - if (previousState == "tokenizer") { + newState = previousState; + if (util::split(previousAction, ' ')[0] == "splitword" || util::split(previousAction, ' ')[0] == "endword") { - done[previousState]++; lastIndexDone[previousState] = c.getHead(); if (util::split(previousAction, ' ')[0] == "splitword") { int splitSize = util::split(util::split(previousAction, ' ')[1], '@').size(); - done[previousState] += splitSize-1; lastIndexDone[previousState] += splitSize-1; } - if (done[previousState] < todo[previousState]) - { - newState = "tokenizer"; - movement = lastIndexDone[newState]-c.getHead()+1; - } - else - { + if (c.rawInputOnlySeparatorsLeft()) newState = "tagger"; - movement = lastIndexDone[newState]-c.getHead()+1; - } + + movement = lastIndexDone[newState]-c.getHead()+1; } - else - newState = "tokenizer"; } else if (previousState == "tagger") { - done[previousState]++; + newState = previousState; lastIndexDone[previousState] = c.getHead(); - if (done[previousState] < todo[previousState]) - { - newState = "tagger"; - movement = 1; - } - else - { + if (lastIndexDone[previousState] >= lastIndexDone["tokenizer"]) newState = "morpho"; - movement = lastIndexDone[newState]-c.getHead()+1; - } + + movement = lastIndexDone[newState]-c.getHead()+1; } else if (previousState == "morpho") { - newState = "morpho"; - if (previousAction == "nothing") + newState = previousState; + + if (util::split(previousAction, ' ')[0] == "nothing") { - done[previousState]++; lastIndexDone[previousState] = c.getHead(); - if (done[previousState] < todo[previousState]) - { - newState = "morpho"; - movement = 1; - } - else + movement = lastIndexDone[newState]-c.getHead()+1; + + if (lastIndexDone[previousState] >= lastIndexDone["tokenizer"]) { newState = "lemmatizer_lookup"; movement = lastIndexDone["lemmatizer_case"]-c.getHead()+1; @@ -881,24 +865,22 @@ void Oracle::createDatabase() } else if (previousState == "lemmatizer_lookup") { - if (previousAction == "notfound") + newState = "lemmatizer_case"; + + if (util::split(previousAction, ' ')[0] == "notfound") newState = "lemmatizer_rules"; - else - newState = "lemmatizer_case"; } else if (previousState == "lemmatizer_rules") + { newState = "lemmatizer_case"; + } else if (previousState == "lemmatizer_case") { - newState = "parser"; - done[previousState]++; lastIndexDone[previousState] = c.getHead(); - if (done[previousState] < todo[previousState]) - { - newState = "lemmatizer_rules"; - movement = 1; - } - else + newState = "lemmatizer_lookup"; + + movement = 1; + if (lastIndexDone[previousState] >= lastIndexDone["tokenizer"]) { newState = "parser"; movement = lastIndexDone[newState]-c.getHead()+1; @@ -906,60 +888,21 @@ void Oracle::createDatabase() } else if (previousState == "parser") { + newState = previousState; if (util::split(previousAction, ' ')[0] == "shift" || util::split(previousAction, ' ')[0] == "right") { - newState = "segmenter"; - movement = 0; lastIndexDone[previousState] = c.getHead(); + newState = "segmenter"; + movement = lastIndexDone[newState]-c.getHead()+1; } - else - newState = "parser"; } else if (previousState == "segmenter") { - todo["tokenizer"] += 1; - todo["tagger"] += 1; - todo["morpho"] += 1; - todo["lemmatizer_case"] += 1; - - newState = "tokenizer"; + lastIndexDone[previousState] = c.getHead(); + newState = "parser"; + if (lastIndexDone[previousState] >= lastIndexDone["tokenizer"]) + return std::string(""); movement = lastIndexDone[newState]-c.getHead()+1; - - if (c.rawInputHeadIndex >= (int)c.rawInput.size() || c.rawInputOnlySeparatorsLeft() || done[newState] >= todo[newState]) - { - newState = "tagger"; - movement = lastIndexDone[newState]-c.getHead()+1; - if (lastIndexDone[newState] >= lastIndexDone["tokenizer"]) - { - newState = "morpho"; - movement = lastIndexDone[newState]-c.getHead()+1; - if (lastIndexDone[newState] >= lastIndexDone["tagger"]) - { - newState = "lemmatizer_rules"; - movement = lastIndexDone["lemmatizer_case"]-c.getHead()+1; - if (lastIndexDone["lemmatizer_case"] >= lastIndexDone["morpho"]) - { - newState = "parser"; - movement = lastIndexDone[newState]-c.getHead()+1; - if (lastIndexDone[newState] >= lastIndexDone["lemmatizer_case"]) - { - newState = previousState; - movement = 1; - - if (c.getHead() >= lastIndexDone["tagger"]) - { - done = {{"tokenizer",0},{"tagger",0},{"morpho",0},{"lemmatizer_case",0},{"parser",0}}; - lastIndexDone = {{"tokenizer",-1},{"tagger",-1},{"morpho",-1},{"lemmatizer_case",-1},{"parser",-1}}; - todo = {{"tokenizer",4*lookahead+1},{"tagger",3*lookahead+1},{"morpho",2*lookahead+1},{"lemmatizer_case",lookahead+1}}; - - return std::string(""); - } - } - } - } - } - } - } else newState = "unknown("+std::string(ERRINFO)+")("+previousState+")("+previousAction+")"; -- GitLab