diff --git a/transition_machine/src/Oracle.cpp b/transition_machine/src/Oracle.cpp
index 01392179aabcf006af4307804536dfd25562349c..ae50b74792774a50aee7ceff37649403f83eee7a 100644
--- a/transition_machine/src/Oracle.cpp
+++ b/transition_machine/src/Oracle.cpp
@@ -798,6 +798,208 @@ void Oracle::createDatabase()
     return 0;
   })));
 
+  // Strategy oracle for the sequential pipeline
+  // tokenizer -> tagger -> morpho -> lemmatizer -> parser -> segmenter.
+  // Each stage before the parser has a quota (`todo`) of items to process
+  // before control is handed to the next stage; the quotas are staggered by
+  // `lookahead`, and every segmenter step raises each quota by one.
+  str2oracle.emplace("strategy_tokenizer,tagger,morpho,lemmatizer,parser_sequential", std::unique_ptr<Oracle>(new Oracle(
+  // First callback: takes the Oracle and does nothing.
+  [](Oracle *)
+  {
+  },
+  // Second callback: builds the next "MOVE <state> <movement>" string from
+  // the (state, action) pair at c.pastActions.getElem(0). Returns an empty
+  // string when c.isFinal() holds and that state was "segmenter".
+  [](Config & c, Oracle *)
+  {
+    if (c.pastActions.size() == 0)
+      return std::string("MOVE tokenizer 0");
+
+    const std::string previousState = util::noAccentLower(c.pastActions.getElem(0).first);
+    const std::string previousAction = util::noAccentLower(c.pastActions.getElem(0).second.name);
+    std::string newState;
+    int movement = 0;
+
+    static constexpr int lookahead = 2;
+    // Start-of-input values, kept in one place so that the reset performed
+    // when the configuration is final cannot drift from the initial state.
+    static const std::map<std::string,int> initialDone{{"tokenizer",0},{"tagger",0},{"morpho",0},{"lemmatizer_case",0},{"parser",0}};
+    static const std::map<std::string,int> initialLastIndexDone{{"tokenizer",-1},{"tagger",-1},{"morpho",-1},{"lemmatizer_case",-1},{"parser",-1}};
+    static const std::map<std::string,int> initialTodo{{"tokenizer",4*lookahead+1},{"tagger",3*lookahead+1},{"morpho",2*lookahead+1},{"lemmatizer_case",lookahead+1}};
+
+    // NOTE(review): these are function-local statics, mutated here without
+    // any locking and only reset once c.isFinal() is true, so they are
+    // shared by every Config passed to this oracle.
+    // done: number of items completed per stage.
+    // lastIndexDone: c.getHead() recorded at the stage's last completed item.
+    // todo: number of items a stage must complete before handing over.
+    static std::map<std::string,int> done = initialDone;
+    static std::map<std::string,int> lastIndexDone = initialLastIndexDone;
+    static std::map<std::string,int> todo = initialTodo;
+
+    if (previousState == "tokenizer")
+    {
+      // Split the action once instead of re-splitting it for every test.
+      const auto actionParts = util::split(previousAction, ' ');
+      const bool isSplitword = !actionParts.empty() && actionParts[0] == "splitword";
+      const bool isEndword = !actionParts.empty() && actionParts[0] == "endword";
+
+      if (isSplitword || isEndword)
+      {
+        done[previousState]++;
+        lastIndexDone[previousState] = c.getHead();
+
+        // A splitword argument is '@'-separated; every piece beyond the first
+        // counts as one more completed item. The size check avoids indexing
+        // past the end of actionParts when the argument is missing.
+        if (isSplitword && actionParts.size() > 1)
+        {
+          const int splitSize = util::split(actionParts[1], '@').size();
+          done[previousState] += splitSize-1;
+          lastIndexDone[previousState] += splitSize-1;
+        }
+
+        // Keep tokenizing until the quota is met, then hand over to the tagger.
+        newState = done[previousState] < todo[previousState] ? "tokenizer" : "tagger";
+        movement = lastIndexDone[newState]-c.getHead()+1;
+      }
+      else
+        newState = "tokenizer";
+    }
+    else if (previousState == "tagger")
+    {
+      done[previousState]++;
+      lastIndexDone[previousState] = c.getHead();
+      if (done[previousState] < todo[previousState])
+      {
+        newState = "tagger";
+        movement = 1;
+      }
+      else
+      {
+        newState = "morpho";
+        movement = lastIndexDone[newState]-c.getHead()+1;
+      }
+    }
+    else if (previousState == "morpho")
+    {
+      // Stay in morpho until it emits "nothing", which completes the item.
+      newState = "morpho";
+      if (previousAction == "nothing")
+      {
+        done[previousState]++;
+        lastIndexDone[previousState] = c.getHead();
+        if (done[previousState] < todo[previousState])
+          movement = 1;
+        else
+        {
+          // The lemmatizer states are tracked under the "lemmatizer_case" key.
+          newState = "lemmatizer_lookup";
+          movement = lastIndexDone["lemmatizer_case"]-c.getHead()+1;
+        }
+      }
+    }
+    else if (previousState == "lemmatizer_lookup")
+      newState = previousAction == "notfound" ? "lemmatizer_rules" : "lemmatizer_case";
+    else if (previousState == "lemmatizer_rules")
+      newState = "lemmatizer_case";
+    else if (previousState == "lemmatizer_case")
+    {
+      done[previousState]++;
+      lastIndexDone[previousState] = c.getHead();
+      if (done[previousState] < todo[previousState])
+      {
+        // NOTE(review): the morpho branch enters the lemmatizer through
+        // "lemmatizer_lookup" but this branch re-enters it through
+        // "lemmatizer_rules"; confirm that skipping the lookup is intended.
+        newState = "lemmatizer_rules";
+        movement = 1;
+      }
+      else
+      {
+        newState = "parser";
+        movement = lastIndexDone[newState]-c.getHead()+1;
+      }
+    }
+    else if (previousState == "parser")
+    {
+      const auto actionParts = util::split(previousAction, ' ');
+      const bool isShiftOrRight = !actionParts.empty() && (actionParts[0] == "shift" || actionParts[0] == "right");
+
+      if (isShiftOrRight)
+      {
+        newState = "segmenter";
+        movement = 0;
+        lastIndexDone[previousState] = c.getHead();
+      }
+      else
+        newState = "parser";
+    }
+    else if (previousState == "segmenter")
+    {
+      // After a segmenter step every upstream quota grows by one item.
+      todo["tokenizer"] += 1;
+      todo["tagger"] += 1;
+      todo["morpho"] += 1;
+      todo["lemmatizer_case"] += 1;
+
+      // True when the stage counted under `key` has nothing to do: its next
+      // index is past the FORM tape, the FORM cell at `offset` is empty, or
+      // its quota is already met. `offset` is always the freshly computed
+      // `movement` (presumably head-relative; TODO confirm against Tape).
+      const auto stageIsIdle = [&c](const std::string & key, int offset)
+      {
+        return lastIndexDone[key]+1 >= c.getTape("FORM").size() || c.getTape("FORM")[offset].empty() || done[key] >= todo[key];
+      };
+
+      // Resume at the first stage that still has work; the parser is the fallback.
+      newState = "tokenizer";
+      movement = lastIndexDone[newState]-c.getHead()+1;
+
+      if (c.rawInputHeadIndex >= static_cast<int>(c.rawInput.size()) || done[newState] >= todo[newState])
+      {
+        newState = "tagger";
+        movement = lastIndexDone[newState]-c.getHead()+1;
+        if (stageIsIdle(newState, movement))
+        {
+          newState = "morpho";
+          movement = lastIndexDone[newState]-c.getHead()+1;
+          if (stageIsIdle(newState, movement))
+          {
+            newState = "lemmatizer_rules";
+            movement = lastIndexDone["lemmatizer_case"]-c.getHead()+1;
+            if (stageIsIdle("lemmatizer_case", movement))
+            {
+              newState = "parser";
+              movement = lastIndexDone[newState]-c.getHead()+1;
+            }
+          }
+        }
+      }
+    }
+    else
+      newState = "unknown("+std::string(ERRINFO)+")("+previousState+")("+previousAction+")";
+
+    if (c.isFinal())
+    {
+      // Restore the start-of-input counters for the next configuration.
+      done = initialDone;
+      lastIndexDone = initialLastIndexDone;
+      todo = initialTodo;
+
+      if (previousState == "segmenter")
+        return std::string("");
+    }
+
+    return "MOVE " + newState + " " + std::to_string(movement);
+  },
+  // Third callback: ignores its arguments and returns 0.
+  [](Config &, Oracle *, const std::string &)
+  {
+    return 0;
+  })));
+
   str2oracle.emplace("strategy_tokenizer,tagger,morpho,lemmatizer,parser", std::unique_ptr<Oracle>(new Oracle(
   [](Oracle *)
   {