From 9f359ef3c81551f2499e9c5e5e992e58a033a2dd Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Tue, 3 Dec 2019 19:52:28 +0100
Subject: [PATCH] Made sequential tokeparser oracle less verbose

---
 transition_machine/src/Oracle.cpp | 131 +++++++++---------------------
 1 file changed, 37 insertions(+), 94 deletions(-)

diff --git a/transition_machine/src/Oracle.cpp b/transition_machine/src/Oracle.cpp
index ec954b2..2a2a945 100644
--- a/transition_machine/src/Oracle.cpp
+++ b/transition_machine/src/Oracle.cpp
@@ -804,75 +804,59 @@ void Oracle::createDatabase()
   },
   [](Config & c, Oracle *)
   {
+    static std::map<std::string, int> lastIndexDone;
+
     if (c.pastActions.size() == 0)
+    {
+      lastIndexDone = {{"tokenizer",-1},{"tagger",-1},{"morpho",-1},{"lemmatizer_case",-1},{"parser",-1},{"segmenter",-1}};
       return std::string("MOVE tokenizer 0");
+    }
 
     std::string previousState = util::noAccentLower(c.pastActions.getElem(0).first);
     std::string previousAction = util::noAccentLower(c.pastActions.getElem(0).second.name);
+
     std::string newState;
     int movement = 0;
 
-    static constexpr int lookahead = 2;
-    static std::map<std::string,int> done{{"tokenizer",0},{"tagger",0},{"morpho",0},{"lemmatizer_case",0},{"parser",0}};
-    static std::map<std::string,int> lastIndexDone{{"tokenizer",-1},{"tagger",-1},{"morpho",-1},{"lemmatizer_case",-1},{"parser",-1}};
-    static std::map<std::string,int> todo{{"tokenizer",4*lookahead+1},{"tagger",3*lookahead+1},{"morpho",2*lookahead+1},{"lemmatizer_case",lookahead+1}};
-
     if (previousState == "tokenizer")
     {
+      newState = previousState;
+
       if (util::split(previousAction, ' ')[0] == "splitword" || util::split(previousAction, ' ')[0] == "endword")
       {
-        done[previousState]++;
         lastIndexDone[previousState] = c.getHead();
 
         if (util::split(previousAction, ' ')[0] == "splitword")
         {
           int splitSize = util::split(util::split(previousAction, ' ')[1], '@').size();
-          done[previousState] += splitSize-1;
           lastIndexDone[previousState] += splitSize-1;
         }
 
-        if (done[previousState] < todo[previousState])
-        {
-          newState = "tokenizer";
-          movement = lastIndexDone[newState]-c.getHead()+1;
-        }
-        else
-        {
+        if (c.rawInputOnlySeparatorsLeft())
           newState = "tagger";
-          movement = lastIndexDone[newState]-c.getHead()+1;
-        }
+
+        movement = lastIndexDone[newState]-c.getHead()+1;
       }
-      else
-        newState = "tokenizer";
     }
     else if (previousState == "tagger")
     {
-      done[previousState]++;
+      newState = previousState;
       lastIndexDone[previousState] = c.getHead();
-      if (done[previousState] < todo[previousState])
-      {
-        newState = "tagger";
-        movement = 1;
-      }
-      else
-      {
+      if (lastIndexDone[previousState] >= lastIndexDone["tokenizer"])
         newState = "morpho";
-        movement = lastIndexDone[newState]-c.getHead()+1;
-      }
+
+      movement = lastIndexDone[newState]-c.getHead()+1;
     }
     else if (previousState == "morpho")
     {
-      newState = "morpho";
-      if (previousAction == "nothing")
+      newState = previousState;
+
+      if (util::split(previousAction, ' ')[0] == "nothing")
       {
-        done[previousState]++;
         lastIndexDone[previousState] = c.getHead();
-        if (done[previousState] < todo[previousState])
-        {
-          newState = "morpho";
-          movement = 1;
-        }
-        else
+        movement = lastIndexDone[newState]-c.getHead()+1;
+        
+        if (lastIndexDone[previousState] >= lastIndexDone["tokenizer"])
         {
           newState = "lemmatizer_lookup";
           movement = lastIndexDone["lemmatizer_case"]-c.getHead()+1;
@@ -881,24 +865,22 @@ void Oracle::createDatabase()
     }
     else if (previousState == "lemmatizer_lookup")
     {
-      if (previousAction == "notfound")
+      newState = "lemmatizer_case";
+
+      if (util::split(previousAction, ' ')[0] == "notfound")
         newState = "lemmatizer_rules";
-      else
-        newState = "lemmatizer_case";
     }
     else if (previousState == "lemmatizer_rules")
+    {
       newState = "lemmatizer_case";
+    }
     else if (previousState == "lemmatizer_case")
     {
-      newState = "parser";
-      done[previousState]++;
       lastIndexDone[previousState] = c.getHead();
-      if (done[previousState] < todo[previousState])
-      {
-        newState = "lemmatizer_rules";
-        movement = 1;
-      }
-      else
+      newState = "lemmatizer_lookup";
+
+      movement = 1;
+      if (lastIndexDone[previousState] >= lastIndexDone["tokenizer"])
       {
         newState = "parser";
         movement = lastIndexDone[newState]-c.getHead()+1;
@@ -906,60 +888,21 @@ void Oracle::createDatabase()
     }
     else if (previousState == "parser")
     {
+      newState = previousState;
       if (util::split(previousAction, ' ')[0] == "shift" || util::split(previousAction, ' ')[0] == "right")
       {
-        newState = "segmenter";
-        movement = 0;
         lastIndexDone[previousState] = c.getHead();
+        newState = "segmenter";
+        movement = lastIndexDone[newState]-c.getHead()+1;
       }
-      else
-        newState = "parser";
     }
     else if (previousState == "segmenter")
     {
-      todo["tokenizer"] += 1;
-      todo["tagger"] += 1;
-      todo["morpho"] += 1;
-      todo["lemmatizer_case"] += 1;
-
-      newState = "tokenizer";
+      lastIndexDone[previousState] = c.getHead();
+      newState = "parser";
+      if (lastIndexDone[previousState] >= lastIndexDone["tokenizer"])
+        return std::string("");
       movement = lastIndexDone[newState]-c.getHead()+1;
-
-      if (c.rawInputHeadIndex >= (int)c.rawInput.size() || c.rawInputOnlySeparatorsLeft() || done[newState] >= todo[newState])
-      {
-        newState = "tagger";
-        movement = lastIndexDone[newState]-c.getHead()+1;
-        if (lastIndexDone[newState] >= lastIndexDone["tokenizer"])
-        {
-          newState = "morpho";
-          movement = lastIndexDone[newState]-c.getHead()+1;
-          if (lastIndexDone[newState] >= lastIndexDone["tagger"])
-          {
-            newState = "lemmatizer_rules";
-            movement = lastIndexDone["lemmatizer_case"]-c.getHead()+1;
-            if (lastIndexDone["lemmatizer_case"] >= lastIndexDone["morpho"])
-            {
-              newState = "parser";
-              movement = lastIndexDone[newState]-c.getHead()+1;
-              if (lastIndexDone[newState] >= lastIndexDone["lemmatizer_case"])
-              {
-                newState = previousState;
-                movement = 1;
-
-                if (c.getHead() >= lastIndexDone["tagger"])
-                {
-                  done = {{"tokenizer",0},{"tagger",0},{"morpho",0},{"lemmatizer_case",0},{"parser",0}};
-                  lastIndexDone = {{"tokenizer",-1},{"tagger",-1},{"morpho",-1},{"lemmatizer_case",-1},{"parser",-1}};
-                  todo = {{"tokenizer",4*lookahead+1},{"tagger",3*lookahead+1},{"morpho",2*lookahead+1},{"lemmatizer_case",lookahead+1}};
-            
-                  return std::string("");
-                }
-              }
-            }
-          }
-        }
-      }
-
     }
     else
       newState = "unknown("+std::string(ERRINFO)+")("+previousState+")("+previousAction+")";
-- 
GitLab