From 51f41f878f7cd289b481ac824221f5b45d2f1f41 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Thu, 2 Jul 2020 23:13:29 +0200
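Subject: [PATCH] ADDCHARTOWORD can add multiple letters at once

The ADDCHARTOWORD transition now takes an integer argument n
("ADDCHARTOWORD n", parsed with std::stoi). It appends the n characters
starting at the current character index to the FORM of the current
buffer word, then moves the character index forward by n.

Action::addCurCharToCurWord() is replaced by
Action::addCharsToCol(col, n, object, relativeIndex). Its undo splits
the word with util::splitAsUtf8 and pops n elements, and it is only
appliable when n characters are available and none of them is illegal.

Transition costs are changed as well:
- initIgnoreChar, initEndWord and initSplitWord return
  std::numeric_limits<int>::max() where they used to return 1 (or a
  mismatch count for initSplitWord).
- initAddCharToWord returns std::numeric_limits<int>::max() when the
  resulting word is not a prefix of the gold FORM, and otherwise the
  difference between the gold FORM size and the resulting word size.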

---
 reading_machine/include/Action.hpp     |  2 +-
 reading_machine/include/Transition.hpp |  2 +-
 reading_machine/src/Action.cpp         | 36 +++++++++++----------
 reading_machine/src/Transition.cpp     | 43 +++++++++++++-------------
 4 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/reading_machine/include/Action.hpp b/reading_machine/include/Action.hpp
index 47308d3..6a34c90 100644
--- a/reading_machine/include/Action.hpp
+++ b/reading_machine/include/Action.hpp
@@ -55,7 +55,7 @@ class Action
   static Action assertIsEmpty(const std::string & colName, Config::Object object, int relativeIndex);
   static Action assertIsNotEmpty(const std::string & colName, Config::Object object, int relativeIndex);
   static Action attach(Config::Object governorObject, int governorIndex, Config::Object dependentObject, int dependentIndex);
-  static Action addCurCharToCurWord();
+  static Action addCharsToCol(const std::string & col, int n, Config::Object object, int relativeIndex);
   static Action ignoreCurrentCharacter();
   static Action consumeCharacterIndex(util::utf8string consumed);
   static Action setMultiwordIds(int multiwordSize);
diff --git a/reading_machine/include/Transition.hpp b/reading_machine/include/Transition.hpp
index b6671a3..a55f1b7 100644
--- a/reading_machine/include/Transition.hpp
+++ b/reading_machine/include/Transition.hpp
@@ -46,7 +46,7 @@ class Transition
   void initNothing();
   void initIgnoreChar();
   void initEndWord();
-  void initAddCharToWord();
+  void initAddCharToWord(int n);
   void initSplitWord(std::vector<std::string> words);
   void initSplit(int index);
   void initTransformSuffix(std::string fromCol, std::string fromObj, std::string fromIndex, std::string toCol, std::string toObj, std::string toIndex, std::string rule);
diff --git a/reading_machine/src/Action.cpp b/reading_machine/src/Action.cpp
index eb03c35..a996ce8 100644
--- a/reading_machine/src/Action.cpp
+++ b/reading_machine/src/Action.cpp
@@ -459,35 +459,39 @@ Action Action::assertIsNotEmpty(const std::string & colName, Config::Object obje
   return {Type::Check, apply, undo, appliable}; 
 }
 
-Action Action::addCurCharToCurWord()
+Action Action::addCharsToCol(const std::string & col, int n, Config::Object object, int relativeIndex)
 {
-  auto apply = [](Config & config, Action & a)
+  auto apply = [col, n, object, relativeIndex](Config & config, Action & a)
   {
-    auto & curWord = config.getLastNotEmptyHyp("FORM", config.getWordIndex());
-    curWord = fmt::format("{}{}", curWord, config.getLetter(config.getCharacterIndex()));
+    auto & curWord = config.getLastNotEmptyHyp(col, config.getRelativeWordIndex(object, relativeIndex));
+    for (int i = 0; i < n; i++)
+      curWord = fmt::format("{}{}", curWord, config.getLetter(config.getCharacterIndex()+i));
   };
 
-  auto undo = [](Config & config, Action & a)
+  auto undo = [col, n, object, relativeIndex](Config & config, Action & a)
   {
-    auto & curWord = config.getLastNotEmptyHyp("FORM", config.getWordIndex());
-    std::string newWord = curWord;
-    unsigned int nbToPop = fmt::format("{}", config.getLetter(config.getCharacterIndex())).size();
-    for (unsigned int i = 0; i < nbToPop; i++)
+    auto & curWord = config.getLastNotEmptyHyp(col, config.getRelativeWordIndex(object, relativeIndex));
+    auto newWord = util::splitAsUtf8(curWord.get());
+    for (int i = 0; i < n; i++)
       newWord.pop_back();
-    curWord = newWord;
+    curWord = fmt::format("{}", newWord);
   };
 
-  auto appliable = [](const Config & config, const Action &)
+  auto appliable = [col, n, object, relativeIndex](const Config & config, const Action &)
   {
-    if (!config.hasCharacter(config.getCharacterIndex()))
+    if (!config.hasCharacter(config.getCharacterIndex()+n-1))
       return false;
 
-    auto letter = config.getLetter(config.getCharacterIndex());
+    auto firstLetter = config.getLetter(config.getCharacterIndex());
 
-    if (letter == ' ')
-      return !util::isEmpty(config.getAsFeature("FORM", config.getWordIndex()));
+    if (firstLetter == ' ' and util::isEmpty(config.getAsFeature(col, config.getRelativeWordIndex(object, relativeIndex))))
+      return false;
 
-    return !util::isIllegal(letter);
+    for (int i = 0; i < n; i++)
+      if (util::isIllegal(config.getLetter(config.getCharacterIndex()+i)))
+        return false;
+
+    return true;
   };
 
   return {Type::Write, apply, undo, appliable}; 
diff --git a/reading_machine/src/Transition.cpp b/reading_machine/src/Transition.cpp
index 9f6007b..27ca57b 100644
--- a/reading_machine/src/Transition.cpp
+++ b/reading_machine/src/Transition.cpp
@@ -47,8 +47,8 @@ Transition::Transition(const std::string & name)
       [this](auto){initIgnoreChar();}},
     {std::regex("ENDWORD"),
       [this](auto){initEndWord();}},
-    {std::regex("ADDCHARTOWORD"),
-      [this](auto){initAddCharToWord();}},
+    {std::regex("ADDCHARTOWORD (.+)"),
+      [this](auto sm){initAddCharToWord(std::stoi(sm.str(1)));}},
     {std::regex("SPLIT (.+)"),
       [this](auto sm){(initSplit(std::stoi(sm.str(1))));}},
     {std::regex("TRANSFORMSUFFIX (.+) ([bs])\\.(.+) (.+) ([bs])\\.(.+) (.+)"),
@@ -205,7 +205,7 @@ void Transition::initIgnoreChar()
     if (curWord.size() >= goldWord.size())
       return 0;
 
-    return goldWord[curWord.size()] == letter ? 1 : 0;
+    return goldWord[curWord.size()] == letter ? std::numeric_limits<int>::max() : 0;
   };
 
   costStatic = costDynamic;
@@ -219,38 +219,40 @@ void Transition::initEndWord()
   {
     if (config.getConst("FORM", config.getWordIndex(), 0) == config.getAsFeature("FORM", config.getWordIndex()))
       return 0;
-    return 1;
+
+    return std::numeric_limits<int>::max();
   };
 
   costStatic = costDynamic;
 }
 
-void Transition::initAddCharToWord()
+void Transition::initAddCharToWord(int n)
 {
   sequence.emplace_back(Action::assertIsEmpty(Config::idColName, Config::Object::Buffer, 0));
   sequence.emplace_back(Action::addLinesIfNeeded(0));
-  sequence.emplace_back(Action::addCurCharToCurWord());
-  sequence.emplace_back(Action::moveCharacterIndex(1));
+  sequence.emplace_back(Action::addCharsToCol("FORM", n, Config::Object::Buffer, 0));
+  sequence.emplace_back(Action::moveCharacterIndex(n));
 
-  costDynamic = [](const Config & config)
+  costDynamic = [n](const Config & config)
   {
-    if (!config.hasCharacter(config.getCharacterIndex()))
+    if (!config.hasCharacter(config.getCharacterIndex()+n-1))
       return std::numeric_limits<int>::max();
 
     if (!config.isToken(config.getWordIndex()))
       return std::numeric_limits<int>::max();
 
-    auto letter = fmt::format("{}", config.getLetter(config.getCharacterIndex()));
-    auto & goldWord = config.getConst("FORM", config.getWordIndex(), 0).get();
-    auto & curWord = config.getAsFeature("FORM", config.getWordIndex()).get();
-    if (curWord.size() + letter.size() > goldWord.size())
-      return 1;
+    std::string curWord = config.getAsFeature("FORM", config.getWordIndex());
+    std::string goldWord = config.getConst("FORM", config.getWordIndex(), 0);
+    for (int i = 0; i < n; i++)
+      curWord = fmt::format("{}{}", curWord, config.getLetter(config.getCharacterIndex()+i));
 
-    for (unsigned int i = 0; i < letter.size(); i++)
-      if (goldWord[curWord.size()+i] != letter[i])
-        return 1;
+    if (curWord.size() > goldWord.size())
+      return std::numeric_limits<int>::max();
+    for (unsigned int i = 0; i < curWord.size(); i++)
+      if (curWord[i] != goldWord[i])
+        return std::numeric_limits<int>::max();
 
-    return 0;
+    return std::abs((int)goldWord.size() - (int)curWord.size());
   };
 
   costStatic = costDynamic;
@@ -275,12 +277,11 @@ void Transition::initSplitWord(std::vector<std::string> words)
     if (config.getMultiwordSize(config.getWordIndex())+2 != (int)words.size())
       return std::numeric_limits<int>::max();
 
-    int cost = 0;
     for (unsigned int i = 0; i < words.size(); i++)
       if (!config.has("FORM", config.getWordIndex()+i, 0) or config.getConst("FORM", config.getWordIndex()+i, 0) != words[i])
-        cost++;
+        return std::numeric_limits<int>::max();
 
-    return cost;
+    return 0;
   };
 
   costStatic = costDynamic;
-- 
GitLab