diff --git a/reading_machine/include/Action.hpp b/reading_machine/include/Action.hpp index 47308d3e34686134a5357d7e68cd728668eb6077..6a34c90be06b17cf383443b6f40a8b2fab12da16 100644 --- a/reading_machine/include/Action.hpp +++ b/reading_machine/include/Action.hpp @@ -55,7 +55,7 @@ class Action static Action assertIsEmpty(const std::string & colName, Config::Object object, int relativeIndex); static Action assertIsNotEmpty(const std::string & colName, Config::Object object, int relativeIndex); static Action attach(Config::Object governorObject, int governorIndex, Config::Object dependentObject, int dependentIndex); - static Action addCurCharToCurWord(); + static Action addCharsToCol(const std::string & col, int n, Config::Object object, int relativeIndex); static Action ignoreCurrentCharacter(); static Action consumeCharacterIndex(util::utf8string consumed); static Action setMultiwordIds(int multiwordSize); diff --git a/reading_machine/include/Transition.hpp b/reading_machine/include/Transition.hpp index b6671a3724b9da5c7a209bcdd8c8e1a3557ff147..a55f1b767b60cb25191227e859f329f81cd0b23b 100644 --- a/reading_machine/include/Transition.hpp +++ b/reading_machine/include/Transition.hpp @@ -46,7 +46,7 @@ class Transition void initNothing(); void initIgnoreChar(); void initEndWord(); - void initAddCharToWord(); + void initAddCharToWord(int n); void initSplitWord(std::vector<std::string> words); void initSplit(int index); void initTransformSuffix(std::string fromCol, std::string fromObj, std::string fromIndex, std::string toCol, std::string toObj, std::string toIndex, std::string rule); diff --git a/reading_machine/src/Action.cpp b/reading_machine/src/Action.cpp index eb03c35dce0a395f66795e23fa24f07e77563f65..a996ce841723e91b9c6f61acbe1230d144e89361 100644 --- a/reading_machine/src/Action.cpp +++ b/reading_machine/src/Action.cpp @@ -459,35 +459,39 @@ Action Action::assertIsNotEmpty(const std::string & colName, Config::Object obje return {Type::Check, apply, undo, appliable}; } -Action Action::addCurCharToCurWord() +Action Action::addCharsToCol(const std::string & col, int n, Config::Object object, int relativeIndex) { - auto apply = [](Config & config, Action & a) + auto apply = [col, n, object, relativeIndex](Config & config, Action & a) { - auto & curWord = config.getLastNotEmptyHyp("FORM", config.getWordIndex()); - curWord = fmt::format("{}{}", curWord, config.getLetter(config.getCharacterIndex())); + auto & curWord = config.getLastNotEmptyHyp(col, config.getRelativeWordIndex(object, relativeIndex)); + for (int i = 0; i < n; i++) + curWord = fmt::format("{}{}", curWord, config.getLetter(config.getCharacterIndex()+i)); }; - auto undo = [](Config & config, Action & a) + auto undo = [col, n, object, relativeIndex](Config & config, Action & a) { - auto & curWord = config.getLastNotEmptyHyp("FORM", config.getWordIndex()); - std::string newWord = curWord; - unsigned int nbToPop = fmt::format("{}", config.getLetter(config.getCharacterIndex())).size(); - for (unsigned int i = 0; i < nbToPop; i++) + auto & curWord = config.getLastNotEmptyHyp(col, config.getRelativeWordIndex(object, relativeIndex)); + auto newWord = util::splitAsUtf8(curWord.get()); + for (int i = 0; i < n; i++) newWord.pop_back(); - curWord = newWord; + curWord = fmt::format("{}", newWord); }; - auto appliable = [](const Config & config, const Action &) + auto appliable = [col, n, object, relativeIndex](const Config & config, const Action &) { - if (!config.hasCharacter(config.getCharacterIndex())) + if (!config.hasCharacter(config.getCharacterIndex()+n-1)) return false; - auto letter = config.getLetter(config.getCharacterIndex()); + auto firstLetter = config.getLetter(config.getCharacterIndex()); - if (letter == ' ') - return !util::isEmpty(config.getAsFeature("FORM", config.getWordIndex())); + if (firstLetter == ' ' and util::isEmpty(config.getAsFeature(col, config.getRelativeWordIndex(object, relativeIndex)))) + return false; - return !util::isIllegal(letter); + for (int i = 0; i < n; i++) + if (util::isIllegal(config.getLetter(config.getCharacterIndex()+i))) + return false; + + return true; }; return {Type::Write, apply, undo, appliable}; diff --git a/reading_machine/src/Transition.cpp b/reading_machine/src/Transition.cpp index 9f6007bd16f05ba2b94e992c8c8b4ce412867f1a..27ca57b93cf1ad0a656c5eb0e9580ea1ac5175c4 100644 --- a/reading_machine/src/Transition.cpp +++ b/reading_machine/src/Transition.cpp @@ -47,8 +47,8 @@ Transition::Transition(const std::string & name) [this](auto){initIgnoreChar();}}, {std::regex("ENDWORD"), [this](auto){initEndWord();}}, - {std::regex("ADDCHARTOWORD"), - [this](auto){initAddCharToWord();}}, + {std::regex("ADDCHARTOWORD (.+)"), + [this](auto sm){initAddCharToWord(std::stoi(sm.str(1)));}}, {std::regex("SPLIT (.+)"), [this](auto sm){(initSplit(std::stoi(sm.str(1))));}}, {std::regex("TRANSFORMSUFFIX (.+) ([bs])\\.(.+) (.+) ([bs])\\.(.+) (.+)"), @@ -205,7 +205,7 @@ void Transition::initIgnoreChar() if (curWord.size() >= goldWord.size()) return 0; - return goldWord[curWord.size()] == letter ? 1 : 0; + return goldWord[curWord.size()] == letter ? std::numeric_limits<int>::max() : 0; }; costStatic = costDynamic; @@ -219,38 +219,40 @@ void Transition::initEndWord() { if (config.getConst("FORM", config.getWordIndex(), 0) == config.getAsFeature("FORM", config.getWordIndex())) return 0; - return 1; + + return std::numeric_limits<int>::max(); }; costStatic = costDynamic; } -void Transition::initAddCharToWord() +void Transition::initAddCharToWord(int n) { sequence.emplace_back(Action::assertIsEmpty(Config::idColName, Config::Object::Buffer, 0)); sequence.emplace_back(Action::addLinesIfNeeded(0)); - sequence.emplace_back(Action::addCurCharToCurWord()); - sequence.emplace_back(Action::moveCharacterIndex(1)); + sequence.emplace_back(Action::addCharsToCol("FORM", n, Config::Object::Buffer, 0)); + sequence.emplace_back(Action::moveCharacterIndex(n)); - costDynamic = [](const Config & config) + costDynamic = [n](const Config & config) { - if (!config.hasCharacter(config.getCharacterIndex())) + if (!config.hasCharacter(config.getCharacterIndex()+n-1)) return std::numeric_limits<int>::max(); if (!config.isToken(config.getWordIndex())) return std::numeric_limits<int>::max(); - auto letter = fmt::format("{}", config.getLetter(config.getCharacterIndex())); - auto & goldWord = config.getConst("FORM", config.getWordIndex(), 0).get(); - auto & curWord = config.getAsFeature("FORM", config.getWordIndex()).get(); - if (curWord.size() + letter.size() > goldWord.size()) - return 1; + std::string curWord = config.getAsFeature("FORM", config.getWordIndex()); + std::string goldWord = config.getConst("FORM", config.getWordIndex(), 0); + for (int i = 0; i < n; i++) + curWord = fmt::format("{}{}", curWord, config.getLetter(config.getCharacterIndex()+i)); - for (unsigned int i = 0; i < letter.size(); i++) - if (goldWord[curWord.size()+i] != letter[i]) - return 1; + if (curWord.size() > goldWord.size()) + return std::numeric_limits<int>::max(); + for (unsigned int i = 0; i < curWord.size(); i++) + if (curWord[i] != goldWord[i]) + return std::numeric_limits<int>::max(); - return 0; + return std::abs((int)goldWord.size() - (int)curWord.size()); }; costStatic = costDynamic; @@ -275,12 +277,11 @@ void Transition::initSplitWord(std::vector<std::string> words) if (config.getMultiwordSize(config.getWordIndex())+2 != (int)words.size()) return std::numeric_limits<int>::max(); - int cost = 0; for (unsigned int i = 0; i < words.size(); i++) if (!config.has("FORM", config.getWordIndex()+i, 0) or config.getConst("FORM", config.getWordIndex()+i, 0) != words[i]) - cost++; + return std::numeric_limits<int>::max(); - return cost; + return 0; }; costStatic = costDynamic;