diff --git a/common/include/util.hpp b/common/include/util.hpp
index a207f258d8cf8d87c7411446a733a20537624dd9..839f925835bc7c60cca8e0ea54e44aa92f3d7baf 100644
--- a/common/include/util.hpp
+++ b/common/include/util.hpp
@@ -42,6 +42,10 @@ std::string int2HumanStr(int number);
 
 int printedLength(std::string_view s);
 
+bool isSeparator(utf8char c);
+
+bool isIllegal(utf8char c);
+
 template <typename T>
 bool isEmpty(const std::vector<T> & s)
 {
diff --git a/common/src/util.cpp b/common/src/util.cpp
index bee8b9078dd6740d8edd8a1f6730c438f29576d4..8214980eb1466ae9326c8047fbf934dcd67d2464 100644
--- a/common/src/util.cpp
+++ b/common/src/util.cpp
@@ -25,6 +25,18 @@ std::string_view getFilenameFromPath(std::string_view s)
   return {s.data()+indexOfSlash+1, s.size()-1-indexOfSlash};
 }
 
+// Returns true when c is a space or an illegal character (see isIllegal).
+bool isSeparator(utf8char c)
+{
+  return c == ' ' || isIllegal(c);
+}
+
+// Returns true when c is a newline or a tab.
+bool isIllegal(utf8char c)
+{
+  return c == '\n' || c == '\t';
+}
+
 std::vector<std::string_view> split(std::string_view remaining, char delimiter)
 {
   std::vector<std::string_view> result;
diff --git a/dev/src/dev.cpp b/dev/src/dev.cpp
index 3d1faed5c4efc6ec2994cc8fd2105024bf4ccc72..d3c9c9a82341b6a54d8dfbf39b426c6a70211a7b 100644
--- a/dev/src/dev.cpp
+++ b/dev/src/dev.cpp
@@ -12,6 +12,7 @@ int main(int argc, char * argv[])
 
   BaseConfig goldConfig(argv[3], argv[1], argv[2]);
   SubConfig config(goldConfig);
+  auto other = config;
 
   while (config.moveWordIndex(1))
   {
diff --git a/reading_machine/include/Config.hpp b/reading_machine/include/Config.hpp
index 12cb628389e328cf95769260bc56fdaf3f7465fa..c046f1f369147dd11119eea2427e333c87742057 100644
--- a/reading_machine/include/Config.hpp
+++ b/reading_machine/include/Config.hpp
@@ -20,7 +20,7 @@ class Config
   public :
 
   using String = boost::flyweight<std::string>;
-  using Utf8String = boost::flyweight<util::utf8string>;
+  using Utf8String = util::utf8string;
   using ValueIterator = std::vector<String>::iterator;
   using ConstValueIterator = std::vector<String>::const_iterator;
 
@@ -72,7 +72,7 @@ class Config
   const String & getConst(const std::string & colName, int lineIndex, int hypothesisIndex) const;
   String & getLastNotEmpty(const std::string & colName, int lineIndex);
   const String & getLastNotEmptyConst(const std::string & colName, int lineIndex) const;
-  bool hasLetter(int letterIndex) const;
+  bool hasCharacter(int letterIndex) const;
   util::utf8char getLetter(int letterIndex) const;
   void addToHistory(const std::string & transition);
   void addToStack(std::size_t index);
@@ -81,6 +81,13 @@ class Config
   bool isEmptyNode(std::size_t lineIndex) const;
   bool isToken(std::size_t lineIndex) const;
   bool moveWordIndex(int relativeMovement);
+  bool moveCharacterIndex(int relativeMovement);
+  bool rawInputOnlySeparatorsLeft() const;
+  std::size_t getWordIndex() const;
+  std::size_t getCharacterIndex() const;
+  const String & getHistory(int relativeIndex) const;
+  bool hasHistory(int relativeIndex) const;
+
 };
 
 #endif
diff --git a/reading_machine/src/Config.cpp b/reading_machine/src/Config.cpp
index ed8d28727f2868d7bb2ebc57ecbd93ebb21a8581..9cde239802339abe61c8896ef5052dcf01eb932d 100644
--- a/reading_machine/src/Config.cpp
+++ b/reading_machine/src/Config.cpp
@@ -207,14 +207,14 @@ void Config::addToStack(std::size_t index)
   stack.push_back(index);
 }
 
-bool Config::hasLetter(int letterIndex) const
+bool Config::hasCharacter(int letterIndex) const
 {
   return letterIndex >= 0 and letterIndex < (int)util::getSize(rawInput);
 }
 
 util::utf8char Config::getLetter(int letterIndex) const
 {
-  return rawInput.get()[letterIndex];
+  return rawInput[letterIndex];
 }
 
 bool Config::isComment(std::size_t lineIndex) const
@@ -260,3 +260,63 @@ bool Config::moveWordIndex(int relativeMovement)
   return true;
 }
 
+// Moves characterIndex by relativeMovement characters, one step at a time,
+// forward when positive and backward when negative.
+// Returns false as soon as a step fails hasCharacter; the index is then
+// restored to its value before the failing step. Returns true otherwise.
+bool Config::moveCharacterIndex(int relativeMovement)
+{
+  // The loop has to follow the sign of the movement, otherwise a negative
+  // movement would never enter it and would be reported as a success.
+  const int step = relativeMovement < 0 ? -1 : 1;
+  for (int i = 0; i != relativeMovement; i += step)
+  {
+    int oldVal = characterIndex;
+    step > 0 ? characterIndex++ : characterIndex--;
+    if (!hasCharacter(characterIndex))
+    {
+      characterIndex = oldVal;
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Returns true when every character of rawInput from characterIndex to the
+// end is a separator (also true when no character is left).
+bool Config::rawInputOnlySeparatorsLeft() const
+{
+  for (unsigned int i = characterIndex; i < rawInput.size(); i++)
+    if (!util::isSeparator(rawInput[i]))
+      return false;
+
+  return true;
+}
+
+// Returns the current value of wordIndex.
+std::size_t Config::getWordIndex() const
+{
+  return wordIndex;
+}
+
+// Returns the current value of characterIndex.
+std::size_t Config::getCharacterIndex() const
+{
+  return characterIndex;
+}
+
+// Returns the history entry located relativeIndex positions before the last
+// one (0 is the last element). No bounds check: call hasHistory first.
+const Config::String & Config::getHistory(int relativeIndex) const
+{
+  return history[history.size()-1-relativeIndex];
+}
+
+// Returns true when getHistory(relativeIndex) refers to an existing entry.
+// Index 0 (the last element) is valid whenever history is not empty.
+bool Config::hasHistory(int relativeIndex) const
+{
+  return relativeIndex >= 0 && relativeIndex < (int)history.size();
+}
+