Add a debug printer for configurations.

* util: add printedLength().
* Config: keep a reference to the raw input, add has(), hasColIndex() and printForDebug().
* BaseConfig / SubConfig: hand the raw input to Config and implement hasColIndex().
* dev: call printForDebug() instead of print().

diff --git a/common/include/util.hpp b/common/include/util.hpp
index 17d4ee8add73219e0a4f4798aa89a885f110c8dd..1363383ce6708530958c5815540920ae1db195c6 100644
--- a/common/include/util.hpp
+++ b/common/include/util.hpp
@@ -40,6 +40,9 @@ utf8string splitAsUtf8(std::string_view s);
 
 std::string int2HumanStr(int number);
 
+// Number of elements produced by splitAsUtf8(s); used to align printed columns.
+int printedLength(std::string_view s);
+
 template <typename T>
 bool isEmpty(const std::vector<T> & s)
 {
diff --git a/common/src/util.cpp b/common/src/util.cpp
index 288b9a96fd6e14f436f2cc974236634bda701964..bee8b9078dd6740d8edd8a1f6730c438f29576d4 100644
--- a/common/src/util.cpp
+++ b/common/src/util.cpp
@@ -14,6 +14,11 @@
 namespace util
 {
 
+int printedLength(std::string_view s)
+{
+  return splitAsUtf8(s).size();
+}
+
 std::string_view getFilenameFromPath(std::string_view s)
 {
   int indexOfSlash = s.find_last_of('/');
diff --git a/dev/src/dev.cpp b/dev/src/dev.cpp
index 3a07a91b02d98cc4244f9b5867b1f064ae894d9e..4604fbafa1d7c8ccdf4ef73664aceda6dfda507b 100644
--- a/dev/src/dev.cpp
+++ b/dev/src/dev.cpp
@@ -17,14 +17,8 @@ int main(int argc, char * argv[])
   configs.emplace_back(config);
 
   configs[0].wordIndex = 2000;
-
-  configs[0].update();
-  configs[0].wordIndex = 0;
-
-  configs[0].update();
-  configs[0].update();
   configs[0].update();
-  configs[0].print(stdout);
+  configs[0].printForDebug(stdout);
 
   fmt::print(stderr, "ok\n");
   std::scanf("%*c");
diff --git a/reading_machine/include/BaseConfig.hpp b/reading_machine/include/BaseConfig.hpp
index c38f926e2c5323d8e7479cf427290a5ed4b8dfc5..e7726f2348c17e7ba74f2aa38643662fb9e8a159 100644
--- a/reading_machine/include/BaseConfig.hpp
+++ b/reading_machine/include/BaseConfig.hpp
@@ -30,8 +30,7 @@ class BaseConfig : public Config
   std::vector<std::string> colIndex2Name;
   std::unordered_map<std::string, int> colName2Index;
 
-  std::string rawInput;
-  util::utf8string rawInputUtf8;
+  Utf8String rawInputUtf8;
 
   private :
 
@@ -39,15 +38,16 @@ class BaseConfig : public Config
   void readRawInput(std::string_view rawFilename);
   void readTSVInput(std::string_view tsvFilename);
 
+  public :
+
+  BaseConfig(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename);
+
   std::size_t getNbColumns() const override;
   std::size_t getFirstLineIndex() const override;
   std::size_t getColIndex(const std::string & colName) const override;
+  bool hasColIndex(const std::string & colName) const override;
   const std::string & getColName(int colIndex) const override;
 
-  public :
-
-  BaseConfig(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename);
-
   friend SubConfig;
 };
 
diff --git a/reading_machine/include/Config.hpp b/reading_machine/include/Config.hpp
index dae3e6cf43e61020f426b3c3ac21bbc7910070d1..dee137d7aef61d1416f85cc24afe35577b0bf127 100644
--- a/reading_machine/include/Config.hpp
+++ b/reading_machine/include/Config.hpp
@@ -17,17 +17,20 @@ class Config
   static constexpr const char * EOSSymbol0 = "0";
   static constexpr int nbHypothesesMax = 1;
 
-  private :
+  public :
 
   using String = boost::flyweight<std::string>;
   using Utf8String = boost::flyweight<util::utf8string>;
   using ValueIterator = std::vector<String>::iterator;
   using ConstValueIterator = std::vector<String>::const_iterator;
 
+  private :
+
   std::vector<String> lines;
 
   public :
 
+  const Utf8String & rawInput;
   std::size_t wordIndex{0};
   std::size_t characterIndex{0};
   String state{"NONE"};
@@ -35,18 +38,22 @@ class Config
 
   protected :
 
+  Config(const Utf8String & rawInput);
+
+  public :
+
   virtual std::size_t getNbColumns() const = 0;
   virtual std::size_t getColIndex(const std::string & colName) const = 0;
+  virtual bool hasColIndex(const std::string & colName) const = 0;
   virtual std::size_t getFirstLineIndex() const = 0;
   virtual const std::string & getColName(int colIndex) const = 0;
 
-  protected :
-
   std::size_t getIndexOfLine(int lineIndex) const;
   std::size_t getIndexOfCol(int colIndex) const;
   std::size_t getNbLines() const;
   void addLines(unsigned int nbLines);
   void resizeLines(unsigned int nbLines);
+  bool has(int colIndex, int lineIndex, int hypothesisIndex) const;
   String & get(int colIndex, int lineIndex, int hypothesisIndex);
   const String & getConst(int colIndex, int lineIndex, int hypothesisIndex) const;
   String & getLastNotEmpty(int colIndex, int lineIndex);
@@ -58,6 +65,8 @@ class Config
 
   virtual ~Config() {}
   void print(FILE * dest) const;
+  void printForDebug(FILE * dest) const;
+  bool has(const std::string & colName, int lineIndex, int hypothesisIndex) const;
   String & get(const std::string & colName, int lineIndex, int hypothesisIndex);
   const String & getConst(const std::string & colName, int lineIndex, int hypothesisIndex) const;
   String & getLastNotEmpty(const std::string & colName, int lineIndex);
diff --git a/reading_machine/include/SubConfig.hpp b/reading_machine/include/SubConfig.hpp
index 60faa69a2e296df860a9b545a25998af7e677bd9..e8c8efe89118eeaaee9341ed6794accba4b3b71a 100644
--- a/reading_machine/include/SubConfig.hpp
+++ b/reading_machine/include/SubConfig.hpp
@@ -15,17 +15,15 @@ class SubConfig : public Config
   const BaseConfig & model;
   std::size_t firstLineIndex{0};
 
-  private :
+  public :
 
+  SubConfig(BaseConfig & model);
+  bool update();
   std::size_t getNbColumns() const override;
   std::size_t getColIndex(const std::string & colName) const override;
+  bool hasColIndex(const std::string & colName) const override;
   const std::string & getColName(int colIndex) const override;
   std::size_t getFirstLineIndex() const override;
-
-  public :
-
-  SubConfig(BaseConfig & model);
-  bool update();
 };
 
 #endif
diff --git a/reading_machine/src/BaseConfig.cpp b/reading_machine/src/BaseConfig.cpp
index 0a33c514f34f79faf0e983dabfed8499edca6854..6853e4eea2deae0df10dde3595138ae41873e4c6 100644
--- a/reading_machine/src/BaseConfig.cpp
+++ b/reading_machine/src/BaseConfig.cpp
@@ -41,12 +41,15 @@ void BaseConfig::readRawInput(std::string_view rawFilename)
   if (not file)
     util::myThrow(fmt::format("Cannot open file '{}'", rawFilename));
 
-  while (not std::feof(file))
-    rawInput.push_back(std::fgetc(file));
+  std::string rawInputTemp;
+
+  // Check the value returned by fgetc() itself: testing feof() first would store the EOF marker as a byte.
+  for (int c = std::fgetc(file); c != EOF; c = std::fgetc(file))
+    rawInputTemp.push_back(c);
 
   std::fclose(file);
 
-  rawInputUtf8 = util::splitAsUtf8(rawInput);
+  rawInputUtf8 = util::splitAsUtf8(rawInputTemp);
 }
 
 void BaseConfig::readTSVInput(std::string_view tsvFilename)
@@ -102,7 +105,7 @@ void BaseConfig::readTSVInput(std::string_view tsvFilename)
   std::fclose(file);
 }
 
-BaseConfig::BaseConfig(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename)
+BaseConfig::BaseConfig(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename) : Config(rawInputUtf8)
 {
   if (tsvFilename.empty() and rawFilename.empty())
     util::myThrow("tsvFilename and rawFilenames can't be both empty");
@@ -128,6 +131,11 @@ std::size_t BaseConfig::getColIndex(const std::string & colName) const
   return colName2Index.at(colName);
 }
 
+bool BaseConfig::hasColIndex(const std::string & colName) const
+{
+  return colName2Index.count(colName);
+}
+
 const std::string & BaseConfig::getColName(int colIndex) const
 {
   return colIndex2Name[colIndex];
diff --git a/reading_machine/src/Config.cpp b/reading_machine/src/Config.cpp
index a1b88982fefce7a2d1899d2b879fc3a58e3170d7..297b2864cbc9484fb9acc7f66a3951a3bbfa671e 100644
--- a/reading_machine/src/Config.cpp
+++ b/reading_machine/src/Config.cpp
@@ -2,6 +2,10 @@
 #include "Config.hpp"
 #include "util.hpp"
 
+Config::Config(const Utf8String & rawInput) : rawInput(rawInput)
+{
+}
+
 std::size_t Config::getIndexOfLine(int lineIndex) const
 {
   return lineIndex * getNbColumns() * (nbHypothesesMax+1);
@@ -22,6 +26,16 @@ void Config::resizeLines(unsigned int nbLines)
   lines.resize(nbLines*getNbColumns()*(nbHypothesesMax+1));
 }
 
+bool Config::has(int colIndex, int lineIndex, int hypothesisIndex) const
+{
+  return colIndex >= 0 && colIndex < (int)getNbColumns() && lineIndex >= (int)getFirstLineIndex() && lineIndex < (int)getFirstLineIndex() + (int)getNbLines() && hypothesisIndex >= 0 && hypothesisIndex < nbHypothesesMax+1;
+}
+
+bool Config::has(const std::string & colName, int lineIndex, int hypothesisIndex) const
+{
+  return hasColIndex(colName) && has(getColIndex(colName), lineIndex, hypothesisIndex);
+}
+
 Config::String & Config::get(const std::string & colName, int lineIndex, int hypothesisIndex)
 {
   return get(getColIndex(colName), lineIndex, hypothesisIndex);
@@ -58,6 +72,54 @@ void Config::print(FILE * dest) const
   }
 }
 
+// Debug dump: prints the lines in a window of at most windowSize lines on each
+// side of wordIndex, one row per line, and marks the wordIndex row with "=>".
+// The last column is not printed; when it holds EOSSymbol1 an empty line follows the row.
+void Config::printForDebug(FILE * dest) const
+{
+  static constexpr int windowSize = 5;
+  const int currentLine = wordIndex;
+
+  // Nothing to print when the current line itself does not exist.
+  if (not has(0, currentLine, 0))
+    return;
+
+  // Grow the window only onto lines that exist, so both bounds are always valid.
+  int firstLineToPrint = currentLine;
+  int lastLineToPrint = currentLine;
+  while (currentLine-firstLineToPrint < windowSize and has(0, firstLineToPrint-1, 0))
+    --firstLineToPrint;
+  while (lastLineToPrint-currentLine < windowSize and has(0, lastLineToPrint+1, 0))
+    ++lastLineToPrint;
+
+  std::vector<std::vector<std::string>> toPrint;
+
+  for (int line = firstLineToPrint; line <= lastLineToPrint; line++)
+  {
+    toPrint.emplace_back();
+    toPrint.back().emplace_back(line == currentLine ? "=>" : "");
+    for (unsigned int i = 0; i < getNbColumns(); i++)
+      toPrint.back().emplace_back(getLastNotEmptyConst(i, line));
+  }
+
+  // Width of every printed column, measured with util::printedLength.
+  std::vector<std::size_t> colLength(toPrint[0].size(), 0);
+  for (auto & line : toPrint)
+    for (unsigned int col = 0; col < line.size()-1; col++)
+      colLength[col] = std::max((int)colLength[col], util::printedLength(line[col]));
+
+  for (auto & line : toPrint)
+  {
+    for (unsigned int col = 0; col < line.size()-1; col++)
+      if (col == 0)
+        fmt::print(dest, "{:>{}}", line[col], colLength[col]);
+      else
+        fmt::print(dest, "{:<{}}{}", line[col], colLength[col], col == line.size()-2 ? "\n" : "\t");
+    if (line.back() == EOSSymbol1)
+      fmt::print(dest, "\n");
+  }
+}
+
 Config::String & Config::getLastNotEmpty(int colIndex, int lineIndex)
 {
   int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);
diff --git a/reading_machine/src/SubConfig.cpp b/reading_machine/src/SubConfig.cpp
index eeb284aec8cd390f681d000dbc2428e45625fc2a..118e62bb737f08a4fd9da2d61245add7fdcfe0fc 100644
--- a/reading_machine/src/SubConfig.cpp
+++ b/reading_machine/src/SubConfig.cpp
@@ -1,6 +1,6 @@
 #include "SubConfig.hpp"
 
-SubConfig::SubConfig(BaseConfig & model) : model(model)
+SubConfig::SubConfig(BaseConfig & model) : Config(model.rawInput), model(model)
 {
   wordIndex = model.wordIndex;
   characterIndex = model.characterIndex;
@@ -71,6 +71,12 @@ std::size_t SubConfig::getColIndex(const std::string & colName) const
   return model.getColIndex(colName);
 }
 
+// Delegates to the model; must not go through getColIndex(), whose result is an index, not a presence flag.
+bool SubConfig::hasColIndex(const std::string & colName) const
+{
+  return model.hasColIndex(colName);
+}
+
 const std::string & SubConfig::getColName(int colIndex) const
 {
   return model.getColName(colIndex);