From fab4d2a10d88758a438bda28635bab26d63ab7d9 Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Mon, 16 Dec 2019 16:35:41 +0100 Subject: [PATCH] Started to implement printForDebug, and added rawInput --- common/include/util.hpp | 2 + common/src/util.cpp | 5 +++ dev/src/dev.cpp | 8 +--- reading_machine/include/BaseConfig.hpp | 12 +++--- reading_machine/include/Config.hpp | 15 ++++++-- reading_machine/include/SubConfig.hpp | 10 ++--- reading_machine/src/BaseConfig.cpp | 13 +++++-- reading_machine/src/Config.cpp | 51 ++++++++++++++++++++++++++ reading_machine/src/SubConfig.cpp | 7 +++- 9 files changed, 97 insertions(+), 26 deletions(-) diff --git a/common/include/util.hpp b/common/include/util.hpp index 17d4ee8..1363383 100644 --- a/common/include/util.hpp +++ b/common/include/util.hpp @@ -40,6 +40,8 @@ utf8string splitAsUtf8(std::string_view s); std::string int2HumanStr(int number); +int printedLength(std::string_view s); + template <typename T> bool isEmpty(const std::vector<T> & s) { diff --git a/common/src/util.cpp b/common/src/util.cpp index 288b9a9..bee8b90 100644 --- a/common/src/util.cpp +++ b/common/src/util.cpp @@ -14,6 +14,11 @@ namespace util { +int printedLength(std::string_view s) +{ + return splitAsUtf8(s).size(); +} + std::string_view getFilenameFromPath(std::string_view s) { int indexOfSlash = s.find_last_of('/'); diff --git a/dev/src/dev.cpp b/dev/src/dev.cpp index 3a07a91..4604fba 100644 --- a/dev/src/dev.cpp +++ b/dev/src/dev.cpp @@ -17,14 +17,8 @@ int main(int argc, char * argv[]) configs.emplace_back(config); configs[0].wordIndex = 2000; - - configs[0].update(); - configs[0].wordIndex = 0; - - configs[0].update(); - configs[0].update(); configs[0].update(); - configs[0].print(stdout); + configs[0].printForDebug(stdout); fmt::print(stderr, "ok\n"); std::scanf("%*c"); diff --git a/reading_machine/include/BaseConfig.hpp b/reading_machine/include/BaseConfig.hpp index c38f926..e7726f2 100644 --- 
a/reading_machine/include/BaseConfig.hpp +++ b/reading_machine/include/BaseConfig.hpp @@ -30,8 +30,7 @@ class BaseConfig : public Config std::vector<std::string> colIndex2Name; std::unordered_map<std::string, int> colName2Index; - std::string rawInput; - util::utf8string rawInputUtf8; + Utf8String rawInputUtf8; private : @@ -39,15 +38,16 @@ class BaseConfig : public Config void readRawInput(std::string_view rawFilename); void readTSVInput(std::string_view tsvFilename); + public : + + BaseConfig(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename); + std::size_t getNbColumns() const override; std::size_t getFirstLineIndex() const override; std::size_t getColIndex(const std::string & colName) const override; + bool hasColIndex(const std::string & colName) const override; const std::string & getColName(int colIndex) const override; - public : - - BaseConfig(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename); - friend SubConfig; }; diff --git a/reading_machine/include/Config.hpp b/reading_machine/include/Config.hpp index dae3e6c..dee137d 100644 --- a/reading_machine/include/Config.hpp +++ b/reading_machine/include/Config.hpp @@ -17,17 +17,20 @@ class Config static constexpr const char * EOSSymbol0 = "0"; static constexpr int nbHypothesesMax = 1; - private : + public : using String = boost::flyweight<std::string>; using Utf8String = boost::flyweight<util::utf8string>; using ValueIterator = std::vector<String>::iterator; using ConstValueIterator = std::vector<String>::const_iterator; + private : + std::vector<String> lines; public : + const Utf8String & rawInput; std::size_t wordIndex{0}; std::size_t characterIndex{0}; String state{"NONE"}; @@ -35,18 +38,22 @@ class Config protected : + Config(const Utf8String & rawInput); + + public : + virtual std::size_t getNbColumns() const = 0; virtual std::size_t getColIndex(const std::string & colName) const = 0; + virtual bool hasColIndex(const 
std::string & colName) const = 0; virtual std::size_t getFirstLineIndex() const = 0; virtual const std::string & getColName(int colIndex) const = 0; - protected : - std::size_t getIndexOfLine(int lineIndex) const; std::size_t getIndexOfCol(int colIndex) const; std::size_t getNbLines() const; void addLines(unsigned int nbLines); void resizeLines(unsigned int nbLines); + bool has(int colIndex, int lineIndex, int hypothesisIndex) const; String & get(int colIndex, int lineIndex, int hypothesisIndex); const String & getConst(int colIndex, int lineIndex, int hypothesisIndex) const; String & getLastNotEmpty(int colIndex, int lineIndex); @@ -58,6 +65,8 @@ class Config virtual ~Config() {} void print(FILE * dest) const; + void printForDebug(FILE * dest) const; + bool has(const std::string & colName, int lineIndex, int hypothesisIndex) const; String & get(const std::string & colName, int lineIndex, int hypothesisIndex); const String & getConst(const std::string & colName, int lineIndex, int hypothesisIndex) const; String & getLastNotEmpty(const std::string & colName, int lineIndex); diff --git a/reading_machine/include/SubConfig.hpp b/reading_machine/include/SubConfig.hpp index 60faa69..e8c8efe 100644 --- a/reading_machine/include/SubConfig.hpp +++ b/reading_machine/include/SubConfig.hpp @@ -15,17 +15,15 @@ class SubConfig : public Config const BaseConfig & model; std::size_t firstLineIndex{0}; - private : + public : + SubConfig(BaseConfig & model); + bool update(); std::size_t getNbColumns() const override; std::size_t getColIndex(const std::string & colName) const override; + bool hasColIndex(const std::string & colName) const override; const std::string & getColName(int colIndex) const override; std::size_t getFirstLineIndex() const override; - - public : - - SubConfig(BaseConfig & model); - bool update(); }; #endif diff --git a/reading_machine/src/BaseConfig.cpp b/reading_machine/src/BaseConfig.cpp index 0a33c51..6853e4e 100644 --- a/reading_machine/src/BaseConfig.cpp 
+++ b/reading_machine/src/BaseConfig.cpp @@ -41,12 +41,14 @@ void BaseConfig::readRawInput(std::string_view rawFilename) if (not file) util::myThrow(fmt::format("Cannot open file '{}'", rawFilename)); + std::string rawInputTemp; + while (not std::feof(file)) - rawInput.push_back(std::fgetc(file)); + rawInputTemp.push_back(std::fgetc(file)); std::fclose(file); - rawInputUtf8 = util::splitAsUtf8(rawInput); + rawInputUtf8 = util::splitAsUtf8(rawInputTemp); } void BaseConfig::readTSVInput(std::string_view tsvFilename) @@ -102,7 +104,7 @@ void BaseConfig::readTSVInput(std::string_view tsvFilename) std::fclose(file); } -BaseConfig::BaseConfig(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename) +BaseConfig::BaseConfig(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename) : Config(rawInputUtf8) { if (tsvFilename.empty() and rawFilename.empty()) util::myThrow("tsvFilename and rawFilenames can't be both empty"); @@ -128,6 +130,11 @@ std::size_t BaseConfig::getColIndex(const std::string & colName) const return colName2Index.at(colName); } +bool BaseConfig::hasColIndex(const std::string & colName) const +{ + return colName2Index.count(colName); +} + const std::string & BaseConfig::getColName(int colIndex) const { return colIndex2Name[colIndex]; diff --git a/reading_machine/src/Config.cpp b/reading_machine/src/Config.cpp index a1b8898..297b286 100644 --- a/reading_machine/src/Config.cpp +++ b/reading_machine/src/Config.cpp @@ -2,6 +2,10 @@ #include "Config.hpp" #include "util.hpp" +Config::Config(const Utf8String & rawInput) : rawInput(rawInput) +{ +} + std::size_t Config::getIndexOfLine(int lineIndex) const { return lineIndex * getNbColumns() * (nbHypothesesMax+1); @@ -22,6 +26,16 @@ void Config::resizeLines(unsigned int nbLines) lines.resize(nbLines*getNbColumns()*(nbHypothesesMax+1)); } +bool Config::has(int colIndex, int lineIndex, int hypothesisIndex) const +{ + return colIndex >= 0 && colIndex < 
(int)getNbColumns() && lineIndex >= (int)getFirstLineIndex() && lineIndex < (int)getFirstLineIndex() + (int)getNbLines() && hypothesisIndex >= 0 && hypothesisIndex < nbHypothesesMax+1; +} + +bool Config::has(const std::string & colName, int lineIndex, int hypothesisIndex) const +{ + return hasColIndex(colName) && has(getColIndex(colName), lineIndex, hypothesisIndex); +} + Config::String & Config::get(const std::string & colName, int lineIndex, int hypothesisIndex) { return get(getColIndex(colName), lineIndex, hypothesisIndex); @@ -58,6 +72,43 @@ void Config::print(FILE * dest) const } } +void Config::printForDebug(FILE * dest) const +{ + static constexpr int windowSize = 5; + int firstLineToPrint = wordIndex; + int lastLineToPrint = wordIndex; + while (wordIndex-firstLineToPrint < windowSize and has(0, firstLineToPrint-1, 0)) /* probe the previous line so the bound never leaves the existing lines */ + --firstLineToPrint; + while (lastLineToPrint - wordIndex < windowSize and has(0, lastLineToPrint+1, 0)) /* probe the next line so the bound never leaves the existing lines */ + ++lastLineToPrint; + + std::vector<std::vector<std::string>> toPrint; + + for (int line = firstLineToPrint; line <= lastLineToPrint; line++) + { + toPrint.emplace_back(); + toPrint.back().emplace_back(line == (int)wordIndex ? "=>" : ""); + for (unsigned int i = 0; i < getNbColumns(); i++) + toPrint.back().emplace_back(getLastNotEmptyConst(i, line)); + } + + std::vector<std::size_t> colLength(toPrint[0].size(), 0); + for (auto & line : toPrint) + for (unsigned int col = 0; col < line.size()-1; col++) + colLength[col] = std::max((int)colLength[col], util::printedLength(line[col])); + + for (auto & line : toPrint) + { + for (unsigned int col = 0; col < line.size()-1; col++) + if (col == 0) + fmt::print(dest, "{:>{}}", line[col], colLength[col]); + else + fmt::print(dest, "{:<{}}{}", line[col], colLength[col], col == line.size()-2 ?
"\n" : "\t"); + if (line.back() == EOSSymbol1) + fmt::print(dest, "\n"); + } +} + Config::String & Config::getLastNotEmpty(int colIndex, int lineIndex) { int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex); diff --git a/reading_machine/src/SubConfig.cpp b/reading_machine/src/SubConfig.cpp index eeb284a..118e62b 100644 --- a/reading_machine/src/SubConfig.cpp +++ b/reading_machine/src/SubConfig.cpp @@ -1,6 +1,6 @@ #include "SubConfig.hpp" -SubConfig::SubConfig(BaseConfig & model) : model(model) +SubConfig::SubConfig(BaseConfig & model) : Config(model.rawInput), model(model) { wordIndex = model.wordIndex; characterIndex = model.characterIndex; @@ -71,6 +71,11 @@ std::size_t SubConfig::getColIndex(const std::string & colName) const return model.getColIndex(colName); } +bool SubConfig::hasColIndex(const std::string & colName) const +{ + return model.hasColIndex(colName); /* membership test; getColIndex would throw on an unknown column */ +} + const std::string & SubConfig::getColName(int colIndex) const { return model.getColName(colIndex); -- GitLab