From fab4d2a10d88758a438bda28635bab26d63ab7d9 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Mon, 16 Dec 2019 16:35:41 +0100
Subject: [PATCH] Started to implement printForDebug, and added rawInput

---
 common/include/util.hpp                |  2 +
 common/src/util.cpp                    |  5 +++
 dev/src/dev.cpp                        |  8 +---
 reading_machine/include/BaseConfig.hpp | 12 +++---
 reading_machine/include/Config.hpp     | 15 ++++++--
 reading_machine/include/SubConfig.hpp  | 10 ++---
 reading_machine/src/BaseConfig.cpp     | 13 +++++--
 reading_machine/src/Config.cpp         | 51 ++++++++++++++++++++++++++
 reading_machine/src/SubConfig.cpp      |  7 +++-
 9 files changed, 97 insertions(+), 26 deletions(-)

diff --git a/common/include/util.hpp b/common/include/util.hpp
index 17d4ee8..1363383 100644
--- a/common/include/util.hpp
+++ b/common/include/util.hpp
@@ -40,6 +40,8 @@ utf8string splitAsUtf8(std::string_view s);
 
 std::string int2HumanStr(int number);
 
+int printedLength(std::string_view s); // size of splitAsUtf8(s) ; presumably the number of characters s occupies once printed
+
 template <typename T>
 bool isEmpty(const std::vector<T> & s)
 {
diff --git a/common/src/util.cpp b/common/src/util.cpp
index 288b9a9..bee8b90 100644
--- a/common/src/util.cpp
+++ b/common/src/util.cpp
@@ -14,6 +14,11 @@
 namespace util
 {
 
+int printedLength(std::string_view s)
+{ // Counts the elements splitAsUtf8 produces for s (presumably one per UTF-8 character, not per byte).
+  return static_cast<int>(splitAsUtf8(s).size());
+}
+
 std::string_view getFilenameFromPath(std::string_view s)
 {
   int indexOfSlash = s.find_last_of('/');
diff --git a/dev/src/dev.cpp b/dev/src/dev.cpp
index 3a07a91..4604fba 100644
--- a/dev/src/dev.cpp
+++ b/dev/src/dev.cpp
@@ -17,14 +17,8 @@ int main(int argc, char * argv[])
     configs.emplace_back(config);
 
   configs[0].wordIndex = 2000;
-
-  configs[0].update();
-  configs[0].wordIndex = 0;
-
-  configs[0].update();
-  configs[0].update();
   configs[0].update();
-  configs[0].print(stdout);
+  configs[0].printForDebug(stdout);
 
   fmt::print(stderr, "ok\n");
   std::scanf("%*c");
diff --git a/reading_machine/include/BaseConfig.hpp b/reading_machine/include/BaseConfig.hpp
index c38f926..e7726f2 100644
--- a/reading_machine/include/BaseConfig.hpp
+++ b/reading_machine/include/BaseConfig.hpp
@@ -30,8 +30,7 @@ class BaseConfig : public Config
   std::vector<std::string> colIndex2Name;
   std::unordered_map<std::string, int> colName2Index;
 
-  std::string rawInput;
-  util::utf8string rawInputUtf8;
+  Utf8String rawInputUtf8;
 
   private :
 
@@ -39,15 +38,16 @@ class BaseConfig : public Config
   void readRawInput(std::string_view rawFilename);
   void readTSVInput(std::string_view tsvFilename);
 
+  public :
+
+  BaseConfig(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename);
+
   std::size_t getNbColumns() const override;
   std::size_t getFirstLineIndex() const override;
   std::size_t getColIndex(const std::string & colName) const override;
+  bool hasColIndex(const std::string & colName) const override;
   const std::string & getColName(int colIndex) const override;
 
-  public :
-
-  BaseConfig(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename);
-
   friend SubConfig;
 };
 
diff --git a/reading_machine/include/Config.hpp b/reading_machine/include/Config.hpp
index dae3e6c..dee137d 100644
--- a/reading_machine/include/Config.hpp
+++ b/reading_machine/include/Config.hpp
@@ -17,17 +17,20 @@ class Config
   static constexpr const char * EOSSymbol0 = "0";
   static constexpr int nbHypothesesMax = 1;
 
-  private :
+  public :
 
   using String = boost::flyweight<std::string>;
   using Utf8String = boost::flyweight<util::utf8string>;
   using ValueIterator = std::vector<String>::iterator;
   using ConstValueIterator = std::vector<String>::const_iterator;
 
+  private :
+
   std::vector<String> lines;
 
   public : 
 
+  const Utf8String & rawInput;
   std::size_t wordIndex{0};
   std::size_t characterIndex{0};
   String state{"NONE"};
@@ -35,18 +38,22 @@ class Config
 
   protected :
 
+  Config(const Utf8String & rawInput);
+
+  public :
+
   virtual std::size_t getNbColumns() const = 0;
   virtual std::size_t getColIndex(const std::string & colName) const = 0;
+  virtual bool hasColIndex(const std::string & colName) const = 0;
   virtual std::size_t getFirstLineIndex() const = 0;
   virtual const std::string & getColName(int colIndex) const = 0;
 
-  protected :
-
   std::size_t getIndexOfLine(int lineIndex) const;
   std::size_t getIndexOfCol(int colIndex) const;
   std::size_t getNbLines() const;
   void addLines(unsigned int nbLines);
   void resizeLines(unsigned int nbLines);
+  bool has(int colIndex, int lineIndex, int hypothesisIndex) const;
   String & get(int colIndex, int lineIndex, int hypothesisIndex);
   const String & getConst(int colIndex, int lineIndex, int hypothesisIndex) const;
   String & getLastNotEmpty(int colIndex, int lineIndex);
@@ -58,6 +65,8 @@ class Config
 
   virtual ~Config() {}
   void print(FILE * dest) const;
+  void printForDebug(FILE * dest) const;
+  bool has(const std::string & colName, int lineIndex, int hypothesisIndex) const;
   String & get(const std::string & colName, int lineIndex, int hypothesisIndex);
   const String & getConst(const std::string & colName, int lineIndex, int hypothesisIndex) const;
   String & getLastNotEmpty(const std::string & colName, int lineIndex);
diff --git a/reading_machine/include/SubConfig.hpp b/reading_machine/include/SubConfig.hpp
index 60faa69..e8c8efe 100644
--- a/reading_machine/include/SubConfig.hpp
+++ b/reading_machine/include/SubConfig.hpp
@@ -15,17 +15,15 @@ class SubConfig : public Config
   const BaseConfig & model;
   std::size_t firstLineIndex{0};
 
-  private :
+  public :
 
+  SubConfig(BaseConfig & model);
+  bool update();
   std::size_t getNbColumns() const override;
   std::size_t getColIndex(const std::string & colName) const override;
+  bool hasColIndex(const std::string & colName) const override;
   const std::string & getColName(int colIndex) const override;
   std::size_t getFirstLineIndex() const override;
-
-  public :
-
-  SubConfig(BaseConfig & model);
-  bool update();
 };
 
 #endif
diff --git a/reading_machine/src/BaseConfig.cpp b/reading_machine/src/BaseConfig.cpp
index 0a33c51..6853e4e 100644
--- a/reading_machine/src/BaseConfig.cpp
+++ b/reading_machine/src/BaseConfig.cpp
@@ -41,12 +41,14 @@ void BaseConfig::readRawInput(std::string_view rawFilename)
   if (not file)
     util::myThrow(fmt::format("Cannot open file '{}'", rawFilename));
 
+  std::string rawInputTemp;
+
   while (not std::feof(file))
-    rawInput.push_back(std::fgetc(file));
+    rawInputTemp.push_back(std::fgetc(file));
 
   std::fclose(file);
 
-  rawInputUtf8 = util::splitAsUtf8(rawInput);
+  rawInputUtf8 = util::splitAsUtf8(rawInputTemp);
 }
 
 void BaseConfig::readTSVInput(std::string_view tsvFilename)
@@ -102,7 +104,7 @@ void BaseConfig::readTSVInput(std::string_view tsvFilename)
   std::fclose(file);
 }
 
-BaseConfig::BaseConfig(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename)
+BaseConfig::BaseConfig(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename) : Config(rawInputUtf8)
 {
   if (tsvFilename.empty() and rawFilename.empty())
     util::myThrow("tsvFilename and rawFilenames can't be both empty");
@@ -128,6 +130,11 @@ std::size_t BaseConfig::getColIndex(const std::string & colName) const
   return colName2Index.at(colName);
 }
 
+bool BaseConfig::hasColIndex(const std::string & colName) const
+{ // True when colName is one of the keys of colName2Index.
+  return colName2Index.find(colName) != colName2Index.end();
+}
+
 const std::string & BaseConfig::getColName(int colIndex) const
 {
   return colIndex2Name[colIndex];
diff --git a/reading_machine/src/Config.cpp b/reading_machine/src/Config.cpp
index a1b8898..297b286 100644
--- a/reading_machine/src/Config.cpp
+++ b/reading_machine/src/Config.cpp
@@ -2,6 +2,10 @@
 #include "Config.hpp"
 #include "util.hpp"
 
+Config::Config(const Utf8String & rawInput) : rawInput(rawInput)
+{ // The rawInput member is a reference : it is only bound to the caller's object, nothing is copied.
+}
+
 std::size_t Config::getIndexOfLine(int lineIndex) const
 {
   return lineIndex * getNbColumns() * (nbHypothesesMax+1);
@@ -22,6 +26,16 @@ void Config::resizeLines(unsigned int nbLines)
   lines.resize(nbLines*getNbColumns()*(nbHypothesesMax+1));
 }
 
+bool Config::has(int colIndex, int lineIndex, int hypothesisIndex) const
+{ // A cell exists when none of its three indexes falls outside the ranges tested below.
+  return not (colIndex < 0 || colIndex >= (int)getNbColumns() || lineIndex < (int)getFirstLineIndex() || lineIndex >= (int)getFirstLineIndex() + (int)getNbLines() || hypothesisIndex < 0 || hypothesisIndex > nbHypothesesMax);
+}
+
+bool Config::has(const std::string & colName, int lineIndex, int hypothesisIndex) const
+{ // An unknown column name answers false without ever reaching getColIndex.
+  return hasColIndex(colName) ? has(getColIndex(colName), lineIndex, hypothesisIndex) : false;
+}
+
 Config::String & Config::get(const std::string & colName, int lineIndex, int hypothesisIndex)
 {
   return get(getColIndex(colName), lineIndex, hypothesisIndex);
@@ -58,6 +72,43 @@ void Config::print(FILE * dest) const
   }
 }
 
+void Config::printForDebug(FILE * dest) const
+{ // Writes to dest an aligned table of the lines surrounding wordIndex ; the last column is never printed.
+  static constexpr int windowSize = 5; // upper bound on the lines shown on each side of wordIndex
+  int firstLineToPrint = wordIndex; // NOTE(review): the line at wordIndex itself is not validated with has()
+  int lastLineToPrint = wordIndex;
+  while (wordIndex-firstLineToPrint < windowSize and has(0, firstLineToPrint-1, 0))
+    --firstLineToPrint;
+  while (lastLineToPrint - wordIndex < windowSize and has(0, lastLineToPrint+1, 0))
+    ++lastLineToPrint;
+  // A bound only moves after has() accepted the neighbouring line, so the window never extends onto a line has() rejects.
+  std::vector<std::vector<std::string>> toPrint;
+  // One row per line : a marker cell ("=>" on wordIndex) then getLastNotEmptyConst for every column.
+  for (int line = firstLineToPrint; line <= lastLineToPrint; line++)
+  {
+    toPrint.emplace_back();
+    toPrint.back().emplace_back(line == (int)wordIndex ? "=>" : "");
+    for (unsigned int i = 0; i < getNbColumns(); i++)
+      toPrint.back().emplace_back(getLastNotEmptyConst(i, line));
+  }
+  // Width of a printed column = largest printedLength among its cells.
+  std::vector<std::size_t> colLength(toPrint[0].size(), 0);
+  for (auto & line : toPrint)
+    for (unsigned int col = 0; col < line.size()-1; col++)
+      colLength[col] = std::max((int)colLength[col], util::printedLength(line[col]));
+  // Marker right-aligned, other cells left-aligned and tab-separated, newline after the last printed cell.
+  for (auto & line : toPrint)
+  {
+    for (unsigned int col = 0; col < line.size()-1; col++)
+      if (col == 0)
+        fmt::print(dest, "{:>{}}", line[col], colLength[col]);
+      else
+        fmt::print(dest, "{:<{}}{}", line[col], colLength[col], col == line.size()-2 ? "\n" : "\t");
+    if (line.back() == EOSSymbol1) // extra blank line when the unprinted last cell equals EOSSymbol1
+      fmt::print(dest, "\n");
+  }
+}
+
 Config::String & Config::getLastNotEmpty(int colIndex, int lineIndex)
 {
   int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);
diff --git a/reading_machine/src/SubConfig.cpp b/reading_machine/src/SubConfig.cpp
index eeb284a..118e62b 100644
--- a/reading_machine/src/SubConfig.cpp
+++ b/reading_machine/src/SubConfig.cpp
@@ -1,6 +1,6 @@
 #include "SubConfig.hpp"
 
-SubConfig::SubConfig(BaseConfig & model) : model(model)
+SubConfig::SubConfig(BaseConfig & model) : Config(model.rawInput), model(model)
 {
   wordIndex = model.wordIndex;
   characterIndex = model.characterIndex;
@@ -71,6 +71,11 @@ std::size_t SubConfig::getColIndex(const std::string & colName) const
   return model.getColIndex(colName);
 }
 
+bool SubConfig::hasColIndex(const std::string & colName) const
+{ // Column names are owned by the BaseConfig model, so the existence test is forwarded to it.
+  return model.hasColIndex(colName);
+}
+
 const std::string & SubConfig::getColName(int colIndex) const
 {
   return model.getColName(colIndex);
-- 
GitLab