Skip to content
Snippets Groups Projects
Config.cpp 16.9 KiB
Newer Older
  • Learn to ignore specific revisions
  • Franck Dary's avatar
    Franck Dary committed
    #include "Config.hpp"
    #include "util.hpp"
    
    
    // Builds a configuration whose rawInput member is initialised from the
    // given text. No other work is done here.
    Config::Config(const Utf8String & rawInput)
      : rawInput(rawInput)
    {
    }
    
    
    std::size_t Config::getIndexOfLine(int lineIndex) const
    
    Franck Dary's avatar
    Franck Dary committed
    {
    
      return lineIndex * getNbColumns() * (nbHypothesesMax+1);
    
    Franck Dary's avatar
    Franck Dary committed
    }
    
    Franck Dary's avatar
    Franck Dary committed
    
    
    std::size_t Config::getIndexOfCol(int colIndex) const
    
    Franck Dary's avatar
    Franck Dary committed
    {
    
      return colIndex * (nbHypothesesMax+1);
    
    Franck Dary's avatar
    Franck Dary committed
    }
    
    
    void Config::addLines(unsigned int nbLines)
    
    Franck Dary's avatar
    Franck Dary committed
    {
    
      lines.resize(lines.size() + nbLines*getNbColumns()*(nbHypothesesMax+1));
    
    Franck Dary's avatar
    Franck Dary committed
    }
    
    
    // Appends one line worth of cells and writes "#" into column 0 of that
    // new line, both in cell 0 and in the cell selected by getLastNotEmptyHyp.
    void Config::addComment()
    {
      const auto cellsPerLine = getNbColumns()*(nbHypothesesMax+1);
      lines.resize(lines.size() + cellsPerLine);

      // The line count is read after the resize, so this designates the line
      // that was just appended.
      const auto newLine = getNbLines()-1;
      get(0, newLine, 0) = "#";
      getLastNotEmptyHyp(0, newLine) = "#";
    }
    
    
    void Config::resizeLines(unsigned int nbLines)
    
    Franck Dary's avatar
    Franck Dary committed
    {
    
      lines.resize(nbLines*getNbColumns()*(nbHypothesesMax+1));
    
    Franck Dary's avatar
    Franck Dary committed
    }
    
    
    // True when (colIndex, lineIndex, hypothesisIndex) designates an existing
    // cell: the column is in [0, getNbColumns()), the line is in
    // [getFirstLineIndex(), getFirstLineIndex()+getNbLines()) and the
    // hypothesis is in [0, nbHypothesesMax].
    bool Config::has(int colIndex, int lineIndex, int hypothesisIndex) const
    {
      if (colIndex < 0 || colIndex >= (int)getNbColumns())
        return false;

      if (lineIndex < (int)getFirstLineIndex())
        return false;

      if (lineIndex >= (int)getFirstLineIndex() + (int)getNbLines())
        return false;

      return hypothesisIndex >= 0 && hypothesisIndex < nbHypothesesMax+1;
    }
    
    // Name-based variant of has(): false when the column name is unknown,
    // otherwise the result of the index-based check.
    bool Config::has(const std::string & colName, int lineIndex, int hypothesisIndex) const
    {
      if (!hasColIndex(colName))
        return false;

      return has(getColIndex(colName), lineIndex, hypothesisIndex);
    }
    
    
    // Name-based variant of get(): resolves colName with getColIndex and
    // forwards to the index-based overload.
    Config::String & Config::get(const std::string & colName, int lineIndex, int hypothesisIndex)
    {
      return get(getColIndex(colName), lineIndex, hypothesisIndex);
    }
    
    
    Franck Dary's avatar
    Franck Dary committed
    // Read-only, name-based cell access: resolves colName with getColIndex
    // and forwards to the index-based getConst.
    const Config::String & Config::getConst(const std::string & colName, int lineIndex, int hypothesisIndex) const
    {
      const auto resolvedCol = getColIndex(colName);
      return getConst(resolvedCol, lineIndex, hypothesisIndex);
    }
    
    
    // Mutable access to the cell at (colIndex, lineIndex, hypothesisIndex).
    // No bounds check is done here; the iterator is dereferenced directly.
    Config::String & Config::get(int colIndex, int lineIndex, int hypothesisIndex)
    {
      return *getIterator(colIndex, lineIndex, hypothesisIndex);
    }
    
    Franck Dary's avatar
    Franck Dary committed
    // Read-only access to the cell at (colIndex, lineIndex, hypothesisIndex).
    // No bounds check is done here; the iterator is dereferenced directly.
    const Config::String & Config::getConst(int colIndex, int lineIndex, int hypothesisIndex) const
    {
      auto cell = getConstIterator(colIndex, lineIndex, hypothesisIndex);
      return *cell;
    }
    
    
    std::size_t Config::getNbLines() const
    
      return lines.size() / getIndexOfCol(getNbColumns());
    
    Franck Dary's avatar
    Franck Dary committed
    void Config::print(FILE * dest) const
    
      std::vector<std::string> currentSequence;
      std::vector<std::string> currentSequenceComments;
    
      auto flushCurrentSequence = [&dest, &currentSequence, &currentSequenceComments]()
      {
        if (currentSequence.empty() && currentSequenceComments.empty())
          return;
    
        for (auto & comment : currentSequenceComments)
          fmt::print(dest, "{}", comment);
    
        for (auto & line : currentSequence)
          fmt::print(dest, "{}", line);
    
        fmt::print(dest, "\n");
    
        currentSequence.clear();
        currentSequenceComments.clear();
      };
    
    
      for (unsigned int line = 0; line < getNbLines(); line++)
      {
    
    Franck Dary's avatar
    Franck Dary committed
        if (isComment(getFirstLineIndex()+line))
        {
    
          currentSequenceComments.emplace_back(fmt::format("{}\n", getConst(0, getFirstLineIndex()+line, 0)));
    
    Franck Dary's avatar
    Franck Dary committed
          continue;
        }
    
        for (unsigned int i = 0; i < getNbColumns()-1; i++)
    
          auto & colContent = getAsFeature(i, getFirstLineIndex()+line);
    
          std::string valueToPrint = colContent;
    
          try
          {
            if (getColName(i) == headColName)
              if (valueToPrint != "0")
    
                valueToPrint = getAsFeature(idColName, std::stoi(valueToPrint));
    
          } catch(std::exception &) {}
    
          if (valueToPrint.empty())
            valueToPrint = "_";
    
    
          currentSequence.emplace_back(fmt::format("{}{}", valueToPrint, i < getNbColumns()-2 ? "\t" : "\n"));
    
        auto & eosColContent = getAsFeature(EOSColName, getFirstLineIndex()+line);
    
        if (eosColContent == EOSSymbol1)
          flushCurrentSequence();
    
    
      flushCurrentSequence();
    
    // Prints a debugging view of the configuration to dest: a window of the
    // raw input starting at characterIndex, the state / wordIndex /
    // characterIndex / history / stack summary, and a table of the lines
    // around wordIndex with one column per configuration column.
    //
    // NOTE(review): this definition is interleaved with repository-viewer
    // residue ("Franck Dary's avatar" / "Franck Dary committed") and several
    // braces and statements are missing from the text under review (see the
    // notes below). Confirm against the repository before relying on it.
    void Config::printForDebug(FILE * dest) const
    {
    
    Franck Dary's avatar
    Franck Dary committed
      // Maximum number of lines shown on each side of wordIndex.
      static constexpr int windowSize = 10;
    
    Franck Dary's avatar
    Franck Dary committed
      // Maximum number of raw-input letters printed, starting at characterIndex.
      static constexpr int lettersWindowSize = 40;
    
      // Length given to util::shrink for every cell of the table.
      static constexpr int maxWordLength = 7;
    
      fmt::print(dest, "\n");
    
      int firstLineToPrint = wordIndex;
      int lastLineToPrint = wordIndex;
    
      // NOTE(review): the body of this first loop is missing from the text
      // under review (presumably it decrements firstLineToPrint, mirroring
      // the loop below) - confirm.
      while (wordIndex-firstLineToPrint < windowSize and has(0, firstLineToPrint-1, 0))
    
      while (lastLineToPrint - wordIndex < windowSize and has(0, lastLineToPrint+1, 0))
    
        ++lastLineToPrint;
    
      // Table to print: toPrint[row][col]. Row 0 is the header.
      std::vector<std::vector<std::string>> toPrint;
    
    
    Franck Dary's avatar
    Franck Dary committed
      // Header row: an empty marker cell followed by every column name.
      toPrint.emplace_back();
      toPrint.back().emplace_back("");
      for (unsigned int i = 0; i < getNbColumns(); i++)
        toPrint.back().emplace_back(getColName(i));
    
    
      for (int line = firstLineToPrint; line <= lastLineToPrint; line++)
      {
    
    Franck Dary's avatar
    Franck Dary committed
        if (isComment(line))
          continue;
    
        // The marker cell is "=>" on the line at wordIndex, empty elsewhere.
        toPrint.emplace_back();
        toPrint.back().emplace_back(line == (int)wordIndex ? "=>" : "");
        // NOTE(review): the braces of this loop are missing from the text
        // under review; its body uses `i` up to the emplace_back below.
        for (unsigned int i = 0; i < getNbColumns(); i++)
    
          auto & colContent = getAsFeature(i, line);
    
          std::string toPrintCol = colContent;
          // In the head column a value other than "_", "" and "0" is used as
          // a line index and replaced by the id column of that line; a
          // failure is re-raised through util::myThrow with the value.
          try
          {
    
            if (getColName(i) == headColName && toPrintCol != "_" && !toPrintCol.empty())
    
              if (toPrintCol != "0")
    
                toPrintCol = getAsFeature(idColName, std::stoi(toPrintCol));
          } catch(std::exception & e) {util::myThrow(fmt::format("toPrintCol='{}' {}", toPrintCol, e.what()));}
    
          toPrint.back().emplace_back(util::shrink(toPrintCol, maxWordLength));
    
      }
    
      // Widest printed length of every column; the last cell of each row is
      // left out of the computation.
      std::vector<std::size_t> colLength(toPrint[0].size(), 0);
      for (auto & line : toPrint)
        for (unsigned int col = 0; col < line.size()-1; col++)
          colLength[col] = std::max((int)colLength[col], util::printedLength(line[col]));
    
    
    Franck Dary's avatar
    Franck Dary committed
      // Separator made of '-' whose length is the sum of the column widths
      // plus two characters per configuration column.
      int lengthSum = 2*getNbColumns();
      for (auto & val : colLength)
        lengthSum += val;
      std::string longLine = fmt::format("{:-<{}}", "", lengthSum);
    
      // Comma-separated history, without a trailing comma.
      // NOTE(review): the opening brace of this loop is missing from the text
      // under review.
      std::string historyStr = "";
      for (auto & h : history)
    
    Franck Dary's avatar
    Franck Dary committed
        historyStr += h;
        historyStr += ",";
      }
      if (!historyStr.empty())
        historyStr.pop_back();
    
    Franck Dary's avatar
    Franck Dary committed
      // Comma-separated stack, without a trailing comma.
      std::string stackStr = "";
      for (auto & s : stack)
      {
    
        // NOTE(review): the statement of the `if` branch is missing from the
        // text under review; only the `else` branch (raw stack value) is
        // visible.
        if (hasColIndex(idColName))
    
    Franck Dary's avatar
    Franck Dary committed
        else
          stackStr += std::to_string(s);
        stackStr += ",";
      }
      if (!stackStr.empty())
        stackStr.pop_back();
    
    Franck Dary's avatar
    Franck Dary committed
      // Raw-input window: at most lettersWindowSize letters from characterIndex.
      fmt::print(dest, "{}\n", longLine);
      for (std::size_t index = characterIndex; index < util::getSize(rawInput) and index - characterIndex < lettersWindowSize; index++)
        fmt::print(dest, "{}", getLetter(index));
    
      if (rawInput.size())
        fmt::print(dest, "\n{}\n", longLine);
    
    Franck Dary's avatar
    Franck Dary committed
      fmt::print(dest, "State={}\nwordIndex={} characterIndex={}\nhistory=({})\nstack=({})\n", state, wordIndex, characterIndex, historyStr, stackStr);
    
    Franck Dary's avatar
    Franck Dary committed
      fmt::print(dest, "{}\n", longLine);
    
      // Table output; a separator is printed between the header row and the
      // first data row.
      for (unsigned int line = 0; line < toPrint.size(); line++)
      {
        if (line == 1)
          fmt::print(dest, "{}\n", longLine);
        // Every cell but the last of the row is printed, padded to its column
        // width; the last printed cell ends the line.
        for (unsigned int col = 0; col < toPrint[line].size()-1; col++)
    
          fmt::print(dest, "{}{:>{}}{}", toPrint[line][col], "", colLength[col]-util::printedLength(toPrint[line][col]), col == toPrint[line].size()-2 ? "\n" : "  ");
    
    Franck Dary's avatar
    Franck Dary committed
        // NOTE(review): the statement controlled by this `if`, the closing
        // brace of the enclosing loop and the closing brace of the function
        // are missing from the text under review.
        if (toPrint[line].back() == EOSSymbol1)
    
    
      fmt::print(dest, "{}\n", longLine);
    
    // Returns the non-empty cell with the highest hypothesis index (from
    // nbHypothesesMax down to 1) at (colIndex, lineIndex). When every
    // hypothesis cell is empty, returns cell 0 of that column.
    // Reports through util::myThrow when the position does not exist.
    Config::String & Config::getLastNotEmpty(int colIndex, int lineIndex)
    {
      if (!has(colIndex, lineIndex, 0))
        util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

      int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

      for (int i = nbHypothesesMax; i > 0; --i)
        if (!util::isEmpty(lines[baseIndex+i]))
          return lines[baseIndex+i];

      return lines[baseIndex];
    }
    
    
    // Returns the non-empty cell with the highest hypothesis index (from
    // nbHypothesesMax down to 1) at (colIndex, lineIndex). When every
    // hypothesis cell is empty, returns hypothesis cell 1; cell 0 is never
    // returned. Reports through util::myThrow when the position does not
    // exist.
    Config::String & Config::getLastNotEmptyHyp(int colIndex, int lineIndex)
    {
      if (!has(colIndex, lineIndex, 0))
        util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

      int cellBase = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

      int hyp = nbHypothesesMax;
      while (hyp > 0)
      {
        if (!util::isEmpty(lines[cellBase+hyp]))
          return lines[cellBase+hyp];
        --hyp;
      }

      return lines[cellBase+1];
    }
    
    
    // Returns the first empty hypothesis cell at (colIndex, lineIndex),
    // searching indices 1 to nbHypothesesMax-1. When none of them is empty,
    // returns the cell at index nbHypothesesMax, whatever it contains.
    // Reports through util::myThrow when the position does not exist.
    Config::String & Config::getFirstEmpty(int colIndex, int lineIndex)
    {
      if (!has(colIndex, lineIndex, 0))
        util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

      int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

      for (int i = 1; i < nbHypothesesMax; ++i)
        if (util::isEmpty(lines[baseIndex+i]))
          return lines[baseIndex+i];

      return lines[baseIndex+nbHypothesesMax];
    }
    
    // Name-based variant of getFirstEmpty(): resolves colName with
    // getColIndex and forwards to the index-based overload.
    Config::String & Config::getFirstEmpty(const std::string & colName, int lineIndex)
    {
      const auto resolvedCol = getColIndex(colName);
      return getFirstEmpty(resolvedCol, lineIndex);
    }
    
    
    Franck Dary's avatar
    Franck Dary committed
    // Read-only counterpart of getLastNotEmpty(): returns the non-empty cell
    // with the highest hypothesis index (from nbHypothesesMax down to 1) at
    // (colIndex, lineIndex), or cell 0 when every hypothesis cell is empty.
    // Reports through util::myThrow when the position does not exist.
    //
    // NOTE(review): the end of this definition (both return statements and
    // the closing brace) was missing from the text under review. It is
    // restored here to mirror the non-const getLastNotEmpty(int, int) -
    // confirm against the repository.
    const Config::String & Config::getLastNotEmptyConst(int colIndex, int lineIndex) const
    {
      if (!has(colIndex, lineIndex, 0))
        util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

      int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

      for (int i = nbHypothesesMax; i > 0; --i)
        if (!util::isEmpty(lines[baseIndex+i]))
          return lines[baseIndex+i];

      return lines[baseIndex];
    }
    
    // Read-only counterpart of getLastNotEmptyHyp(): returns the non-empty
    // cell with the highest hypothesis index (from nbHypothesesMax down to 1)
    // at (colIndex, lineIndex), or hypothesis cell 1 when all are empty.
    // Reports through util::myThrow when the position does not exist.
    const Config::String & Config::getLastNotEmptyHypConst(int colIndex, int lineIndex) const
    {
      if (!has(colIndex, lineIndex, 0))
        util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

      int cellBase = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

      int hyp = nbHypothesesMax;
      while (hyp > 0)
      {
        if (!util::isEmpty(lines[cellBase+hyp]))
          return lines[cellBase+hyp];
        --hyp;
      }

      return lines[cellBase+1];
    }
    
    // Value of a cell as it should be read by consumers: for a column marked
    // as predicted, the latest non-empty hypothesis cell
    // (getLastNotEmptyHypConst); for any other column, getLastNotEmptyConst,
    // which may fall back to cell 0.
    const Config::String & Config::getAsFeature(int colIndex, int lineIndex) const
    {
      const bool columnIsPredicted = isPredicted(getColName(colIndex));

      if (!columnIsPredicted)
        return getLastNotEmptyConst(colIndex, lineIndex);

      return getLastNotEmptyHypConst(colIndex, lineIndex);
    }
    
    // Name-based variant of getAsFeature(): resolves colName with getColIndex
    // and forwards to the index-based overload.
    const Config::String & Config::getAsFeature(const std::string & colName, int lineIndex) const
    {
      return getAsFeature(getColIndex(colName), lineIndex);
    }
    
    // Name-based variant of getLastNotEmpty(): resolves colName with
    // getColIndex and forwards to the index-based overload.
    Config::String & Config::getLastNotEmpty(const std::string & colName, int lineIndex)
    {
      return getLastNotEmpty(getColIndex(colName), lineIndex);
    }
    
    // Name-based variant of getLastNotEmptyHyp(): resolves colName with
    // getColIndex and forwards to the index-based overload.
    Config::String & Config::getLastNotEmptyHyp(const std::string & colName, int lineIndex)
    {
      const auto resolvedCol = getColIndex(colName);
      return getLastNotEmptyHyp(resolvedCol, lineIndex);
    }
    
    
    Franck Dary's avatar
    Franck Dary committed
    // Name-based variant of getLastNotEmptyConst(): resolves colName with
    // getColIndex and forwards to the index-based overload.
    const Config::String & Config::getLastNotEmptyConst(const std::string & colName, int lineIndex) const
    {
      const auto resolvedCol = getColIndex(colName);
      return getLastNotEmptyConst(resolvedCol, lineIndex);
    }
    
    
    // Name-based variant of getLastNotEmptyHypConst(): resolves colName with
    // getColIndex and forwards to the index-based overload.
    const Config::String & Config::getLastNotEmptyHypConst(const std::string & colName, int lineIndex) const
    {
      const auto resolvedCol = getColIndex(colName);
      return getLastNotEmptyHypConst(resolvedCol, lineIndex);
    }
    
    
    // Iterator on the cell at (colIndex, lineIndex, hypothesisIndex) inside
    // the flat `lines` storage. lineIndex is made relative to
    // getFirstLineIndex() before the offset is computed. No bounds check.
    Config::ValueIterator Config::getIterator(int colIndex, int lineIndex, int hypothesisIndex)
    {
      auto cell = lines.begin();
      cell += getIndexOfLine(lineIndex-getFirstLineIndex());
      cell += getIndexOfCol(colIndex);
      cell += hypothesisIndex;
      return cell;
    }
    
    // Read-only iterator on the cell at (colIndex, lineIndex,
    // hypothesisIndex) inside the flat `lines` storage. lineIndex is made
    // relative to getFirstLineIndex() before the offset is computed.
    // No bounds check.
    Config::ConstValueIterator Config::getConstIterator(int colIndex, int lineIndex, int hypothesisIndex) const
    {
      return lines.begin() + getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex) + hypothesisIndex;
    }
    
    Franck Dary's avatar
    Franck Dary committed
    void Config::addToHistory(const std::string & transition)
    {
      history.push_back(String(transition));
    }
    
    
    Franck Dary's avatar
    Franck Dary committed
    void Config::addToStack(std::size_t index)
    {
      stack.push_back(index);
    }
    
    
    // Removes the top of the stack, after recording it in lastPoppedStack.
    // The stack is not checked for emptiness here.
    void Config::popStack()
    {
      // Same element as getStack(0): the most recently pushed one.
      lastPoppedStack = stack[stack.size()-1];
      stack.pop_back();
    }
    
    
    Franck Dary's avatar
    Franck Dary committed
    bool Config::hasCharacter(int letterIndex) const
    
    Franck Dary's avatar
    Franck Dary committed
    {
      return letterIndex >= 0 and letterIndex < (int)util::getSize(rawInput);
    }
    
    // Letter of rawInput at position letterIndex. No bounds check is done
    // here; hasCharacter() can be used beforehand.
    util::utf8char Config::getLetter(int letterIndex) const
    {
      return rawInput[letterIndex];
    }
    
    Franck Dary's avatar
    Franck Dary committed
    // True when cell 0 of column 0 on the given line is not empty and starts
    // with '#'.
    bool Config::isComment(std::size_t lineIndex) const
    {
      auto firstCell = getConstIterator(0, lineIndex, 0);

      if (firstCell->get().empty())
        return false;

      return firstCell->get()[0] == '#';
    }
    
    
    // Same test as isComment(), but on the value of column 0 as returned by
    // getAsFeature(): true when it is not empty and starts with '#'.
    bool Config::isCommentPredicted(std::size_t lineIndex) const
    {
      auto & firstColumn = getAsFeature(0, lineIndex);

      if (util::isEmpty(firstColumn))
        return false;

      return firstColumn.get()[0] == '#';
    }
    
    
    // True when an id column exists and cell 0 of that column on the given
    // line contains a '-'.
    bool Config::isMultiword(std::size_t lineIndex) const
    {
      return hasColIndex(idColName) && getConst(idColName, lineIndex, 0).get().find('-') != std::string::npos;
    }
    
    // Same test as isMultiword(), but on the id value returned by
    // getAsFeature(): true when an id column exists and that value contains
    // a '-'.
    bool Config::isMultiwordPredicted(std::size_t lineIndex) const
    {
      if (!hasColIndex(idColName))
        return false;

      return getAsFeature(idColName, lineIndex).get().find('-') != std::string::npos;
    }
    
    
    // Splits cell 0 of the id column on '-' and returns the second number
    // minus the first. Expects the id to have the "a-b" shape (see
    // isMultiword()); the shape is not checked here.
    int Config::getMultiwordSize(std::size_t lineIndex) const
    {
      auto bounds = util::split(getConst(idColName, lineIndex, 0).get(), '-');
      return std::stoi(std::string(bounds[1])) - std::stoi(std::string(bounds[0]));
    }
    
    
    // Same computation as getMultiwordSize(), but on the id value returned by
    // getAsFeature(): second number minus first of an "a-b" id. The shape is
    // not checked here.
    int Config::getMultiwordSizePredicted(std::size_t lineIndex) const
    {
      auto bounds = util::split(getAsFeature(idColName, lineIndex).get(), '-');
      return std::stoi(std::string(bounds[1])) - std::stoi(std::string(bounds[0]));
    }
    
    
    // True when an id column exists and cell 0 of that column on the given
    // line contains a '.'.
    bool Config::isEmptyNode(std::size_t lineIndex) const
    {
      return hasColIndex(idColName) && getConst(idColName, lineIndex, 0).get().find('.') != std::string::npos;
    }
    
    // Same test as isEmptyNode(), but on the id value returned by
    // getAsFeature(): true when an id column exists and that value contains
    // a '.'.
    bool Config::isEmptyNodePredicted(std::size_t lineIndex) const
    {
      if (!hasColIndex(idColName))
        return false;

      return getAsFeature(idColName, lineIndex).get().find('.') != std::string::npos;
    }
    
    
    // A line is a token when it is neither a comment, nor a multiword, nor an
    // empty node.
    bool Config::isToken(std::size_t lineIndex) const
    {
      if (isComment(lineIndex))
        return false;

      if (isMultiword(lineIndex))
        return false;

      return !isEmptyNode(lineIndex);
    }
    
    
    // Same test as isToken(), built on the *Predicted variants: the line is
    // neither a comment, nor a multiword, nor an empty node.
    bool Config::isTokenPredicted(std::size_t lineIndex) const
    {
      if (isCommentPredicted(lineIndex))
        return false;

      if (isMultiwordPredicted(lineIndex))
        return false;

      return !isEmptyNodePredicted(lineIndex);
    }
    
    
    // Moves wordIndex by relativeMovement non-comment lines (forward when
    // positive, backward when negative). Comment lines are skipped and do
    // not count as a movement.
    // Returns true on success. When the move would leave the existing lines,
    // wordIndex is restored to its initial value and false is returned.
    bool Config::moveWordIndex(int relativeMovement)
    {
      int nbMovements = 0;
      int oldVal = wordIndex;

      while (nbMovements != relativeMovement)
      {
        // One step: advance at least once, then keep going over comments.
        do
        {
          relativeMovement > 0 ? wordIndex++ : wordIndex--;
          if (!has(0,wordIndex,0))
          {
            wordIndex = oldVal;
            return false;
          }
        }
        while (isComment(wordIndex));

        nbMovements += relativeMovement > 0 ? 1 : -1;
      }

      return true;
    }
    
    
    bool Config::canMoveWordIndex(int relativeMovement) const
    
    Franck Dary's avatar
    Franck Dary committed
    {
    
      int nbMovements = 0;
      int oldVal = wordIndex;
      while (nbMovements != relativeMovement)
    
    Franck Dary's avatar
    Franck Dary committed
      {
    
    Franck Dary's avatar
    Franck Dary committed
        {
    
          relativeMovement > 0 ? oldVal++ : oldVal--;
          if (!has(0,oldVal,0))
            return false;
    
    Franck Dary's avatar
    Franck Dary committed
        }
    
        while (isComment(oldVal));
    
        nbMovements += relativeMovement > 0 ? 1 : -1;
    
    Franck Dary's avatar
    Franck Dary committed
      }
    
      return true;
    }
    
    
    // Moves characterIndex by relativeMovement, clamping the result to
    // [0, util::getSize(rawInput)]. Returns true when the full movement could
    // be applied, false when it had to be clamped.
    //
    // The target is computed in signed arithmetic. Adding a negative int to
    // the unsigned index wraps around to a huge value, which would clamp to
    // the END of the input rather than to 0 when moving before the start.
    // NOTE(review): assumes characterIndex is an unsigned integer such as
    // std::size_t, as the std::min call against util::getSize suggests -
    // confirm in Config.hpp.
    bool Config::moveCharacterIndex(int relativeMovement)
    {
      const long long wanted = static_cast<long long>(characterIndex) + relativeMovement;
      const long long inputSize = static_cast<long long>(util::getSize(rawInput));

      characterIndex = std::max(0LL, std::min(wanted, inputSize));

      return static_cast<long long>(characterIndex) == wanted;
    }
    
    // Tells whether moveCharacterIndex(relativeMovement) would succeed,
    // without modifying the configuration: true when the target position
    // lies in [0, util::getSize(rawInput)].
    //
    // The target is computed in signed arithmetic so that a move before the
    // start of the input is detected directly instead of relying on an
    // unsigned wrap-around.
    bool Config::canMoveCharacterIndex(int relativeMovement) const
    {
      const long long wanted = static_cast<long long>(characterIndex) + relativeMovement;
      const long long inputSize = static_cast<long long>(util::getSize(rawInput));

      return wanted >= 0 && wanted <= inputSize;
    }
    
    
    Franck Dary's avatar
    Franck Dary committed
    // True when every letter of rawInput from characterIndex to the end is a
    // separator according to util::isSeparator (vacuously true when nothing
    // is left).
    bool Config::rawInputOnlySeparatorsLeft() const
    {
      unsigned int position = characterIndex;

      while (position < rawInput.size())
      {
        if (!util::isSeparator(rawInput[position]))
          return false;
        position++;
      }

      return true;
    }
    
    std::size_t Config::getWordIndex() const
    {
      return wordIndex;
    }
    
    std::size_t Config::getCharacterIndex() const
    {
      return characterIndex;
    }
    
    // History entry counted from the end: relativeIndex 0 is the most recent
    // one. No bounds check is done here; hasHistory() can be used beforehand.
    const Config::String & Config::getHistory(int relativeIndex) const
    {
      const auto position = history.size()-1-relativeIndex;
      return history[position];
    }
    
    
    // Stack element counted from the top: relativeIndex 0 is the top.
    // No bounds check is done here; hasStack() can be used beforehand.
    std::size_t Config::getStack(int relativeIndex) const
    {
      const auto position = stack.size()-1-relativeIndex;
      return stack[position];
    }
    
    
    Franck Dary's avatar
    Franck Dary committed
    // True when relativeIndex designates an existing history entry, i.e. lies
    // in [0, history.size()).
    bool Config::hasHistory(int relativeIndex) const
    {
      return relativeIndex >= 0 && relativeIndex < (int)history.size();
    }
    
    // True when relativeIndex designates an existing stack element, i.e. lies
    // in [0, stack.size()).
    bool Config::hasStack(int relativeIndex) const
    {
      return relativeIndex >= 0 && relativeIndex < (int)stack.size();
    }
    
    Franck Dary's avatar
    Franck Dary committed
    // Returns a copy of the current state.
    Config::String Config::getState() const
    {
      return this->state;
    }
    
    // Replaces the current state by the given one. The parameter shadows the
    // member, hence the explicit this->.
    void Config::setState(const std::string state)
    {
      this->state = state;
    }
    
    
    Franck Dary's avatar
    Franck Dary committed
    // Tells whether processing is finished.
    // With a non-empty rawInput: true when only separators remain from
    // characterIndex on. Otherwise: true when no line exists after wordIndex
    // and the stack is empty.
    bool Config::stateIsDone() const
    {
      if (!rawInput.empty())
        return rawInputOnlySeparatorsLeft();

      return !has(0, wordIndex+1, 0) and !hasStack(0);
    }
    
    // Marks every column named in the given set as predicted. An unknown
    // column name is reported through util::myThrow.
    void Config::addPredicted(const std::set<std::string> & predicted)
    {
      for (auto it = predicted.begin(); it != predicted.end(); ++it)
      {
        const auto & columnName = *it;

        if (!hasColIndex(columnName))
          util::myThrow(fmt::format("unknown column '{}'", columnName));

        // The parameter shadows the member, hence the explicit this->.
        this->predicted.insert(columnName);
      }
    }
    
    // True when colName has been registered in the `predicted` set.
    bool Config::isPredicted(const std::string & colName) const
    {
      return predicted.count(colName) != 0;
    }
    
    
    int Config::getLastPoppedStack() const
    {
      return lastPoppedStack;
    }
    
    
    int Config::getCurrentWordId() const
    {
      return currentWordId;
    }
    
    // Sets currentWordId. The parameter shadows the member, hence the
    // explicit this->.
    void Config::setCurrentWordId(int currentWordId)
    {
      this->currentWordId = currentWordId;
    }
    
    
    // Fills the id and head columns of every token line (as defined by
    // isTokenPredicted) where they are still empty.
    //
    // - An empty id becomes the id of the previous line plus one when that
    //   previous line is a token, and 1 otherwise.
    // - An empty head becomes "0" when the id of the line is 1, and otherwise
    //   the index of the most recent line whose id is 1.
    void Config::addMissingColumns()
    {
      // Index of the most recent line whose id is 1.
      int sequenceStart = 0;

      for (unsigned int row = 0; row < getNbLines(); row++)
      {
        if (isTokenPredicted(row))
        {
          if (util::isEmpty(getAsFeature(idColName, row)))
          {
            int previousId = 0;
            if (row > 0 and isTokenPredicted(row-1))
              previousId = std::stoi(getAsFeature(idColName, row-1));
            getLastNotEmptyHyp(idColName, row) = std::to_string(previousId+1);
          }

          // Read back after the possible fill above.
          const int rowId = std::stoi(getAsFeature(idColName, row));
          const bool startsSequence = (rowId == 1);
          if (startsSequence)
            sequenceStart = row;

          if (util::isEmpty(getAsFeature(headColName, row)))
            getLastNotEmptyHyp(headColName, row) = startsSequence ? "0" : std::to_string(sequenceStart);
        }
      }
    }
    
    
    // Line index of the relativeIndex-th non-comment line (according to
    // isCommentPredicted) counted from the current word.
    //
    // relativeIndex >= 0 scans forward from wordIndex, 0 being the first
    // non-comment line at or after wordIndex. relativeIndex < 0 scans
    // backward from the line before wordIndex, -1 being the first
    // non-comment line found. Returns -1 when the scan leaves the existing
    // lines first.
    long Config::getRelativeWordIndex(int relativeIndex) const
    {
      if (relativeIndex >= 0)
      {
        int seen = 0;
        for (int position = getWordIndex(); has(0,position,0); ++position)
        {
          if (isCommentPredicted(position))
            continue;
          if (seen == relativeIndex)
            return position;
          ++seen;
        }

        return -1;
      }

      int seen = 0;
      for (int position = getWordIndex()-1; has(0,position,0); --position)
      {
        if (isCommentPredicted(position))
          continue;
        --seen;
        if (seen == relativeIndex)
          return position;
      }

      return -1;
    }