Skip to content
Snippets Groups Projects
Config.cpp 13.5 KiB
Newer Older
Franck Dary's avatar
Franck Dary committed
#include "Config.hpp"
#include "util.hpp"

// Builds a configuration over the given raw text. Only the rawInput member
// is initialised here, as a copy of the argument.
Config::Config(const Utf8String & rawInput)
  : rawInput(rawInput)
{
}

std::size_t Config::getIndexOfLine(int lineIndex) const
Franck Dary's avatar
Franck Dary committed
{
  return lineIndex * getNbColumns() * (nbHypothesesMax+1);
Franck Dary's avatar
Franck Dary committed
}
Franck Dary's avatar
Franck Dary committed

std::size_t Config::getIndexOfCol(int colIndex) const
Franck Dary's avatar
Franck Dary committed
{
  return colIndex * (nbHypothesesMax+1);
Franck Dary's avatar
Franck Dary committed
}

void Config::addLines(unsigned int nbLines)
Franck Dary's avatar
Franck Dary committed
{
  lines.resize(lines.size() + nbLines*getNbColumns()*(nbHypothesesMax+1));
Franck Dary's avatar
Franck Dary committed
}

void Config::resizeLines(unsigned int nbLines)
Franck Dary's avatar
Franck Dary committed
{
  lines.resize(nbLines*getNbColumns()*(nbHypothesesMax+1));
Franck Dary's avatar
Franck Dary committed
}

// Tells whether the cell (colIndex, lineIndex, hypothesisIndex) exists:
//  - colIndex        in [0, getNbColumns())
//  - lineIndex       in [getFirstLineIndex(), getFirstLineIndex()+getNbLines())
//  - hypothesisIndex in [0, nbHypothesesMax]
bool Config::has(int colIndex, int lineIndex, int hypothesisIndex) const
{
  if (colIndex < 0 || colIndex >= (int)getNbColumns())
    return false;

  if (lineIndex < (int)getFirstLineIndex())
    return false;

  if (lineIndex >= (int)getFirstLineIndex() + (int)getNbLines())
    return false;

  return hypothesisIndex >= 0 && hypothesisIndex < nbHypothesesMax+1;
}

// Name-based variant of has(): false when `colName` is not a known column,
// otherwise the result of the index-based check on that column.
bool Config::has(const std::string & colName, int lineIndex, int hypothesisIndex) const
{
  if (!hasColIndex(colName))
    return false;

  return has(getColIndex(colName), lineIndex, hypothesisIndex);
}

// Mutable access to a cell addressed by column name. The name is resolved
// with getColIndex() and the call is forwarded to the index-based get().
// No existence check is made here; has() performs that check.
Config::String & Config::get(const std::string & colName, int lineIndex, int hypothesisIndex)
{
  return get(getColIndex(colName), lineIndex, hypothesisIndex);
}

Franck Dary's avatar
Franck Dary committed
// Read-only access to a cell addressed by column name; resolves the name
// with getColIndex() and forwards to the index-based getConst().
const Config::String & Config::getConst(const std::string & colName, int lineIndex, int hypothesisIndex) const
{
  const auto column = getColIndex(colName);

  return getConst(column, lineIndex, hypothesisIndex);
}

// Mutable access to the cell (colIndex, lineIndex, hypothesisIndex).
// Dereferences getIterator() directly; no bounds check is made here.
Config::String & Config::get(int colIndex, int lineIndex, int hypothesisIndex)
{
  return *getIterator(colIndex, lineIndex, hypothesisIndex);
}
Franck Dary's avatar
Franck Dary committed
// Read-only access to the cell (colIndex, lineIndex, hypothesisIndex).
// Dereferences getConstIterator() directly; no bounds check is made here.
const Config::String & Config::getConst(int colIndex, int lineIndex, int hypothesisIndex) const
{
  const auto cell = getConstIterator(colIndex, lineIndex, hypothesisIndex);

  return *cell;
}

std::size_t Config::getNbLines() const
  return lines.size() / getIndexOfCol(getNbColumns());
Franck Dary's avatar
Franck Dary committed
void Config::print(FILE * dest) const
  std::vector<std::string> currentSequence;
  std::vector<std::string> currentSequenceComments;

  auto flushCurrentSequence = [&dest, &currentSequence, &currentSequenceComments]()
  {
    if (currentSequence.empty() && currentSequenceComments.empty())
      return;

    for (auto & comment : currentSequenceComments)
      fmt::print(dest, "{}", comment);

    for (auto & line : currentSequence)
      fmt::print(dest, "{}", line);

    fmt::print(dest, "\n");

    currentSequence.clear();
    currentSequenceComments.clear();
  };

  for (unsigned int line = 0; line < getNbLines(); line++)
  {
Franck Dary's avatar
Franck Dary committed
    if (isComment(getFirstLineIndex()+line))
    {
      currentSequenceComments.emplace_back(fmt::format("{}\n", getConst(0, getFirstLineIndex()+line, 0)));
Franck Dary's avatar
Franck Dary committed
      continue;
    }
    for (unsigned int i = 0; i < getNbColumns()-1; i++)
      auto & colContent = isPredicted(getColName(i)) ? getLastNotEmptyHypConst(i, getFirstLineIndex()+line) : getLastNotEmptyConst(i, getFirstLineIndex()+line);
      std::string valueToPrint = colContent;
      if (valueToPrint.empty())
        valueToPrint = "_";

      currentSequence.emplace_back(fmt::format("{}{}", valueToPrint, i < getNbColumns()-2 ? "\t" : "\n"));
    auto & eosColContent = isPredicted(EOSColName) ? getLastNotEmptyHypConst(EOSColName, getFirstLineIndex()+line) : getLastNotEmptyConst(EOSColName, getFirstLineIndex()+line);
    if (eosColContent == EOSSymbol1)
      flushCurrentSequence();

  flushCurrentSequence();
// Writes a human-readable snapshot of the configuration to `dest`:
//  - up to lettersWindowSize characters of rawInput starting at characterIndex,
//  - the state, wordIndex, characterIndex, history and stack,
//  - a table of the lines within windowSize of wordIndex, each value being
//    shortened with util::shrink(..., maxWordLength) and the line at wordIndex
//    being marked with "=>".
//
// NOTE(review): the "Franck Dary's avatar" / "Franck Dary committed" lines
// inside this block are not C++ and look like web-page residue. Several
// braces and statements also appear to be missing (see the notes below), and
// the end of the function is absent. Restore this block from version control.
void Config::printForDebug(FILE * dest) const
{
  static constexpr int windowSize = 5;
Franck Dary's avatar
Franck Dary committed
  static constexpr int lettersWindowSize = 40;
  static constexpr int maxWordLength = 7;

  fmt::print(dest, "\n");
  // Window of lines to display, grown from wordIndex in both directions.
  int firstLineToPrint = wordIndex;
  int lastLineToPrint = wordIndex;
  // NOTE(review): this loop has no body of its own, so the next `while` is
  // parsed as its body. Presumably a `--firstLineToPrint;` statement was lost.
  while (wordIndex-firstLineToPrint < windowSize and has(0, firstLineToPrint-1, 0))
  while (lastLineToPrint - wordIndex < windowSize and has(0, lastLineToPrint+1, 0))
    ++lastLineToPrint;

  // Table to print: toPrint[0] is the header row, then one row per line.
  std::vector<std::vector<std::string>> toPrint;

Franck Dary's avatar
Franck Dary committed
  // Header row: an empty marker cell followed by the column names.
  toPrint.emplace_back();
  toPrint.back().emplace_back("");
  for (unsigned int i = 0; i < getNbColumns(); i++)
    toPrint.back().emplace_back(getColName(i));

  for (int line = firstLineToPrint; line <= lastLineToPrint; line++)
  {
Franck Dary's avatar
Franck Dary committed
    // Comment lines are not shown in the table.
    if (isComment(line))
      continue;
    toPrint.emplace_back();
    toPrint.back().emplace_back(line == (int)wordIndex ? "=>" : "");
    // NOTE(review): the opening brace of this loop appears to be missing.
    for (unsigned int i = 0; i < getNbColumns(); i++)
      auto & colContent = isPredicted(getColName(i)) ? getLastNotEmptyHypConst(i, line) : getLastNotEmptyConst(i, line);
      toPrint.back().emplace_back(util::shrink(colContent, maxWordLength));
    }
  }

  // Printed width of each displayed column; the last cell of every row is
  // left out of the measurement.
  std::vector<std::size_t> colLength(toPrint[0].size(), 0);
  for (auto & line : toPrint)
    for (unsigned int col = 0; col < line.size()-1; col++)
      colLength[col] = std::max((int)colLength[col], util::printedLength(line[col]));

Franck Dary's avatar
Franck Dary committed
  // Separator line made of '-' characters, as wide as the table.
  int lengthSum = 2*getNbColumns();
  for (auto & val : colLength)
    lengthSum += val;
  std::string longLine = fmt::format("{:-<{}}", "", lengthSum);

  // Comma-separated history, without a trailing comma.
  std::string historyStr = "";
  // NOTE(review): the opening brace of this loop appears to be missing.
  for (auto & h : history)
Franck Dary's avatar
Franck Dary committed
    historyStr += h;
    historyStr += ",";
  }
  if (!historyStr.empty())
    historyStr.pop_back();
Franck Dary's avatar
Franck Dary committed
  // Comma-separated stack. When an id column exists each entry is shown as the
  // id of the line it refers to ("?" if that line is not available), otherwise
  // the raw index is shown.
  std::string stackStr = "";
  for (auto & s : stack)
  {
    if (hasColIndex(idColName))
    {
      if (has(idColName, s, 0))
        stackStr += getLastNotEmptyConst(idColName, s);
      else
        stackStr += "?";
    }
Franck Dary's avatar
Franck Dary committed
    else
      stackStr += std::to_string(s);
    stackStr += ",";
  }
  if (!stackStr.empty())
    stackStr.pop_back();
Franck Dary's avatar
Franck Dary committed
  fmt::print(dest, "{}\n", longLine);
  // Upcoming characters of the raw input, starting at characterIndex.
  for (std::size_t index = characterIndex; index < util::getSize(rawInput) and index - characterIndex < lettersWindowSize; index++)
    fmt::print(dest, "{}", getLetter(index));
  if (rawInput.size())
    fmt::print(dest, "\n{}\n", longLine);
Franck Dary's avatar
Franck Dary committed
  fmt::print(dest, "State={}\nwordIndex={} characterIndex={}\nhistory=({})\nstack=({})\n", state, wordIndex, characterIndex, historyStr, stackStr);
Franck Dary's avatar
Franck Dary committed
  fmt::print(dest, "{}\n", longLine);

  for (unsigned int line = 0; line < toPrint.size(); line++)
  {
    // Separator between the header row and the data rows.
    if (line == 1)
      fmt::print(dest, "{}\n", longLine);
    // Each cell is padded with spaces up to the width of its column.
    for (unsigned int col = 0; col < toPrint[line].size()-1; col++)
      fmt::print(dest, "{}{:>{}}{}", toPrint[line][col], "", colLength[col]-util::printedLength(toPrint[line][col]), col == toPrint[line].size()-2 ? "\n" : "  ");
Franck Dary's avatar
Franck Dary committed
    // NOTE(review): the statement guarded by this `if`, and the rest of the
    // function (closing braces included), are missing below.
    if (toPrint[line].back() == EOSSymbol1)
// Returns the non-empty cell with the highest slot index among slots
// nbHypothesesMax..1 of (colIndex, lineIndex). When all of them are empty,
// returns slot 0. No bounds check is made here.
Config::String & Config::getLastNotEmpty(int colIndex, int lineIndex)
{
  int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  for (int i = nbHypothesesMax; i > 0; --i)
    if (!util::isEmpty(lines[baseIndex+i]))
      return lines[baseIndex+i];

  return lines[baseIndex];
}

// Returns the non-empty cell with the highest slot index among slots
// nbHypothesesMax..1 of (colIndex, lineIndex). When all of them are empty,
// returns slot 1 (slot 0 is never returned). No bounds check is made here.
Config::String & Config::getLastNotEmptyHyp(int colIndex, int lineIndex)
{
  const int cellStart = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  // Walk down from the last slot until a filled one is met.
  int slot = nbHypothesesMax;
  while (slot > 0 && util::isEmpty(lines[cellStart+slot]))
    --slot;

  if (slot > 0)
    return lines[cellStart+slot];

  return lines[cellStart+1];
}

// Returns the first empty cell among slots 1..nbHypothesesMax-1 of
// (colIndex, lineIndex). When none of them is empty, returns the last slot
// (nbHypothesesMax) whatever its content. No bounds check is made here.
Config::String & Config::getFirstEmpty(int colIndex, int lineIndex)
{
  int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  for (int i = 1; i < nbHypothesesMax; ++i)
    if (util::isEmpty(lines[baseIndex+i]))
      return lines[baseIndex+i];

  return lines[baseIndex+nbHypothesesMax];
}

// Name-based variant: resolves `colName` with getColIndex() and forwards to
// the index-based getFirstEmpty().
Config::String & Config::getFirstEmpty(const std::string & colName, int lineIndex)
{
  const auto column = getColIndex(colName);

  return getFirstEmpty(column, lineIndex);
}

Franck Dary's avatar
Franck Dary committed
// Read-only counterpart of getLastNotEmpty(int, int): returns the non-empty
// cell with the highest slot index among slots nbHypothesesMax..1 of
// (colIndex, lineIndex), or slot 0 when all of them are empty.
// No bounds check is made here.
const Config::String & Config::getLastNotEmptyConst(int colIndex, int lineIndex) const
{
  int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  for (int i = nbHypothesesMax; i > 0; --i)
    if (!util::isEmpty(lines[baseIndex+i]))
      return lines[baseIndex+i];

  return lines[baseIndex];
}
// Read-only lookup of the non-empty cell with the highest slot index among
// slots nbHypothesesMax..1 of (colIndex, lineIndex). When all of them are
// empty, returns slot 1 (slot 0 is never returned). No bounds check is made.
const Config::String & Config::getLastNotEmptyHypConst(int colIndex, int lineIndex) const
{
  const int cellStart = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  // Walk down from the last slot until a filled one is met.
  int slot = nbHypothesesMax;
  while (slot > 0 && util::isEmpty(lines[cellStart+slot]))
    --slot;

  if (slot > 0)
    return lines[cellStart+slot];

  return lines[cellStart+1];
}

// Name-based variant: resolves `colName` with getColIndex() and forwards to
// the index-based getLastNotEmpty().
Config::String & Config::getLastNotEmpty(const std::string & colName, int lineIndex)
{
  return getLastNotEmpty(getColIndex(colName), lineIndex);
}
// Name-based variant: resolves `colName` with getColIndex() and forwards to
// the index-based getLastNotEmptyHyp().
Config::String & Config::getLastNotEmptyHyp(const std::string & colName, int lineIndex)
{
  const auto column = getColIndex(colName);

  return getLastNotEmptyHyp(column, lineIndex);
}

Franck Dary's avatar
Franck Dary committed
// Name-based variant: resolves `colName` with getColIndex() and forwards to
// the index-based getLastNotEmptyConst().
const Config::String & Config::getLastNotEmptyConst(const std::string & colName, int lineIndex) const
{
  const auto column = getColIndex(colName);

  return getLastNotEmptyConst(column, lineIndex);
}

// Name-based variant: resolves `colName` with getColIndex() and forwards to
// the index-based getLastNotEmptyHypConst().
const Config::String & Config::getLastNotEmptyHypConst(const std::string & colName, int lineIndex) const
{
  const auto column = getColIndex(colName);

  return getLastNotEmptyHypConst(column, lineIndex);
}

// Iterator on the cell (colIndex, lineIndex, hypothesisIndex) of the flat
// `lines` storage: row offset + column offset + slot. `lineIndex` is absolute
// and is made relative with getFirstLineIndex(). No bounds check is made.
Config::ValueIterator Config::getIterator(int colIndex, int lineIndex, int hypothesisIndex)
{
  auto cell = lines.begin();

  cell += getIndexOfLine(lineIndex-getFirstLineIndex());
  cell += getIndexOfCol(colIndex);
  cell += hypothesisIndex;

  return cell;
}

// Read-only iterator on the cell (colIndex, lineIndex, hypothesisIndex) of the
// flat `lines` storage: row offset + column offset + slot. `lineIndex` is
// absolute and is made relative with getFirstLineIndex(). No bounds check.
Config::ConstValueIterator Config::getConstIterator(int colIndex, int lineIndex, int hypothesisIndex) const
{
  return lines.begin() + getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex) + hypothesisIndex;
}
Franck Dary's avatar
Franck Dary committed
void Config::addToHistory(const std::string & transition)
{
  history.push_back(String(transition));
}

Franck Dary's avatar
Franck Dary committed
void Config::addToStack(std::size_t index)
{
  stack.push_back(index);
}

void Config::popStack()
{
  stack.pop_back();
}

Franck Dary's avatar
Franck Dary committed
bool Config::hasCharacter(int letterIndex) const
Franck Dary's avatar
Franck Dary committed
{
  return letterIndex >= 0 and letterIndex < (int)util::getSize(rawInput);
}

// Returns the character of rawInput at position `letterIndex`. No bounds
// check is made here; hasCharacter() performs that check.
util::utf8char Config::getLetter(int letterIndex) const
{
  return rawInput[letterIndex];
}
Franck Dary's avatar
Franck Dary committed
// A line is a comment when slot 0 of its first column is a non-empty string
// starting with '#'.
bool Config::isComment(std::size_t lineIndex) const
{
  const auto firstCell = getConstIterator(0, lineIndex, 0);

  if (firstCell->get().empty())
    return false;

  return firstCell->get()[0] == '#';
}

// A line is a multiword when an id column exists and slot 0 of that column
// contains a '-' character.
bool Config::isMultiword(std::size_t lineIndex) const
{
  if (!hasColIndex(idColName))
    return false;

  return getConst(idColName, lineIndex, 0).get().find('-') != std::string::npos;
}

// A line is an empty node when an id column exists and slot 0 of that column
// contains a '.' character.
bool Config::isEmptyNode(std::size_t lineIndex) const
{
  if (!hasColIndex(idColName))
    return false;

  return getConst(idColName, lineIndex, 0).get().find('.') != std::string::npos;
}

// A line is a token when it is neither a comment, nor a multiword, nor an
// empty node.
bool Config::isToken(std::size_t lineIndex) const
{
  return !(isComment(lineIndex) || isMultiword(lineIndex) || isEmptyNode(lineIndex));
}

// Moves wordIndex by |relativeMovement| tokens, forward when relativeMovement
// is positive and backward otherwise. Lines for which isToken() is false are
// stepped over and do not count as a movement.
// Returns true on success. If a line that does not exist is reached first,
// wordIndex is restored to its initial value and false is returned.
bool Config::moveWordIndex(int relativeMovement)
{
  int nbMovements = 0;
  int oldVal = wordIndex;
  while (nbMovements != relativeMovement)
  {
    // Advance by one line at a time until the next token is reached.
    do
    {
      relativeMovement > 0 ? wordIndex++ : wordIndex--;
      if (!has(0,wordIndex,0))
      {
        wordIndex = oldVal;
        return false;
      }
    }
    while (!isToken(wordIndex));
    nbMovements += relativeMovement > 0 ? 1 : -1;
  }

  return true;
}

bool Config::canMoveWordIndex(int relativeMovement) const
Franck Dary's avatar
Franck Dary committed
{
  int nbMovements = 0;
  int oldVal = wordIndex;
  while (nbMovements != relativeMovement)
Franck Dary's avatar
Franck Dary committed
  {
Franck Dary's avatar
Franck Dary committed
    {
      relativeMovement > 0 ? oldVal++ : oldVal--;
      if (!has(0,oldVal,0))
        return false;
Franck Dary's avatar
Franck Dary committed
    }
    while (!isToken(oldVal));
    nbMovements += relativeMovement > 0 ? 1 : -1;
Franck Dary's avatar
Franck Dary committed
  }

  return true;
}

// Moves characterIndex by `relativeMovement` characters, clamping the result
// to [0, util::getSize(rawInput)] (the upper bound is one past the last
// character). Returns true when no clamping was needed.
bool Config::moveCharacterIndex(int relativeMovement)
{
  // The target is computed in signed arithmetic on purpose. characterIndex
  // appears to be unsigned (it is cast to int elsewhere in this file); adding a
  // negative movement to it directly would wrap around to a huge value, which
  // would then be clamped to the end of the input instead of to 0.
  const long long target = static_cast<long long>(characterIndex) + relativeMovement;
  const long long inputSize = static_cast<long long>(util::getSize(rawInput));
  const long long clamped = std::max(0LL, std::min(target, inputSize));

  characterIndex = static_cast<decltype(characterIndex)>(clamped);
  return clamped == target;
}

// Tells whether characterIndex + relativeMovement stays within
// [0, util::getSize(rawInput)], i.e. whether the move needs no clamping.
// characterIndex is not modified.
bool Config::canMoveCharacterIndex(int relativeMovement) const
{
  const int destination = (int)characterIndex + relativeMovement;
  const int inputLength = (int)util::getSize(rawInput);

  return destination >= 0 && destination <= inputLength;
}

Franck Dary's avatar
Franck Dary committed
// Tells whether every character of rawInput from characterIndex onward is a
// separator (util::isSeparator). True when no character is left.
bool Config::rawInputOnlySeparatorsLeft() const
{
  unsigned int position = characterIndex;

  while (position < rawInput.size())
  {
    if (!util::isSeparator(rawInput[position]))
      return false;
    position++;
  }

  return true;
}

std::size_t Config::getWordIndex() const
{
  return wordIndex;
}

std::size_t Config::getCharacterIndex() const
{
  return characterIndex;
}

// Returns the history entry located `relativeIndex` positions before the
// most recent one (0 = most recent). No bounds check is made here.
const Config::String & Config::getHistory(int relativeIndex) const
{
  const auto newest = history.size()-1;

  return history[newest-relativeIndex];
}

// Returns the stack element located `relativeIndex` positions below the top
// (0 = top). No bounds check is made here; hasStack() performs that check.
std::size_t Config::getStack(int relativeIndex) const
{
  const auto top = stack.size()-1;

  return stack[top-relativeIndex];
}

Franck Dary's avatar
Franck Dary committed
// Tells whether getHistory(relativeIndex) designates an existing entry, i.e.
// whether relativeIndex lies in [0, history.size()).
bool Config::hasHistory(int relativeIndex) const
{
  // Index 0 is valid: getHistory(0) is the most recent entry, and hasStack()
  // accepts 0 in the same way.
  return relativeIndex >= 0 && relativeIndex < (int)history.size();
}

// Tells whether getStack(relativeIndex) designates an existing element, i.e.
// whether relativeIndex lies in [0, stack.size()).
bool Config::hasStack(int relativeIndex) const
{
  return relativeIndex >= 0 && relativeIndex < (int)stack.size();
}
Franck Dary's avatar
Franck Dary committed
// Returns a copy of the current state name.
Config::String Config::getState() const
{
  return this->state;
}

// Replaces the current state name with `state`.
void Config::setState(const std::string state)
{
  // The parameter shadows the member, hence the explicit `this->`.
  this->state = state;
}

Franck Dary's avatar
Franck Dary committed
// Tells whether there is nothing left to process.
//  - With a raw input: true when only separators remain from characterIndex.
//  - Without raw input: true when no line exists after wordIndex.
bool Config::stateIsDone() const
{
  if (rawInput.empty())
    return !has(0, wordIndex+1, 0);

  return rawInputOnlySeparatorsLeft();
}

Franck Dary's avatar
Franck Dary committed
std::vector<long> Config::extractContext(int leftBorder, int rightBorder, Dict & dict) const
Franck Dary's avatar
Franck Dary committed
  std::stack<int> leftContext;
  for (int index = wordIndex-1; has(0,index,0) && (int)leftContext.size() < leftBorder; --index)
    if (isToken(index))
      leftContext.push(dict.getIndexOrInsert(getLastNotEmptyConst("FORM", index)));
Franck Dary's avatar
Franck Dary committed
  std::vector<long> context;
Franck Dary's avatar
Franck Dary committed
  while ((int)context.size() < leftBorder-(int)leftContext.size())
    context.emplace_back(dict.getIndexOrInsert(Dict::nullValueStr));
  while (!leftContext.empty())
  {
    context.emplace_back(leftContext.top());
    leftContext.pop();
  }
Franck Dary's avatar
Franck Dary committed
  for (int index = wordIndex; has(0,index,0) && (int)context.size() < leftBorder+rightBorder+1; ++index)
    if (isToken(index))
      context.emplace_back(dict.getIndexOrInsert(getLastNotEmptyConst("FORM", index)));
Franck Dary's avatar
Franck Dary committed
  while ((int)context.size() < leftBorder+rightBorder+1)
    context.emplace_back(dict.getIndexOrInsert(Dict::nullValueStr));
// Adds every column name of `predicted` to the set of predicted columns.
void Config::addPredicted(const std::set<std::string> & predicted)
{
  // The parameter shadows the member, hence the explicit `this->`.
  for (const auto & colName : predicted)
    this->predicted.insert(colName);
}

// Tells whether `colName` belongs to the set of predicted columns.
bool Config::isPredicted(const std::string & colName) const
{
  return predicted.count(colName) != 0;
}