Skip to content
Snippets Groups Projects
Select Git revision
  • 6988fc04910ee289b0805172b25ec77d3ed48272
  • master default protected
  • fullUD
  • movementInAction
4 results

Config.cpp

Blame
  • Config.cpp 14.03 KiB
    #include "Config.hpp"
    #include <algorithm>
    #include "File.hpp"
    #include "ProgramParameters.hpp"
    #include "Action.hpp"
    #include "ProgramOutput.hpp"
    #include "utf8.hpp"
    
    // Builds an empty configuration bound to the tape description `bd`.
    // One tape is created per line declared by `bd`. The input file is not
    // opened here: its name is only stored for later use.
    Config::Config(BD & bd, const std::string inputFilename) : bd(bd), hashHistory(HISTORY_SIZE), pastActions(HISTORY_SIZE)
    {
      this->inputFilename = inputFilename;
      outputFile = nullptr;
      inputAllRead = false;
      head = 0;
      stackHistory = -1;
      lastIndexPrinted = -1;
      totalEntropy = 0;
    
      for (int line = 0; line < bd.getNbLines(); line++)
        tapes.emplace_back(bd.getNameOfLine(line), bd.lineIsKnown(line));
    }
    
    // Copy constructor: duplicates the state of `other`, including its own
    // File object so that both configurations hold separate File instances.
    // NOTE(review): `actionsHistory` is not copied here — confirm this is intended.
    Config::Config(const Config & other) : bd(other.bd), hashHistory(other.hashHistory), pastActions(other.pastActions)
    {
      this->currentStateName = other.currentStateName;
      this->actionHistory = other.actionHistory;
      this->entropyHistory = other.entropyHistory;
      this->stack = other.stack;
      this->stackHistory = other.stackHistory;
      this->head = other.head;
      this->outputFile = other.outputFile;
      this->lastIndexPrinted = other.lastIndexPrinted;
      this->tapes = other.tapes;
      this->totalEntropy = other.totalEntropy;
    
      this->inputFilename = other.inputFilename;
      this->inputAllRead = other.inputAllRead;
    
      // `other.file` is only created by the first readInput() call (and is
      // released by reset()), so it may legitimately be null: dereferencing it
      // unconditionally was undefined behaviour. When it is null, this->file
      // stays null and readInput() will open the file on demand.
      if (other.file.get())
        this->file.reset(new File(*other.file.get()));
    }
    
    // Creates an empty tape called `name`. `isKnown` selects which of the two
    // buffers (ref when true, hyp when false) operator[] exposes.
    // Both buffers are sized from ProgramParameters::readSize.
    Config::Tape::Tape(const std::string & name, bool isKnown) : ref(ProgramParameters::readSize*4+1, Dict::unknownValueStr), hyp(ProgramParameters::readSize*4+1, std::make_pair(Dict::unknownValueStr, 0.0))
    {
      this->name = name;
      this->isKnown = isKnown;
      head = 0;
      totalEntropy = 0.0;
    }
    
    // Tells whether the tape description declares a line called `name`.
    bool Config::hasTape(const std::string & name)
    {
      const bool declared = bd.hasLineOfName(name);
      return declared;
    }
    
    // Returns the tape whose line in the tape description is called `name`.
    // No check is performed here on the index returned by the description.
    Config::Tape & Config::getTape(const std::string & name)
    {
      auto & tape = tapes[bd.getLineOfName(name)];
      return tape;
    }
    
    // Returns the tape associated with input column `col` by the tape description.
    Config::Tape & Config::getTapeByInputCol(int col)
    {
      auto & tape = tapes[bd.getLineOfInputCol(col)];
      return tape;
    }
    
    void Config::readInput()
    {
      if (inputAllRead)
        return;
    
      if (!file.get())
        file.reset(new File(inputFilename, "r"));
      FILE * fd = file->getDescriptor();
    
      char buffer[100000];
      std::vector<std::string> cols;
      unsigned int usualColsSize = 0;
    
      int toRead = ProgramParameters::readSize;
      int haveRead = 0;
    
      while(haveRead < toRead && fscanf(fd, "%[^\n]\n", buffer) == 1)
      {
        if (!utf8::is_valid(buffer, buffer+std::strlen(buffer)))
        {
          fprintf(stderr, "ERROR (%s) : input (%s) line %d is not toally utf-8 formated. Aborting.\n", ERRINFO, inputFilename.c_str(), tapes[0].size());
          exit(1);
        }
    
        cols = split(buffer, '\t');
        if (!usualColsSize)
          usualColsSize = cols.size();
    
        if (cols.size() != usualColsSize)
        {
          fprintf(stderr, "ERROR (%s) : input (%s) line %d has %lu columns instead of %u. Aborting.\n", ERRINFO, inputFilename.c_str(), tapes[0].size(), cols.size(), usualColsSize);
          exit(1);
        }
    
        printAsOutput(outputFile, tapes[0].getNextOverridenDataIndex(), tapes[0].getNextOverridenRealIndex());
    
        for(unsigned int i = 0; i < cols.size(); i++)
          if(bd.hasLineOfInputCol(i))
          {
            auto & tape = getTapeByInputCol(i);
    
            tape.addToRef(cols[i]);
            tape.addToHyp("");
    
            if (tape.getName() == ProgramParameters::tapeToMask)
              if (choiceWithProbability(ProgramParameters::maskRate))
                tape.maskIndex(tape.refSize()-1);
          }
    
        haveRead++;
      }
    
      // Making all tapes the same size
      int maxTapeSize = 0;
      for(auto & tape : tapes)
        maxTapeSize = std::max<unsigned int>(maxTapeSize, tape.refSize());
    
      if (haveRead < toRead || tapes[0].size() == ProgramParameters::tapeSize)
      {
        printAsOutput(outputFile, tapes[0].getNextOverridenDataIndex(), tapes[0].getNextOverridenRealIndex());
        inputAllRead = true;
      }
    
      for(auto & tape : tapes)
      {
        while(tape.refSize() < maxTapeSize)
          tape.addToRef("");
    
        while(tape.hypSize() < maxTapeSize)
          tape.addToHyp("");
    
        if (inputAllRead)
        {
          tape.addToRef("0");
          tape.addToHyp("");
        }
      }
    }
    
    // Writes a human-readable view of the configuration to `output`: the total
    // entropy, a table of the tape cells around the head (one row per tape, the
    // head column being labelled "head"), and the content of the stack.
    void Config::printForDebug(FILE * output)
    {
      const int window = 5;
      const int firstShown = std::max(0, head-window);
      const std::string separator(80, '-');
    
      // Column 0 holds the tape names, preceded by two empty header cells.
      std::vector< std::vector<std::string> > table(1);
      table[0].resize(2);
    
      for (auto & tape : tapes)
      {
        table[0].emplace_back(tape.getName());
        const bool isFirstTape = (&tape == &tapes[0]);
    
        for (int i = firstShown; i < std::min(tape.hypSize(), head+window); i++)
        {
          const unsigned int colIndex = i - firstShown + 1;
          if (table.size() <= colIndex)
            table.resize(colIndex+1);
    
          auto & column = table[colIndex];
    
          // The header cells of each column are filled only once, by the first tape.
          if (isFirstTape)
          {
            const bool onHead = (i == head);
            column.emplace_back(onHead ? "head" : std::to_string(i));
            column.emplace_back(onHead ? " || " : "");
          }
    
          column.emplace_back(shrinkString(tape[i-head], 10, ".."));
        }
      }
    
      fprintf(output, "Configuration : %.2f entropy\n", totalEntropy);
      fprintf(output, "%s\n", separator.c_str());
    
      printColumns(output, table, 3);
    
      fprintf(output, "Stack : ");
      for (int element : stack)
        fprintf(output, "%d ", element);
      fprintf(output, "\n");
    
      fprintf(output, "%s\n", separator.c_str());
    }
    
    // Not supported for this class: reports an error on stderr and terminates
    // the program with exit status 1.
    void Config::printAsExample(FILE *)
    {
      fprintf(stderr,
              "ERROR (%s) : not supported. Aborting.\n",
              ERRINFO);
      exit(1);
    }
    
    // Hands the cell at absolute index `dataIndex` of every printable tape to
    // ProgramOutput, tagged with `realIndex`, and records `dataIndex` as the last
    // index printed. Does nothing when `dataIndex` is -1 or `output` is null.
    // Empty cells are emitted as "0".
    void Config::printAsOutput(FILE * output, int dataIndex, int realIndex)
    {
      if (!output || dataIndex == -1)
        return;
    
      lastIndexPrinted = dataIndex;
    
      // Tapes are addressed relatively to the head.
      const int relativeIndex = dataIndex-head;
    
      std::vector< std::pair<std::string, float> > cells;
      for (unsigned int j = 0; j < tapes.size(); j++)
      {
        if (!bd.mustPrintLine(j))
          continue;
    
        auto & tape = tapes[j];
        const std::string & value = tape[relativeIndex];
        cells.emplace_back(value.empty() ? "0" : value.c_str(), tape.getEntropy(relativeIndex));
      }
    
      ProgramOutput::instance.addLine(cells, realIndex);
    }
    
    // Moves the head of the configuration and of every tape by `mvt` cells.
    // After a forward move that lands on a multiple of readSize (and at least
    // 3*readSize), more input is read. When the target is beyond the first tape
    // the head does not move; this is an error unless the end of the tapes has
    // been reached.
    void Config::moveHead(int mvt)
    {
      const bool targetInsideTape = head + mvt < tapes[0].size();
    
      if (!targetInsideTape)
      {
        if (endOfTapes())
          return;
    
        fprintf(stderr, "ERROR (%s) : Input has not been read completely, yet the head is already at the end of tapes. Aborting.\n", ERRINFO);
        exit(1);
      }
    
      head += mvt;
    
      for (auto & tape : tapes)
        tape.moveHead(mvt);
    
      const bool movedForward = mvt > 0;
      if (movedForward && head % ProgramParameters::readSize == 0 && head >= (3*ProgramParameters::readSize))
        readInput();
    }
    
    // A configuration is final when the end of the tapes has been reached and
    // the stack is empty.
    bool Config::isFinal()
    {
      if (!endOfTapes())
        return false;
    
      return stack.empty();
    }
    
    // Puts the configuration back to its starting point: tapes, histories and
    // stack are emptied, the input file is released so that it gets reopened,
    // and input is read again until the first tape holds readSize*4 cells or the
    // whole input has been read.
    // NOTE(review): entropyHistory, totalEntropy and lastIndexPrinted are left
    // untouched here — confirm this is intended.
    void Config::reset()
    {
      head = 0;
      inputAllRead = false;
    
      stackHistory = -1;
      stack.clear();
    
      actionsHistory.clear();
      hashHistory.clear();
      pastActions.clear();
      actionHistory.clear();
    
      for (auto & tape : tapes)
        tape.clear();
    
      // Dropping the File makes the next readInput() reopen the input.
      file.reset();
      while (tapes[0].size() < ProgramParameters::readSize*4 && !inputAllRead)
        readInput();
    }
    
    // Returns the cell located `relativeIndex` cells away from the head: read
    // from ref when the tape is known, from hyp otherwise.
    const std::string & Config::Tape::operator[](int relativeIndex)
    {
      return isKnown ? getRef(relativeIndex) : getHyp(relativeIndex);
    }
    
    // Returns the entropy stored with the hyp cell at `relativeIndex` from the
    // head; always 0 for a known tape.
    float Config::Tape::getEntropy(int relativeIndex)
    {
      if (!isKnown)
        return hyp.get(head + relativeIndex).second;
    
      return 0.0;
    }
    
    // Returns the ref cell located `relativeIndex` cells away from the head.
    const std::string & Config::Tape::getRef(int relativeIndex)
    {
      const auto absoluteIndex = head + relativeIndex;
      return ref.get(absoluteIndex);
    }
    
    // Returns the text of the hyp cell located `relativeIndex` cells away from
    // the head (the entropy stored alongside it is ignored).
    const std::string & Config::Tape::getHyp(int relativeIndex)
    {
      const auto absoluteIndex = head + relativeIndex;
      return hyp.get(absoluteIndex).first;
    }
    
    // Overwrites the hyp cell located `relativeIndex` cells away from the head
    // with `elem`, stamped with the tape's current total entropy.
    void Config::Tape::setHyp(int relativeIndex, const std::string & elem)
    {
      const auto absoluteIndex = head + relativeIndex;
      hyp.set(absoluteIndex, std::pair<std::string,float>(elem, totalEntropy));
    }
    
    // Returns the name of the current state. Aborts the program when no state
    // name has been set yet.
    std::string & Config::getCurrentStateName()
    {
      if (!currentStateName.empty())
        return currentStateName;
    
      fprintf(stderr, "ERROR (%s) : currentStateName is empty. Aborting.\n", ERRINFO);
      exit(1);
    }
    
    void Config::setCurrentStateName(const std::string & name)
    {
      this->currentStateName = name;
    }
    
    // Returns the action history of the current state, creating an empty one
    // (of capacity HISTORY_SIZE) the first time it is requested.
    LimitedStack<std::string> & Config::getCurrentStateHistory()
    {
      const std::string & stateName = getCurrentStateName();
    
      auto entry = actionHistory.find(stateName);
      if (entry == actionHistory.end())
        entry = actionHistory.emplace(stateName, HISTORY_SIZE).first;
    
      return entry->second;
    }
    
    // Returns the action history of `state`, creating an empty one (of capacity
    // HISTORY_SIZE) the first time it is requested.
    LimitedStack<std::string> & Config::getStateHistory(const std::string & state)
    {
      auto entry = actionHistory.find(state);
      if (entry == actionHistory.end())
        entry = actionHistory.emplace(state, HISTORY_SIZE).first;
    
      return entry->second;
    }
    
    // Returns the entropy history of the current state, creating an empty one
    // (of capacity HISTORY_SIZE) the first time it is requested.
    LimitedStack<float> & Config::getCurrentStateEntropyHistory()
    {
      const std::string & stateName = getCurrentStateName();
    
      auto entry = entropyHistory.find(stateName);
      if (entry == entropyHistory.end())
        entry = entropyHistory.emplace(stateName, HISTORY_SIZE).first;
    
      return entry->second;
    }
    
    // Randomly reorders the content of all tapes by whole segments.
    // A segment ends at each cell of tape `delimiterTape` whose ref value equals
    // `delimiter`; cells located after the last delimiter form a suffix that is
    // kept at the end. Aborts when no delimiter is found.
    // NOTE(review): std::random_shuffle is deprecated since C++14 and removed in
    // C++17; moving to std::shuffle would change the random sequence.
    void Config::shuffle(const std::string & delimiterTape, const std::string & delimiter)
    {
      auto & referenceTape = getTape(delimiterTape);
      std::vector< std::pair<unsigned int, unsigned int> > segments;
    
      // Collect the [first, last] bounds of every delimiter-terminated segment.
      unsigned int segmentStart = 0;
      for (int i = 0; i < referenceTape.refSize(); i++)
      {
        if (!(referenceTape.getRef(i-head) == delimiter))
          continue;
    
        segments.emplace_back(segmentStart, i);
        segmentStart = i+1;
      }
    
      if (segments.empty())
      {
        fprintf(stderr, "ERROR (%s) : Requested to shuffle based on tape \'%s\' with \'%s\' as a delimiter, but none as been found. Aborting.\n", ERRINFO, delimiterTape.c_str(), delimiter.c_str());
        exit(1);
      }
    
      // Bounds of what follows the last delimiter, computed before shuffling.
      const std::pair<unsigned int, unsigned int> suffix(segments.back().second+1, referenceTape.refSize()-1);
      const bool hasSuffix = suffix.first <= suffix.second;
    
      std::random_shuffle(segments.begin(), segments.end());
    
      auto shuffledTapes = tapes;
    
      for (unsigned int t = 0; t < tapes.size(); t++)
      {
        auto & target = shuffledTapes[t];
        target.clearDataForCopy();
    
        for (auto & segment : segments)
          target.copyPart(tapes[t], segment.first, segment.second+1);
    
        if (hasSuffix)
          target.copyPart(tapes[t], suffix.first, suffix.second+1);
      }
    
      tapes = shuffledTapes;
    }
    
    // Returns the stack element `index` positions below the top (0 is the top).
    // Index -1 designates the most recently popped element. Any other index
    // outside the stack aborts the program.
    int Config::stackGetElem(int index) const
    {
      if (index == -1)
        return stackHistory;
    
      const bool inRange = index >= 0 && index < (int)stack.size();
      if (!inRange)
      {
        fprintf(stderr, "ERROR (%s) : requested element index \'%d\' in the stack. Aborting.\n", ERRINFO, index);
        exit(1);
      }
    
      return stack[stack.size()-1-index];
    }
    
    // Tells whether `index` can be passed to stackGetElem: -1 is always
    // accepted, other values must lie in [0, stack size).
    bool Config::stackHasIndex(int index) const
    {
      if (index == -1)
        return true;
    
      return index >= 0 && index < (int)stack.size();
    }
    
    // The stack is empty when it has no element at index 0.
    bool Config::stackEmpty() const
    {
      const bool hasTop = stackHasIndex(0);
      return !hasTop;
    }
    
    // Removes the top of the stack and remembers it in stackHistory.
    // Aborts the program when the stack is empty.
    void Config::stackPop()
    {
      if (!stack.empty())
      {
        stackHistory = stack.back();
        stack.pop_back();
        return;
      }
    
      fprintf(stderr, "ERROR (%s) : Popping empty stack. Aborting.\n", ERRINFO);
      exit(1);
    }
    
    void Config::stackPush(int elem)
    {
      stack.push_back(elem);
    }
    
    // Returns the top of the stack without removing it.
    // Aborts the program when the stack is empty.
    int Config::stackTop()
    {
      if (!stack.empty())
        return stack.back();
    
      fprintf(stderr, "ERROR (%s) : Requesting back element of empty stack. Aborting.\n", ERRINFO);
      exit(1);
    }
    
    // Returns the number of elements currently on the stack.
    int Config::stackSize() const
    {
      const int count = stack.size();
      return count;
    }
    
    // Not supported for this class: reports an error on stderr and terminates
    // the program with exit status 1.
    void Config::loadFromFile(File &)
    {
      fprintf(stderr,
              "ERROR (%s) : not supported. Aborting.\n",
              ERRINFO);
      exit(1);
    }
    
    // Pushes `entropy` on the entropy history of the current state, creating
    // that history (capacity HISTORY_SIZE) when it does not exist yet.
    void Config::addToEntropyHistory(float entropy)
    {
      const std::string & stateName = getCurrentStateName();
    
      auto entry = entropyHistory.find(stateName);
      if (entry == entropyHistory.end())
        entry = entropyHistory.emplace(stateName, HISTORY_SIZE).first;
    
      entry->second.push(entropy);
    }
    
    // Computes a hash of the tape cells surrounding the head (from 3 cells
    // before the head, up to but excluding min(last ref index, head+3)), mixing
    // the std::hash of every tape's cell into a single value.
    std::size_t Config::computeHash()
    {
      static const int window = 3;
    
      const int first = std::max(0, head-window);
      const int last = std::min(tapes[0].refSize()-1, head+window);
    
      std::hash<std::string> hashString;
      std::size_t digest = 0;
    
      for (int index = first; index < last; index++)
      {
        const int relativeIndex = index-head;
    
        for (auto & tape : tapes)
        {
          const std::size_t mixed = hashString(tape[relativeIndex])*0x9e3779b9+(digest << 6)+(digest >>2);
          digest ^= mixed;
        }
      }
    
      return digest;
    }
    
    // Pushes the hash of the current configuration on the hash history.
    void Config::addHashToHistory()
    {
      const std::size_t digest = computeHash();
      hashHistory.push(digest);
    }
    
    // Forwards to the tape description: dictionary attached to line number `num`.
    Dict * Config::getDictOfLine(int num)
    {
      Dict * dict = bd.getDictOfLine(num);
      return dict;
    }
    
    // Forwards to the tape description: dictionary attached to the line called `name`.
    Dict * Config::getDictOfLine(const std::string & name)
    {
      Dict * dict = bd.getDictOfLine(name);
      return dict;
    }
    
    int Config::getHead() const
    {
      return head;
    }
    
    const std::string & Config::Tape::getName()
    {
      return name;
    }
    
    void Config::Tape::moveHead(int mvt)
    {
      head += mvt;
    }
    
    // True once the whole input has been read and the head of the first tape
    // is at its end.
    bool Config::endOfTapes() const
    {
      if (!inputAllRead)
        return false;
    
      return tapes[0].headIsAtEnd();
    }
    
    // True when the head sits on the last index of the ref buffer.
    bool Config::Tape::headIsAtEnd() const
    {
      const auto lastIndex = ref.getLastIndex();
      return head == lastIndex;
    }
    
    // Size of the tape, defined as the size of its ref buffer.
    int Config::Tape::size()
    {
      const int count = refSize();
      return count;
    }
    
    // Returns the data size reported by the ref buffer.
    int Config::Tape::dataSize()
    {
      const int count = ref.getDataSize();
      return count;
    }
    
    // Number of ref cells, computed as the last ref index plus one.
    int Config::Tape::refSize()
    {
      const auto lastIndex = ref.getLastIndex();
      return lastIndex+1;
    }
    
    // Number of hyp cells, computed as the last hyp index plus one.
    int Config::Tape::hypSize()
    {
      const auto lastIndex = hyp.getLastIndex();
      return lastIndex+1;
    }
    
    void Config::Tape::addToHyp(const std::string & elem)
    {
      hyp.push(std::pair<std::string, float>(elem,totalEntropy));
    }
    
    void Config::Tape::addToRef(const std::string & elem)
    {
      ref.push(elem);
    }
    
    // Empties both buffers and puts the head back at index 0.
    void Config::Tape::clear()
    {
      ref.clear();
      hyp.clear();
      head = 0;
    }
    
    // Copies the range designated by `from` and `to` from both buffers of
    // `other` into this tape's corresponding buffers.
    // NOTE(review): callers pass `last index + 1` as `to`, so the bound looks
    // exclusive — confirm against the buffer's copy().
    void Config::Tape::copyPart(Tape & other, unsigned int from, unsigned int to)
    {
      this->ref.copy(other.ref, from, to);
      this->hyp.copy(other.hyp, from, to);
    }
    
    void Config::Tape::clearDataForCopy()
    {
      ref.clearData();
      hyp.clearData();
    }
    
    // Sets the output stream. While it is null, printAsOutput and printTheRest
    // do nothing. The pointer is stored as is; it is not closed by this class
    // in the code shown here.
    void Config::setOutputFile(FILE * outputFile)
    {
      FILE * const target = outputFile;
      this->outputFile = target;
    }
    
    // Forwards to the ref buffer: data index of the next cell to be overridden.
    int Config::Tape::getNextOverridenDataIndex()
    {
      const int next = ref.getNextOverridenDataIndex();
      return next;
    }
    
    // Forwards to the ref buffer: real index of the next cell to be overridden.
    int Config::Tape::getNextOverridenRealIndex()
    {
      const int next = ref.getNextOverridenRealIndex();
      return next;
    }
    
    // Outputs every cell that has not been printed yet: first the data indexes
    // following the last printed one, then those from 0 up to (excluding) it.
    // Real indexes are assigned consecutively. Does nothing without an output file.
    void Config::printTheRest()
    {
      if (!outputFile)
        return;
    
      const int tapeSize = tapes[0].size();
    
      // Snapshot: printAsOutput updates lastIndexPrinted on every call.
      const int goalPrintIndex = lastIndexPrinted;
    
      // When nothing has been printed yet, the last data cell is left out.
      const int skippedAtEnd = (goalPrintIndex == -1) ? 1 : 0;
    
      // Same value as the former expression
      // tapeSize-1 - ((dataSize - (goalPrintIndex+1)) + goalPrintIndex).
      int realIndex = tapeSize - tapes[0].dataSize();
    
      for (int dataIndex = goalPrintIndex+1; dataIndex < (tapes[0].dataSize()-skippedAtEnd); dataIndex++)
        printAsOutput(outputFile, dataIndex, realIndex++);
    
      for (int dataIndex = 0; dataIndex < goalPrintIndex; dataIndex++)
        printAsOutput(outputFile, dataIndex, realIndex++);
    }
    
    void Config::setEntropy(float entropy)
    {
      totalEntropy = entropy;
      for (auto & tape : tapes)
        tape.setTotalEntropy(totalEntropy);
    }
    
    float Config::getEntropy() const
    {
      return totalEntropy;
    }
    
    void Config::addToEntropy(float entropy)
    {
      totalEntropy += entropy;
      for (auto & tape : tapes)
        tape.setTotalEntropy(totalEntropy);
    }
    
    void Config::Tape::setTotalEntropy(float entropy)
    {
      totalEntropy = entropy;
    }
    
    void Config::Tape::maskIndex(int index)
    {
      ref.maskIndex(index);
    }
    
    // Prints on stderr, for every tape, its name and its cell at absolute
    // index `index`, followed by an empty line.
    void Config::printColumnInfos(unsigned int index)
    {
      for (auto & tape : tapes)
      {
        const std::string & value = tape[index-getHead()];
        fprintf(stderr, "%s\t: %s\n", tape.getName().c_str(), value.c_str());
      }
    
      fprintf(stderr, "\n");
    }
    
    // Records that `action` (with its `cost`) was considered by `classifier`
    // at the current head position. Entries are keyed by "<classifier>_<head>".
    void Config::addToActionsHistory(std::string & classifier, std::string & action, int cost)
    {
      const std::string key = classifier+"_"+std::to_string(head);
      actionsHistory[key].emplace_back(action, cost);
    }
    
    // Returns the (action, cost) pairs recorded for `classifier` at the current
    // head position. The entry is created empty when it does not exist yet.
    std::vector< std::pair<std::string, int> > & Config::getActionsHistory(std::string & classifier)
    {
      const std::string key = classifier+"_"+std::to_string(head);
      return actionsHistory[key];
    }