#include "BaseConfig.hpp"
#include "util.hpp"

// Reads the column names listed in the mcd file `mcdFilename`, one name per line,
// and registers each of them in colIndex2Name / colName2Index in file order.
// Every entry of extraColumns is then appended after the columns read from the file.
// Errors are reported through util::myThrow when:
//  - columns have already been registered for this BaseConfig,
//  - the file cannot be opened,
//  - the file declares a column whose name is also in extraColumns.
void BaseConfig::readMCD(std::string_view mcdFilename)
{
  if (!colIndex2Name.empty())
    util::myThrow("a mcd has already been read for this BaseConfig");

  // A string_view is not guaranteed to be null-terminated, whereas fopen expects
  // a C string: go through an owning std::string instead of calling data().
  const std::string mcdPath(mcdFilename);
  std::FILE * file = std::fopen(mcdPath.c_str(), "r");

  if (not file)
    util::myThrow(fmt::format("Cannot open file '{}'", mcdFilename));

  // Each iteration reads at most 1023 non-newline characters; the trailing "\n"
  // directive of the format then skips any run of whitespace that follows.
  char lineBuffer[1024];
  while (std::fscanf(file, "%1023[^\n]\n", lineBuffer) == 1)
  {
    colIndex2Name.emplace_back(lineBuffer);
    colName2Index.emplace(lineBuffer, colIndex2Name.size()-1);
  }

  std::fclose(file);

  // Extra columns get the indexes that follow the ones declared in the file.
  for (auto & column : extraColumns)
  {
    if (colName2Index.count(column))
      util::myThrow(fmt::format("mcd '{}' must not contain column '{}'", mcdFilename, column));
    colIndex2Name.emplace_back(column);
    colName2Index.emplace(column, colIndex2Name.size()-1);
  }
}

// Loads the whole content of `rawFilename` into rawInputUtf8.
// The bytes are passed through util::splitAsUtf8, then every newline and tab
// element is replaced by a space.
// Reports through util::myThrow when the file cannot be opened.
void BaseConfig::readRawInput(std::string_view rawFilename)
{
  // A string_view is not guaranteed to be null-terminated, whereas fopen expects
  // a C string: go through an owning std::string instead of calling data().
  const std::string rawPath(rawFilename);
  std::FILE * file = std::fopen(rawPath.c_str(), "r");

  if (not file)
    util::myThrow(fmt::format("Cannot open file '{}'", rawFilename));

  std::string rawInputTemp;

  // Loop on the value returned by fgetc rather than on feof(): feof() only turns
  // true once a read has already failed, so testing it first would append the EOF
  // sentinel as a spurious byte and would never terminate on a read error.
  for (int c = std::fgetc(file); c != EOF; c = std::fgetc(file))
    rawInputTemp.push_back(static_cast<char>(c));

  std::fclose(file);

  rawInputUtf8 = util::splitAsUtf8(rawInputTemp);
  rawInputUtf8.replace(util::utf8char("\n"), util::utf8char(" "));
  rawInputUtf8.replace(util::utf8char("\t"), util::utf8char(" "));
}

// Loads the tab-separated file `tsvFilename` into this configuration.
//  - Lines starting with '#' are stored as-is in column 0 of a new line.
//  - Lines shorter than 3 characters (newline included) end the current sequence:
//    the last line is marked with EOSSymbol1 and, if the head column exists, the
//    heads of the sequence are rewritten from ids to line indexes.
//  - Any other line is split on tabs and stored column by column in a new line.
// Reports through util::myThrow when the file cannot be opened or when a line
// does not have the same number of columns as the first data line.
// NOTE(review): if util::myThrow throws while parsing, `file` is not closed.
void BaseConfig::readTSVInput(std::string_view tsvFilename)
{
  // A string_view is not guaranteed to be null-terminated, whereas fopen expects
  // a C string: go through an owning std::string instead of calling data().
  const std::string tsvPath(tsvFilename);
  std::FILE * file = std::fopen(tsvPath.c_str(), "r");

  if (not file)
    util::myThrow(fmt::format("Cannot open file '{}'", tsvFilename));

  char lineBuffer[100000];
  int inputLineIndex = 0; // Counted for every line read; not reported anywhere yet.
  bool inputHasBeenRead = false; // True once a first data line has been seen.
  int usualNbCol = -1; // Number of columns of the first data line, -1 until known.
  int nbMultiwords = 0; // Number of upcoming lines still to be flagged in isMultiColName.

  while (!std::feof(file))
  {
    // fgets returns its buffer on success: anything else means EOF or error.
    if (lineBuffer != std::fgets(lineBuffer, 100000, file))
      break;

    std::string_view line(lineBuffer);
    inputLineIndex++;

    // Too short to hold data: treated as a sequence separator.
    if (line.size() < 3)
    {
      // Separators met before any data line are ignored.
      if (!inputHasBeenRead)
        continue;

      get(EOSColName, getNbLines()-1, 0) = EOSSymbol1;

      try
      {
        // Maps the id column value of each token of the sequence to its line index.
        std::map<std::string, int> id2index;
        int firstIndexOfSequence = getNbLines()-1;
        // Walk backwards from the last line until the previous sequence end.
        for (int i = (int)getNbLines()-1; has(0, i, 0); --i)
        {
          if (!isToken(i))
            continue;

          // An EOS mark on any line but the last one belongs to the previous sequence.
          if (i != (int)getNbLines()-1 && getConst(EOSColName, i, 0) == EOSSymbol1)
            break;

          firstIndexOfSequence = i;
          id2index[getConst(idColName, i, 0)] = i;
        }
        if (hasColIndex(headColName))
          for (int i = firstIndexOfSequence; i < (int)getNbLines(); ++i)
          {
            if (!isToken(i))
              continue;
            auto & head = get(headColName, i, 0);
            // A head equal to "0" is left untouched.
            if (head == "0")
              continue;
            head = std::to_string(id2index[head]);
          }
      } catch(std::exception & e) {util::myThrow(e.what());}

      continue;
    }

    if (line.back() == '\n')
      line.remove_suffix(1);

    // Comment line: kept verbatim in column 0.
    if (line[0] == '#')
    {
      addLines(1);
      get(EOSColName, getNbLines()-1, 0) = EOSSymbol0;
      get(isMultiColName, getNbLines()-1, 0) = EOSSymbol0;
      get(0, getNbLines()-1, 0) = std::string(line);
      continue;
    }

    inputHasBeenRead = true;

    auto splited = util::split(line, '\t');
    // The first data line fixes the number of columns expected for the whole file.
    if (usualNbCol == -1)
      usualNbCol = splited.size();
    if ((int)splited.size() != usualNbCol)
      util::myThrow(fmt::format("in file {} line {} is invalid, it shoud have {} columns", tsvFilename, line, usualNbCol));

    // Ignore empty nodes
    if (hasColIndex(idColName) && splited[getColIndex(idColName)].find('.') != std::string::npos)
      continue;

    addLines(1);
    get(EOSColName, getNbLines()-1, 0) = EOSSymbol0;
    if (nbMultiwords > 0)
    {
      get(isMultiColName, getNbLines()-1, 0) = EOSSymbol1;
      nbMultiwords--;
    }
    else
      get(isMultiColName, getNbLines()-1, 0) = EOSSymbol0;

    // Columns beyond the ones registered in colIndex2Name are dropped.
    for (unsigned int i = 0; i < splited.size(); i++)
      if (i < colIndex2Name.size())
      {
        std::string value = std::string(splited[i]);
        get(i, getNbLines()-1, 0) = value;
      }

    // A multiword line sets how many of the following lines get flagged.
    if (isMultiword(getNbLines()-1))
      nbMultiwords = getMultiwordSize(getNbLines()-1)+1;
  }

  std::fclose(file);
}

// Builds a configuration from an mcd file and from a TSV and/or a raw text input.
// `mcdFilename` is mandatory, and at least one of `tsvFilename` / `rawFilename`
// must be non-empty; violations are reported through util::myThrow.
BaseConfig::BaseConfig(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename) : Config(rawInputUtf8)
{
  const bool tsvGiven = !tsvFilename.empty();
  const bool rawGiven = !rawFilename.empty();

  if (!tsvGiven && !rawGiven)
  {
    util::myThrow("tsvFilename and rawFilenames can't be both empty");
  }
  if (mcdFilename.empty())
  {
    util::myThrow("mcdFilename can't be empty");
  }

  // Columns must be known before any input is loaded.
  readMCD(mcdFilename);

  if (rawGiven)
  {
    readRawInput(rawFilename);
  }

  if (tsvGiven)
  {
    readTSVInput(tsvFilename);
  }

  // When has() reports nothing at wordIndex, add a comment and one line.
  const bool nothingAtWordIndex = !has(0, wordIndex, 0);
  if (nothingAtWordIndex)
  {
    addComment();
    addLines(1);
  }

  // Do not leave the word index on a comment line.
  if (isComment(wordIndex))
  {
    moveWordIndex(1);
  }
}

// Returns the number of column names currently registered in colIndex2Name.
std::size_t BaseConfig::getNbColumns() const
{
  const std::size_t nbRegisteredColumns = colIndex2Name.size();
  return nbRegisteredColumns;
}

// Returns the index associated with `colName` in colName2Index.
// An unknown name is reported through util::myThrow.
std::size_t BaseConfig::getColIndex(const std::string & colName) const
{
  const auto found = colName2Index.find(colName);
  const bool isKnown = found != colName2Index.end();

  if (!isKnown)
  {
    util::myThrow(fmt::format("unknown column name '{}'", colName));
  }

  return found->second;
}

// Tells whether `colName` is a key of colName2Index.
bool BaseConfig::hasColIndex(const std::string & colName) const
{
  const bool isRegistered = colName2Index.find(colName) != colName2Index.end();
  return isRegistered;
}

// Returns the name stored at position `colIndex` of colIndex2Name.
// No bounds check is performed here: `colIndex` must be a valid position.
const std::string & BaseConfig::getColName(int colIndex) const
{
  const std::string & columnName = colIndex2Name[colIndex];
  return columnName;
}

// Returns the index of the first line, which is always 0 for a BaseConfig.
std::size_t BaseConfig::getFirstLineIndex() const
{
  constexpr std::size_t firstLineIndex = 0;
  return firstLineIndex;
}