#include "Config.hpp" #include "util.hpp" Config::Config(const Utf8String & rawInput) : rawInput(rawInput) { } std::size_t Config::getIndexOfLine(int lineIndex) const { return lineIndex * getNbColumns() * (nbHypothesesMax+1); } std::size_t Config::getIndexOfCol(int colIndex) const { return colIndex * (nbHypothesesMax+1); } void Config::addLines(unsigned int nbLines) { lines.resize(lines.size() + nbLines*getNbColumns()*(nbHypothesesMax+1)); } void Config::resizeLines(unsigned int nbLines) { lines.resize(nbLines*getNbColumns()*(nbHypothesesMax+1)); } bool Config::has(int colIndex, int lineIndex, int hypothesisIndex) const { return colIndex >= 0 && colIndex < (int)getNbColumns() && lineIndex >= (int)getFirstLineIndex() && lineIndex < (int)getFirstLineIndex() + (int)getNbLines() && hypothesisIndex >= 0 && hypothesisIndex < nbHypothesesMax+1; } bool Config::has(const std::string & colName, int lineIndex, int hypothesisIndex) const { return hasColIndex(colName) && has(getColIndex(colName), lineIndex, hypothesisIndex); } Config::String & Config::get(const std::string & colName, int lineIndex, int hypothesisIndex) { return get(getColIndex(colName), lineIndex, hypothesisIndex); } const Config::String & Config::getConst(const std::string & colName, int lineIndex, int hypothesisIndex) const { return getConst(getColIndex(colName), lineIndex, hypothesisIndex); } Config::String & Config::get(int colIndex, int lineIndex, int hypothesisIndex) { return *getIterator(colIndex, lineIndex, hypothesisIndex); } const Config::String & Config::getConst(int colIndex, int lineIndex, int hypothesisIndex) const { return *getConstIterator(colIndex, lineIndex, hypothesisIndex); } std::size_t Config::getNbLines() const { return lines.size() / getIndexOfCol(getNbColumns()); } void Config::print(FILE * dest) const { std::vector<std::string> currentSequence; std::vector<std::string> currentSequenceComments; auto flushCurrentSequence = [&dest, ¤tSequence, ¤tSequenceComments]() { if (currentSequence.empty() && currentSequenceComments.empty()) return; for (auto & comment : currentSequenceComments) fmt::print(dest, "{}", comment); for (auto & line : currentSequence) fmt::print(dest, "{}", line); fmt::print(dest, "\n"); currentSequence.clear(); currentSequenceComments.clear(); }; for (unsigned int line = 0; line < getNbLines(); line++) { if (isComment(getFirstLineIndex()+line)) { currentSequenceComments.emplace_back(fmt::format("{}\n", getConst(0, getFirstLineIndex()+line, 0))); continue; } for (unsigned int i = 0; i < getNbColumns()-1; i++) { auto & colContent = isPredicted(getColName(i)) ? getLastNotEmptyHypConst(i, getFirstLineIndex()+line) : getLastNotEmptyConst(i, getFirstLineIndex()+line); std::string valueToPrint = colContent; if (valueToPrint.empty()) valueToPrint = "_"; currentSequence.emplace_back(fmt::format("{}{}", valueToPrint, i < getNbColumns()-2 ? "\t" : "\n")); } auto & eosColContent = isPredicted(EOSColName) ? getLastNotEmptyHypConst(EOSColName, getFirstLineIndex()+line) : getLastNotEmptyConst(EOSColName, getFirstLineIndex()+line); if (eosColContent == EOSSymbol1) flushCurrentSequence(); } flushCurrentSequence(); } void Config::printForDebug(FILE * dest) const { static constexpr int windowSize = 5; static constexpr int lettersWindowSize = 40; static constexpr int maxWordLength = 7; fmt::print(dest, "\n"); int firstLineToPrint = wordIndex; int lastLineToPrint = wordIndex; while (wordIndex-firstLineToPrint < windowSize and has(0, firstLineToPrint-1, 0)) --firstLineToPrint; while (lastLineToPrint - wordIndex < windowSize and has(0, lastLineToPrint+1, 0)) ++lastLineToPrint; std::vector<std::vector<std::string>> toPrint; toPrint.emplace_back(); toPrint.back().emplace_back(""); for (unsigned int i = 0; i < getNbColumns(); i++) toPrint.back().emplace_back(getColName(i)); for (int line = firstLineToPrint; line <= lastLineToPrint; line++) { if (isComment(line)) continue; toPrint.emplace_back(); toPrint.back().emplace_back(line == (int)wordIndex ? "=>" : ""); for (unsigned int i = 0; i < getNbColumns(); i++) { auto & colContent = isPredicted(getColName(i)) ? getLastNotEmptyHypConst(i, line) : getLastNotEmptyConst(i, line); toPrint.back().emplace_back(util::shrink(colContent, maxWordLength)); } } std::vector<std::size_t> colLength(toPrint[0].size(), 0); for (auto & line : toPrint) for (unsigned int col = 0; col < line.size()-1; col++) colLength[col] = std::max((int)colLength[col], util::printedLength(line[col])); int lengthSum = 2*getNbColumns(); for (auto & val : colLength) lengthSum += val; std::string longLine = fmt::format("{:-<{}}", "", lengthSum); std::string historyStr = ""; for (auto & h : history) { historyStr += h; historyStr += ","; } if (!historyStr.empty()) historyStr.pop_back(); std::string stackStr = ""; for (auto & s : stack) { if (hasColIndex(idColName)) { if (has(idColName, s, 0)) stackStr += getLastNotEmptyConst(idColName, s); else stackStr += "?"; } else stackStr += std::to_string(s); stackStr += ","; } if (!stackStr.empty()) stackStr.pop_back(); fmt::print(dest, "{}\n", longLine); for (std::size_t index = characterIndex; index < util::getSize(rawInput) and index - characterIndex < lettersWindowSize; index++) fmt::print(dest, "{}", getLetter(index)); if (rawInput.size()) fmt::print(dest, "\n{}\n", longLine); fmt::print(dest, "State={}\nwordIndex={} characterIndex={}\nhistory=({})\nstack=({})\n", state, wordIndex, characterIndex, historyStr, stackStr); fmt::print(dest, "{}\n", longLine); for (unsigned int line = 0; line < toPrint.size(); line++) { if (line == 1) fmt::print(dest, "{}\n", longLine); for (unsigned int col = 0; col < toPrint[line].size()-1; col++) fmt::print(dest, "{}{:>{}}{}", toPrint[line][col], "", colLength[col]-util::printedLength(toPrint[line][col]), col == toPrint[line].size()-2 ? "\n" : " "); if (toPrint[line].back() == EOSSymbol1) fmt::print(dest, "\n"); } } Config::String & Config::getLastNotEmpty(int colIndex, int lineIndex) { int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex); for (int i = nbHypothesesMax; i > 0; --i) if (!util::isEmpty(lines[baseIndex+i])) return lines[baseIndex+i]; return lines[baseIndex]; } Config::String & Config::getLastNotEmptyHyp(int colIndex, int lineIndex) { int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex); for (int i = nbHypothesesMax; i > 0; --i) if (!util::isEmpty(lines[baseIndex+i])) return lines[baseIndex+i]; return lines[baseIndex+1]; } Config::String & Config::getFirstEmpty(int colIndex, int lineIndex) { int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex); for (int i = 1; i < nbHypothesesMax; ++i) if (util::isEmpty(lines[baseIndex+i])) return lines[baseIndex+i]; return lines[baseIndex+nbHypothesesMax]; } Config::String & Config::getFirstEmpty(const std::string & colName, int lineIndex) { return getFirstEmpty(getColIndex(colName), lineIndex); } const Config::String & Config::getLastNotEmptyConst(int colIndex, int lineIndex) const { int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex); for (int i = nbHypothesesMax; i > 0; --i) if (!util::isEmpty(lines[baseIndex+i])) return lines[baseIndex+i]; return lines[baseIndex]; } const Config::String & Config::getLastNotEmptyHypConst(int colIndex, int lineIndex) const { int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex); for (int i = nbHypothesesMax; i > 0; --i) if (!util::isEmpty(lines[baseIndex+i])) return lines[baseIndex+i]; return lines[baseIndex+1]; } Config::String & Config::getLastNotEmpty(const std::string & colName, int lineIndex) { return getLastNotEmpty(getColIndex(colName), lineIndex); } Config::String & Config::getLastNotEmptyHyp(const std::string & colName, int lineIndex) { return getLastNotEmptyHyp(getColIndex(colName), lineIndex); } const Config::String & Config::getLastNotEmptyConst(const std::string & colName, int lineIndex) const { return getLastNotEmptyConst(getColIndex(colName), lineIndex); } const Config::String & Config::getLastNotEmptyHypConst(const std::string & colName, int lineIndex) const { return getLastNotEmptyHypConst(getColIndex(colName), lineIndex); } Config::ValueIterator Config::getIterator(int colIndex, int lineIndex, int hypothesisIndex) { return lines.begin() + getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex) + hypothesisIndex; } Config::ConstValueIterator Config::getConstIterator(int colIndex, int lineIndex, int hypothesisIndex) const { return lines.begin() + getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex) + hypothesisIndex; } void Config::addToHistory(const std::string & transition) { history.push_back(String(transition)); } void Config::addToStack(std::size_t index) { stack.push_back(index); } void Config::popStack() { stack.pop_back(); } bool Config::hasCharacter(int letterIndex) const { return letterIndex >= 0 and letterIndex < (int)util::getSize(rawInput); } util::utf8char Config::getLetter(int letterIndex) const { return rawInput[letterIndex]; } bool Config::isComment(std::size_t lineIndex) const { auto iter = getConstIterator(0, lineIndex, 0); return !iter->get().empty() and iter->get()[0] == '#'; } bool Config::isMultiword(std::size_t lineIndex) const { return hasColIndex(idColName) && getConst(idColName, lineIndex, 0).get().find('-') != std::string::npos; } bool Config::isEmptyNode(std::size_t lineIndex) const { return hasColIndex(idColName) && getConst(idColName, lineIndex, 0).get().find('.') != std::string::npos; } bool Config::isToken(std::size_t lineIndex) const { return !isComment(lineIndex) && !isMultiword(lineIndex) && !isEmptyNode(lineIndex); } bool Config::moveWordIndex(int relativeMovement) { int nbMovements = 0; int oldVal = wordIndex; while (nbMovements != relativeMovement) { do { relativeMovement > 0 ? wordIndex++ : wordIndex--; if (!has(0,wordIndex,0)) { wordIndex = oldVal; return false; } } while (!isToken(wordIndex)); nbMovements += relativeMovement > 0 ? 1 : -1; } return true; } bool Config::canMoveWordIndex(int relativeMovement) const { int nbMovements = 0; int oldVal = wordIndex; while (nbMovements != relativeMovement) { do { relativeMovement > 0 ? oldVal++ : oldVal--; if (!has(0,oldVal,0)) return false; } while (!isToken(oldVal)); nbMovements += relativeMovement > 0 ? 1 : -1; } return true; } bool Config::moveCharacterIndex(int relativeMovement) { int oldVal = characterIndex; characterIndex = std::max(0, (int)std::min(characterIndex+relativeMovement, util::getSize(rawInput))); return (int)characterIndex == oldVal + relativeMovement; } bool Config::canMoveCharacterIndex(int relativeMovement) const { int target = std::max(0, (int)std::min(characterIndex+relativeMovement, util::getSize(rawInput))); return target == (int)characterIndex + relativeMovement; } bool Config::rawInputOnlySeparatorsLeft() const { for (unsigned int i = characterIndex; i < rawInput.size(); i++) if (!util::isSeparator(rawInput[i])) return false; return true; } std::size_t Config::getWordIndex() const { return wordIndex; } std::size_t Config::getCharacterIndex() const { return characterIndex; } const Config::String & Config::getHistory(int relativeIndex) const { return history[history.size()-1-relativeIndex]; } std::size_t Config::getStack(int relativeIndex) const { return stack[stack.size()-1-relativeIndex]; } bool Config::hasHistory(int relativeIndex) const { return relativeIndex > 0 && relativeIndex < (int)history.size(); } bool Config::hasStack(int relativeIndex) const { return relativeIndex >= 0 && relativeIndex < (int)stack.size(); } Config::String Config::getState() const { return state; } void Config::setState(const std::string state) { this->state = state; } bool Config::stateIsDone() const { if (!rawInput.empty()) return rawInputOnlySeparatorsLeft(); return !has(0, wordIndex+1, 0); } std::vector<long> Config::extractContext(int leftBorder, int rightBorder, Dict & dict) const { std::stack<int> leftContext; for (int index = wordIndex-1; has(0,index,0) && (int)leftContext.size() < leftBorder; --index) if (isToken(index)) leftContext.push(dict.getIndexOrInsert(getLastNotEmptyConst("FORM", index))); std::vector<long> context; while ((int)context.size() < leftBorder-(int)leftContext.size()) context.emplace_back(dict.getIndexOrInsert(Dict::nullValueStr)); while (!leftContext.empty()) { context.emplace_back(leftContext.top()); leftContext.pop(); } for (int index = wordIndex; has(0,index,0) && (int)context.size() < leftBorder+rightBorder+1; ++index) if (isToken(index)) context.emplace_back(dict.getIndexOrInsert(getLastNotEmptyConst("FORM", index))); while ((int)context.size() < leftBorder+rightBorder+1) context.emplace_back(dict.getIndexOrInsert(Dict::nullValueStr)); return context; } void Config::addPredicted(const std::set<std::string> & predicted) { this->predicted.insert(predicted.begin(), predicted.end()); } bool Config::isPredicted(const std::string & colName) const { return predicted.count(colName); }