#include "Config.hpp"
#include "util.hpp"

// Storage layout used throughout this file: `lines` is a flat vector holding,
// for every line, for every column, (nbHypothesesMax+1) consecutive slots.
// Slot 0 is addressed separately from slots 1..nbHypothesesMax by the
// getLastNotEmpty* / getFirstEmpty accessors below.
// Public line indexes are offset by getFirstLineIndex(); internal offsets are
// computed from (lineIndex - getFirstLineIndex()).

// Initializes the rawInput member from the given input.
Config::Config(const Utf8String & rawInput) : rawInput(rawInput)
{
}

// Offset in `lines` of the first slot of the given (0-based, internal) line.
std::size_t Config::getIndexOfLine(int lineIndex) const
{
  return lineIndex * getNbColumns() * (nbHypothesesMax+1);
}

// Offset, relative to the start of a line, of the first slot of a column.
std::size_t Config::getIndexOfCol(int colIndex) const
{
  return colIndex * (nbHypothesesMax+1);
}

// Appends storage for `nbLines` additional lines.
void Config::addLines(unsigned int nbLines)
{
  lines.resize(lines.size() + nbLines*getNbColumns()*(nbHypothesesMax+1));
}

// Appends one line and writes "#" in column 0, both in slot 0 and in the slot
// returned by getLastNotEmptyHyp.
// NOTE(review): the new line is addressed as getNbLines()-1 without adding
// getFirstLineIndex(); this looks correct only when getFirstLineIndex() is 0 —
// confirm against callers.
void Config::addComment()
{
  lines.resize(lines.size() + getNbColumns()*(nbHypothesesMax+1));
  get(0, getNbLines()-1, 0) = "#";
  getLastNotEmptyHyp(0, getNbLines()-1) = "#";
}

// Resizes the storage so that it holds exactly `nbLines` lines.
void Config::resizeLines(unsigned int nbLines)
{
  lines.resize(nbLines*getNbColumns()*(nbHypothesesMax+1));
}

// True when (colIndex, lineIndex, hypothesisIndex) addresses an existing slot.
// lineIndex must lie in [getFirstLineIndex(), getFirstLineIndex()+getNbLines()).
bool Config::has(int colIndex, int lineIndex, int hypothesisIndex) const
{
  return colIndex >= 0 && colIndex < (int)getNbColumns()
      && lineIndex >= (int)getFirstLineIndex()
      && lineIndex < (int)getFirstLineIndex() + (int)getNbLines()
      && hypothesisIndex >= 0 && hypothesisIndex < nbHypothesesMax+1;
}

// Same as above, addressing the column by name; false for an unknown column.
bool Config::has(const std::string & colName, int lineIndex, int hypothesisIndex) const
{
  return hasColIndex(colName) && has(getColIndex(colName), lineIndex, hypothesisIndex);
}

// Mutable access to a slot, column addressed by name. No bounds check.
Config::String & Config::get(const std::string & colName, int lineIndex, int hypothesisIndex)
{
  return get(getColIndex(colName), lineIndex, hypothesisIndex);
}

// Read-only access to a slot, column addressed by name. No bounds check.
const Config::String & Config::getConst(const std::string & colName, int lineIndex, int hypothesisIndex) const
{
  return getConst(getColIndex(colName), lineIndex, hypothesisIndex);
}

// Mutable access to a slot. No bounds check: callers should test has() first.
Config::String & Config::get(int colIndex, int lineIndex, int hypothesisIndex)
{
  return *getIterator(colIndex, lineIndex, hypothesisIndex);
}

// Read-only access to a slot. No bounds check: callers should test has() first.
const Config::String & Config::getConst(int colIndex, int lineIndex, int hypothesisIndex) const
{
  return *getConstIterator(colIndex, lineIndex, hypothesisIndex);
}

// Number of lines currently stored (total slots divided by slots per line).
// NOTE(review): divides by zero if getNbColumns() is 0.
std::size_t Config::getNbLines() const
{
  return lines.size() / getIndexOfCol(getNbColumns());
}

// Writes every line to `dest`, tab-separated, all columns but the last one.
// Lines are buffered per sequence: a sequence is flushed (its comments first,
// then its lines, then a blank line) when the EOS column equals EOSSymbol1,
// and once more at the end for any trailing content.
void Config::print(FILE * dest) const
{
  std::vector<std::string> currentSequence;
  std::vector<std::string> currentSequenceComments;

  auto flushCurrentSequence = [&dest, &currentSequence, &currentSequenceComments]()
  {
    if (currentSequence.empty() && currentSequenceComments.empty())
      return;

    for (auto & comment : currentSequenceComments)
      fmt::print(dest, "{}", comment);
    for (auto & line : currentSequence)
      fmt::print(dest, "{}", line);

    fmt::print(dest, "\n");

    currentSequence.clear();
    currentSequenceComments.clear();
  };

  for (unsigned int line = 0; line < getNbLines(); line++)
  {
    if (isComment(getFirstLineIndex()+line))
    {
      currentSequenceComments.emplace_back(fmt::format("{}\n", getConst(0, getFirstLineIndex()+line, 0)));
      continue;
    }

    for (unsigned int i = 0; i < getNbColumns()-1; i++)
    {
      auto & colContent = getAsFeature(i, getFirstLineIndex()+line);
      std::string valueToPrint = colContent;

      // The head column stores a line index; print the id of that line instead.
      // Best effort: on any std::exception the stored value is printed as is.
      try
      {
        if (getColName(i) == headColName)
          if (valueToPrint != "0")
            valueToPrint = getAsFeature(idColName, std::stoi(valueToPrint));
      } catch(std::exception &) {}

      if (valueToPrint.empty())
        valueToPrint = "_";

      currentSequence.emplace_back(fmt::format("{}{}", valueToPrint, i < getNbColumns()-2 ? "\t" : "\n"));
    }

    auto & eosColContent = getAsFeature(EOSColName, getFirstLineIndex()+line);
    if (eosColContent == EOSSymbol1)
      flushCurrentSequence();
  }

  flushCurrentSequence();
}

// Writes a human-readable snapshot to `dest`: upcoming raw input letters,
// state / indexes / history / stack, then a table of the lines within
// `windowSize` of wordIndex (comment lines skipped, current word marked "=>").
void Config::printForDebug(FILE * dest) const
{
  static constexpr int windowSize = 10;
  static constexpr int lettersWindowSize = 40;
  static constexpr int maxWordLength = 7;

  fmt::print(dest, "\n");

  // Grow the window around wordIndex while lines exist.
  int firstLineToPrint = wordIndex;
  int lastLineToPrint = wordIndex;
  while (wordIndex-firstLineToPrint < windowSize and has(0, firstLineToPrint-1, 0))
    --firstLineToPrint;
  while (lastLineToPrint - wordIndex < windowSize and has(0, lastLineToPrint+1, 0))
    ++lastLineToPrint;

  std::vector<std::vector<std::string>> toPrint;

  // Header row: an empty marker cell followed by the column names.
  toPrint.emplace_back();
  toPrint.back().emplace_back("");
  for (unsigned int i = 0; i < getNbColumns(); i++)
    toPrint.back().emplace_back(getColName(i));

  for (int line = firstLineToPrint; line <= lastLineToPrint; line++)
  {
    if (isComment(line))
      continue;

    toPrint.emplace_back();
    toPrint.back().emplace_back(line == (int)wordIndex ? "=>" : "");

    for (unsigned int i = 0; i < getNbColumns(); i++)
    {
      auto & colContent = getAsFeature(i, line);
      std::string toPrintCol = colContent;

      // The head column stores a line index; show the id of that line instead.
      try
      {
        if (getColName(i) == headColName && toPrintCol != "_" && !toPrintCol.empty())
          if (toPrintCol != "0")
            toPrintCol = getAsFeature(idColName, std::stoi(toPrintCol));
      } catch(std::exception & e) {util::myThrow(fmt::format("toPrintCol='{}' {}", toPrintCol, e.what()));}

      toPrint.back().emplace_back(util::shrink(toPrintCol, maxWordLength));
    }
  }

  // Width of every displayed column (the last cell of each row is not displayed).
  std::vector<std::size_t> colLength(toPrint[0].size(), 0);
  for (auto & line : toPrint)
    for (unsigned int col = 0; col < line.size()-1; col++)
      colLength[col] = std::max((int)colLength[col], util::printedLength(line[col]));

  int lengthSum = 2*getNbColumns();
  for (auto & val : colLength)
    lengthSum += val;
  std::string longLine = fmt::format("{:-<{}}", "", lengthSum);

  // Comma-separated history.
  std::string historyStr = "";
  for (auto & h : history)
  {
    historyStr += h;
    historyStr += ",";
  }
  if (!historyStr.empty())
    historyStr.pop_back();

  // Comma-separated stack: ids when an id column exists ("?" for a line that
  // does not exist), raw indexes otherwise.
  std::string stackStr = "";
  for (auto & s : stack)
  {
    if (hasColIndex(idColName))
    {
      if (has(idColName, s, 0))
        stackStr += getAsFeature(idColName, s);
      else
        stackStr += "?";
    }
    else
      stackStr += std::to_string(s);
    stackStr += ",";
  }
  if (!stackStr.empty())
    stackStr.pop_back();

  fmt::print(dest, "{}\n", longLine);
  for (std::size_t index = characterIndex; index < util::getSize(rawInput) and index - characterIndex < lettersWindowSize; index++)
    fmt::print(dest, "{}", getLetter(index));
  if (rawInput.size())
    fmt::print(dest, "\n{}\n", longLine);
  fmt::print(dest, "State={}\nwordIndex={} characterIndex={}\nhistory=({})\nstack=({})\n", state, wordIndex, characterIndex, historyStr, stackStr);
  fmt::print(dest, "{}\n", longLine);

  for (unsigned int line = 0; line < toPrint.size(); line++)
  {
    // Separator between the header row and the data rows.
    if (line == 1)
      fmt::print(dest, "{}\n", longLine);
    for (unsigned int col = 0; col < toPrint[line].size()-1; col++)
      fmt::print(dest, "{}{:>{}}{}", toPrint[line][col], "", colLength[col]-util::printedLength(toPrint[line][col]), col == toPrint[line].size()-2 ? "\n" : " ");
    // Blank line after a row whose last (undisplayed) cell is the EOS symbol.
    if (toPrint[line].back() == EOSSymbol1)
      fmt::print(dest, "\n");
  }
}

// Returns the non-empty slot with the highest index among 1..nbHypothesesMax,
// or slot 0 when all of them are empty. Throws (util::myThrow) if the line
// does not exist.
Config::String & Config::getLastNotEmpty(int colIndex, int lineIndex)
{
  if (!has(colIndex, lineIndex, 0))
    util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

  int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  for (int i = nbHypothesesMax; i > 0; --i)
    if (!util::isEmpty(lines[baseIndex+i]))
      return lines[baseIndex+i];

  return lines[baseIndex];
}

// Like getLastNotEmpty, but never returns slot 0: falls back to slot 1 when
// every slot in 1..nbHypothesesMax is empty.
Config::String & Config::getLastNotEmptyHyp(int colIndex, int lineIndex)
{
  if (!has(colIndex, lineIndex, 0))
    util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

  int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  for (int i = nbHypothesesMax; i > 0; --i)
    if (!util::isEmpty(lines[baseIndex+i]))
      return lines[baseIndex+i];

  return lines[baseIndex+1];
}

// Returns the first empty slot among 1..nbHypothesesMax-1, or slot
// nbHypothesesMax (empty or not) when none of them is empty. Throws
// (util::myThrow) if the line does not exist.
Config::String & Config::getFirstEmpty(int colIndex, int lineIndex)
{
  if (!has(colIndex, lineIndex, 0))
    util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

  int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  for (int i = 1; i < nbHypothesesMax; ++i)
    if (util::isEmpty(lines[baseIndex+i]))
      return lines[baseIndex+i];

  return lines[baseIndex+nbHypothesesMax];
}

// Name-addressed overload of getFirstEmpty.
Config::String & Config::getFirstEmpty(const std::string & colName, int lineIndex)
{
  return getFirstEmpty(getColIndex(colName), lineIndex);
}

// Const counterpart of getLastNotEmpty (falls back to slot 0).
const Config::String & Config::getLastNotEmptyConst(int colIndex, int lineIndex) const
{
  if (!has(colIndex, lineIndex, 0))
    util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

  int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  for (int i = nbHypothesesMax; i > 0; --i)
    if (!util::isEmpty(lines[baseIndex+i]))
      return lines[baseIndex+i];

  return lines[baseIndex];
}

// Const counterpart of getLastNotEmptyHyp (falls back to slot 1).
const Config::String & Config::getLastNotEmptyHypConst(int colIndex, int lineIndex) const
{
  if (!has(colIndex, lineIndex, 0))
    util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

  int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  for (int i = nbHypothesesMax; i > 0; --i)
    if (!util::isEmpty(lines[baseIndex+i]))
      return lines[baseIndex+i];

  return lines[baseIndex+1];
}

// Value of a cell as seen by feature extraction: for a column registered as
// predicted, slot 0 is never consulted; otherwise slot 0 is the fallback.
const Config::String & Config::getAsFeature(int colIndex, int lineIndex) const
{
  if (isPredicted(getColName(colIndex)))
    return getLastNotEmptyHypConst(colIndex, lineIndex);

  return getLastNotEmptyConst(colIndex, lineIndex);
}

// Name-addressed overload of getAsFeature.
const Config::String & Config::getAsFeature(const std::string & colName, int lineIndex) const
{
  return getAsFeature(getColIndex(colName), lineIndex);
}

// Name-addressed overload of getLastNotEmpty.
Config::String & Config::getLastNotEmpty(const std::string & colName, int lineIndex)
{
  return getLastNotEmpty(getColIndex(colName), lineIndex);
}

// Name-addressed overload of getLastNotEmptyHyp.
Config::String & Config::getLastNotEmptyHyp(const std::string & colName, int lineIndex)
{
  return getLastNotEmptyHyp(getColIndex(colName), lineIndex);
}

// Name-addressed overload of getLastNotEmptyConst.
const Config::String & Config::getLastNotEmptyConst(const std::string & colName, int lineIndex) const
{
  return getLastNotEmptyConst(getColIndex(colName), lineIndex);
}

// Name-addressed overload of getLastNotEmptyHypConst.
const Config::String & Config::getLastNotEmptyHypConst(const std::string & colName, int lineIndex) const
{
  return getLastNotEmptyHypConst(getColIndex(colName), lineIndex);
}

// Iterator on the slot (colIndex, lineIndex, hypothesisIndex). No bounds check.
Config::ValueIterator Config::getIterator(int colIndex, int lineIndex, int hypothesisIndex)
{
  return lines.begin() + getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex) + hypothesisIndex;
}

// Const iterator on the slot (colIndex, lineIndex, hypothesisIndex). No bounds check.
Config::ConstValueIterator Config::getConstIterator(int colIndex, int lineIndex, int hypothesisIndex) const
{
  return lines.begin() + getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex) + hypothesisIndex;
}

// Appends a transition name to the history.
void Config::addToHistory(const std::string & transition)
{
  history.push_back(String(transition));
}

// Pushes a line index on top of the stack.
void Config::addToStack(std::size_t index)
{
  stack.push_back(index);
}

// Pops the top of the stack and remembers it in lastPoppedStack.
// Precondition: the stack is not empty (not checked here).
void Config::popStack()
{
  lastPoppedStack = getStack(0);
  stack.pop_back();
}

// True when letterIndex is a valid index in rawInput.
bool Config::hasCharacter(int letterIndex) const
{
  return letterIndex >= 0 and letterIndex < (int)util::getSize(rawInput);
}

// Letter of rawInput at letterIndex. No bounds check: see hasCharacter.
util::utf8char Config::getLetter(int letterIndex) const
{
  return rawInput[letterIndex];
}

// True when slot 0 of column 0 of the line starts with '#'.
bool Config::isComment(std::size_t lineIndex) const
{
  auto iter = getConstIterator(0, lineIndex, 0);
  return !iter->get().empty() and iter->get()[0] == '#';
}

// Same test as isComment, applied to the getAsFeature value of column 0.
bool Config::isCommentPredicted(std::size_t lineIndex) const
{
  auto & col0 = getAsFeature(0, lineIndex);
  return !util::isEmpty(col0) and col0.get()[0] == '#';
}

// True when an id column exists and the slot-0 id of the line contains '-'.
bool Config::isMultiword(std::size_t lineIndex) const
{
  return hasColIndex(idColName) && getConst(idColName, lineIndex, 0).get().find('-') != std::string::npos;
}

// Same test as isMultiword, applied to the getAsFeature value of the id.
bool Config::isMultiwordPredicted(std::size_t lineIndex) const
{
  return hasColIndex(idColName) && getAsFeature(idColName, lineIndex).get().find('-') != std::string::npos;
}

// For a slot-0 id of the form "a-b", returns b - a.
// Precondition: the id contains '-' (see isMultiword); std::stoi may throw.
int Config::getMultiwordSize(std::size_t lineIndex) const
{
  auto splited = util::split(getConst(idColName, lineIndex, 0).get(), '-');
  return std::stoi(std::string(splited[1])) - std::stoi(std::string(splited[0]));
}

// Same as getMultiwordSize, applied to the getAsFeature value of the id.
int Config::getMultiwordSizePredicted(std::size_t lineIndex) const
{
  auto splited = util::split(getAsFeature(idColName, lineIndex).get(), '-');
  return std::stoi(std::string(splited[1])) - std::stoi(std::string(splited[0]));
}

// True when an id column exists and the slot-0 id of the line contains '.'.
bool Config::isEmptyNode(std::size_t lineIndex) const
{
  return hasColIndex(idColName) && getConst(idColName, lineIndex, 0).get().find('.') != std::string::npos;
}

// Same test as isEmptyNode, applied to the getAsFeature value of the id.
bool Config::isEmptyNodePredicted(std::size_t lineIndex) const
{
  return hasColIndex(idColName) && getAsFeature(idColName, lineIndex).get().find('.') != std::string::npos;
}

// A token is a line that is neither a comment, a multiword nor an empty node.
bool Config::isToken(std::size_t lineIndex) const
{
  return !isComment(lineIndex) && !isMultiword(lineIndex) && !isEmptyNode(lineIndex);
}

// Same as isToken, using the *Predicted variants of the three tests.
bool Config::isTokenPredicted(std::size_t lineIndex) const
{
  return !isCommentPredicted(lineIndex) && !isMultiwordPredicted(lineIndex) && !isEmptyNodePredicted(lineIndex);
}

// Moves wordIndex by `relativeMovement` non-comment lines (sign gives the
// direction). If a line outside the configuration is reached, wordIndex is
// restored to its initial value and false is returned.
bool Config::moveWordIndex(int relativeMovement)
{
  int nbMovements = 0;
  int oldVal = wordIndex;

  while (nbMovements != relativeMovement)
  {
    do
    {
      relativeMovement > 0 ? wordIndex++ : wordIndex--;
      if (!has(0,wordIndex,0))
      {
        wordIndex = oldVal;
        return false;
      }
    }
    while (isComment(wordIndex));

    nbMovements += relativeMovement > 0 ? 1 : -1;
  }

  return true;
}

// Dry run of moveWordIndex: same walk on a local copy of wordIndex.
bool Config::canMoveWordIndex(int relativeMovement) const
{
  int nbMovements = 0;
  int oldVal = wordIndex; // despite its name, this is the cursor being moved

  while (nbMovements != relativeMovement)
  {
    do
    {
      relativeMovement > 0 ? oldVal++ : oldVal--;
      if (!has(0,oldVal,0))
        return false;
    }
    while (isComment(oldVal));

    nbMovements += relativeMovement > 0 ? 1 : -1;
  }

  return true;
}

// Moves characterIndex by `relativeMovement`, clamped to [0, size of rawInput].
// Returns true when the full movement was possible (no clamping happened).
// The target is computed in signed arithmetic so that a backward move past the
// beginning clamps to 0 instead of wrapping around to the end of the input.
bool Config::moveCharacterIndex(int relativeMovement)
{
  const long long requested = static_cast<long long>(characterIndex) + relativeMovement;
  const long long inputSize = static_cast<long long>(util::getSize(rawInput));

  characterIndex = std::max(0LL, std::min(requested, inputSize));

  return static_cast<long long>(characterIndex) == requested;
}

// Dry run of moveCharacterIndex: true when the target stays in
// [0, size of rawInput].
bool Config::canMoveCharacterIndex(int relativeMovement) const
{
  const long long requested = static_cast<long long>(characterIndex) + relativeMovement;
  const long long inputSize = static_cast<long long>(util::getSize(rawInput));
  const long long target = std::max(0LL, std::min(requested, inputSize));

  return target == requested;
}

// True when every element of rawInput from characterIndex on is a separator
// (vacuously true when nothing is left).
bool Config::rawInputOnlySeparatorsLeft() const
{
  for (unsigned int i = characterIndex; i < rawInput.size(); i++)
    if (!util::isSeparator(rawInput[i]))
      return false;

  return true;
}

std::size_t Config::getWordIndex() const
{
  return wordIndex;
}

std::size_t Config::getCharacterIndex() const
{
  return characterIndex;
}

// History entry counted from the most recent one (0 = last added).
// No bounds check: see hasHistory.
const Config::String & Config::getHistory(int relativeIndex) const
{
  return history[history.size()-1-relativeIndex];
}

// Stack entry counted from the top (0 = top). No bounds check: see hasStack.
std::size_t Config::getStack(int relativeIndex) const
{
  return stack[stack.size()-1-relativeIndex];
}

// True when getHistory(relativeIndex) is a valid access.
bool Config::hasHistory(int relativeIndex) const
{
  return relativeIndex >= 0 && relativeIndex < (int)history.size();
}

// True when getStack(relativeIndex) is a valid access.
bool Config::hasStack(int relativeIndex) const
{
  return relativeIndex >= 0 && relativeIndex < (int)stack.size();
}

Config::String Config::getState() const
{
  return state;
}

void Config::setState(const std::string state)
{
  this->state = state;
}

// With a raw input: done when only separators are left to read.
// Without one: done when there is no line after wordIndex and the stack is empty.
bool Config::stateIsDone() const
{
  if (!rawInput.empty())
    return rawInputOnlySeparatorsLeft();

  return !has(0, wordIndex+1, 0) and !hasStack(0);
}

// Registers columns as predicted. Throws (util::myThrow) on an unknown column
// name; columns listed before the unknown one have already been registered.
void Config::addPredicted(const std::set<std::string> & predicted)
{
  for (auto & col : predicted)
  {
    if (!hasColIndex(col))
      util::myThrow(fmt::format("unknown column '{}'", col));
    this->predicted.insert(col);
  }
}

// True when the column has been registered through addPredicted.
bool Config::isPredicted(const std::string & colName) const
{
  return predicted.count(colName);
}

int Config::getLastPoppedStack() const
{
  return lastPoppedStack;
}

int Config::getCurrentWordId() const
{
  return currentWordId;
}

void Config::setCurrentWordId(int currentWordId)
{
  this->currentWordId = currentWordId;
}

// Fills empty id and head cells of token lines:
//  - an empty id becomes (id of the previous line, if it is a token, else 0) + 1;
//  - an empty head becomes "0" when the id is 1, otherwise the line index of
//    the most recent line whose id is 1.
// std::stoi may throw on a non-numeric id.
// NOTE(review): lines are addressed as 0..getNbLines()-1 without adding
// getFirstLineIndex() — presumably only called when it is 0; confirm.
void Config::addMissingColumns()
{
  int firstIndex = 0;

  for (unsigned int index = 0; index < getNbLines(); index++)
  {
    if (!isTokenPredicted(index))
      continue;

    if (util::isEmpty(getAsFeature(idColName, index)))
    {
      int last = 0;
      if (index > 0 and isTokenPredicted(index-1))
        last = std::stoi(getAsFeature(idColName, index-1));
      getLastNotEmptyHyp(idColName, index) = std::to_string(last+1);
    }

    int curId = std::stoi(getAsFeature(idColName, index));
    if (curId == 1)
      firstIndex = index;

    if (util::isEmpty(getAsFeature(headColName, index)))
      getLastNotEmptyHyp(headColName, index) = (curId == 1) ? "0" : std::to_string(firstIndex);
  }
}

// Line index of the word located `relativeIndex` non-comment lines away from
// wordIndex (0 = first non-comment line at or after wordIndex; negative values
// count backward starting from wordIndex-1). Returns -1 when there is none.
long Config::getRelativeWordIndex(int relativeIndex) const
{
  if (relativeIndex < 0)
  {
    for (int index = getWordIndex()-1, counter = 0; has(0,index,0); --index)
      if (!isCommentPredicted(index))
      {
        --counter;
        if (counter == relativeIndex)
          return index;
      }
  }
  else
  {
    for (int index = getWordIndex(), counter = 0; has(0,index,0); ++index)
      if (!isCommentPredicted(index))
      {
        if (counter == relativeIndex)
          return index;
        ++counter;
      }
  }

  return -1;
}