#include "Config.hpp"
#include "util.hpp"

// Copy constructor: copies every member of `other`. The strategy is
// deep-copied so the two configurations do not share it.
Config::Config(const Config & other)
{
  this->lines = other.lines;
  this->predicted = other.predicted;
  this->lastPoppedStack = other.lastPoppedStack;
  this->lastAttached = other.lastAttached;
  this->currentWordId = other.currentWordId;
  this->appliableSplitTransitions = other.appliableSplitTransitions;
  this->appliableTransitions = other.appliableTransitions;
  // The strategy may not have been set yet (see getStrategy()); only clone it
  // when it exists instead of dereferencing a null pointer.
  if (other.strategy.get() != nullptr)
    this->strategy.reset(new Strategy(*other.strategy));
  this->rawInput = other.rawInput;
  this->wordIndex = other.wordIndex;
  this->characterIndex = other.characterIndex;
  this->state = other.state;
  this->history = other.history;
  this->stack = other.stack;
  // Was a self-assignment (this->extraColumns = this->extraColumns), which
  // left the copy without the extra columns of `other`.
  this->extraColumns = other.extraColumns;
}

// Offset, inside `lines`, of the first cell of the line at relative position
// `lineIndex` (0 = first stored line). Each line holds getNbColumns() columns
// of (nbHypothesesMax+1) cells.
std::size_t Config::getIndexOfLine(int lineIndex) const
{
  return lineIndex * getNbColumns() * (nbHypothesesMax+1);
}

// Offset, relative to the start of a line, of the first cell of column `colIndex`.
std::size_t Config::getIndexOfCol(int colIndex) const
{
  return colIndex * (nbHypothesesMax+1);
}

// Appends `nbLines` default-constructed lines at the end of the storage.
void Config::addLines(unsigned int nbLines)
{
  lines.resize(lines.size() + nbLines*getNbColumns()*(nbHypothesesMax+1));
}

// Appends one line and writes "#" into its first column, both in cell 0 and in
// the cell returned by getLastNotEmptyHyp.
// NOTE(review): the new line is addressed as getNbLines()-1, which is only the
// right absolute index when getFirstLineIndex() is 0 - confirm against callers.
void Config::addComment()
{
  lines.resize(lines.size() + getNbColumns()*(nbHypothesesMax+1));
  get(0, getNbLines()-1, 0) = "#";
  getLastNotEmptyHyp(0, getNbLines()-1) = "#";
}

// Resizes the storage so that it holds exactly `nbLines` lines.
void Config::resizeLines(unsigned int nbLines)
{
  lines.resize(nbLines*getNbColumns()*(nbHypothesesMax+1));
}

// True when (colIndex, lineIndex, hypothesisIndex) designates an existing cell.
// `lineIndex` is absolute: valid values start at getFirstLineIndex().
bool Config::has(int colIndex, int lineIndex, int hypothesisIndex) const
{
  return colIndex >= 0 && colIndex < (int)getNbColumns()
      && lineIndex >= (int)getFirstLineIndex() && lineIndex < (int)getFirstLineIndex() + (int)getNbLines()
      && hypothesisIndex >= 0 && hypothesisIndex < nbHypothesesMax+1;
}

// Same as above, addressing the column by name; false if the column is unknown.
bool Config::has(const std::string & colName, int lineIndex, int hypothesisIndex) const
{
  return hasColIndex(colName) && has(getColIndex(colName), lineIndex, hypothesisIndex);
}

// Mutable access to a cell, column addressed by name. No bounds checking here.
Config::String & Config::get(const std::string & colName, int lineIndex, int hypothesisIndex)
{
  return get(getColIndex(colName), lineIndex, hypothesisIndex);
}

// Read-only access to a cell, column addressed by name. No bounds checking here.
const Config::String & Config::getConst(const std::string & colName, int lineIndex, int hypothesisIndex) const
{
  return getConst(getColIndex(colName), lineIndex, hypothesisIndex);
}

// Mutable access to a cell. The iterator is dereferenced without any check:
// callers are expected to have validated the coordinates (see has()).
Config::String & Config::get(int colIndex, int lineIndex, int hypothesisIndex)
{
  return *getIterator(colIndex, lineIndex, hypothesisIndex);
}

// Read-only access to a cell. The iterator is dereferenced without any check.
const Config::String & Config::getConst(int colIndex, int lineIndex, int hypothesisIndex) const
{
  return *getConstIterator(colIndex, lineIndex, hypothesisIndex);
}

// Number of lines currently stored (storage size divided by the size of one line).
std::size_t Config::getNbLines() const
{
  return lines.size() / getIndexOfCol(getNbColumns());
}

// Writes the configuration to `dest`, one tab-separated row per line.
// Rows are buffered per sequence; a sequence is flushed (comments first, then
// rows, then an empty line) each time the EOS column equals EOSSymbol1, and
// once more at the end. Extra columns other than EOSColName are not printed,
// empty values are printed as "_", and the head column is printed as the ID
// of the line it refers to.
void Config::print(FILE * dest) const
{
  std::vector<std::string> currentSequence;
  std::vector<std::string> currentSequenceComments;

  auto flushCurrentSequence = [&dest, &currentSequence, &currentSequenceComments]()
  {
    if (currentSequence.empty() && currentSequenceComments.empty())
      return;

    for (auto & comment : currentSequenceComments)
      fmt::print(dest, "{}", comment);
    for (auto & line : currentSequence)
      fmt::print(dest, "{}", line);

    fmt::print(dest, "\n");

    currentSequence.clear();
    currentSequenceComments.clear();
  };

  for (unsigned int line = 0; line < getNbLines(); line++)
  {
    if (isComment(getFirstLineIndex()+line))
    {
      currentSequenceComments.emplace_back(fmt::format("{}\n", getConst(0, getFirstLineIndex()+line, 0)));
      continue;
    }

    // The last column is not part of the printed row.
    for (unsigned int i = 0; i < getNbColumns()-1; i++)
    {
      if (isExtraColumn(getColName(i)) and getColName(i) != EOSColName)
      {
        // A skipped column in last printable position: the previously emitted
        // cell ends the row, so its trailing separator becomes a newline.
        // Guarded so that nothing is touched when no cell was emitted yet.
        if (i == getNbColumns()-2 && !currentSequence.empty() && !currentSequence.back().empty())
          currentSequence.back().back() = '\n';
        continue;
      }

      auto & colContent = getAsFeature(i, getFirstLineIndex()+line);
      std::string valueToPrint = colContent;

      // Best effort: if the head cannot be converted, the raw value is printed.
      try
      {
        if (getColName(i) == headColName)
          if (valueToPrint != "0")
            valueToPrint = getAsFeature(idColName, std::stoi(valueToPrint));
      } catch(const std::exception &) {}

      if (valueToPrint.empty())
        valueToPrint = "_";

      currentSequence.emplace_back(fmt::format("{}{}", valueToPrint, i < getNbColumns()-2 ? "\t" : "\n"));
    }

    auto & eosColContent = getAsFeature(EOSColName, getFirstLineIndex()+line);
    if (eosColContent == EOSSymbol1)
      flushCurrentSequence();
  }

  flushCurrentSequence();
}

// Writes a human-readable snapshot of the configuration to `dest`: upcoming
// raw input letters, state, indexes, history, stack, and a table of the lines
// surrounding the current word (at most `windowSize` on each side).
void Config::printForDebug(FILE * dest) const
{
  static constexpr int windowSize = 10;
  static constexpr int lettersWindowSize = 40;
  static constexpr int maxWordLength = 7;

  int firstLineToPrint = wordIndex;
  int lastLineToPrint = wordIndex;
  while (wordIndex-firstLineToPrint < windowSize and has(0, firstLineToPrint-1, 0))
    --firstLineToPrint;
  while (lastLineToPrint - wordIndex < windowSize and has(0, lastLineToPrint+1, 0))
    ++lastLineToPrint;

  // First row: column names (the first cell is the "current word" marker column).
  std::vector<std::vector<std::string>> toPrint;
  toPrint.emplace_back();
  toPrint.back().emplace_back("");
  for (unsigned int i = 0; i < getNbColumns(); i++)
  {
    if (isExtraColumn(getColName(i)) and getColName(i) != EOSColName)
      continue;
    toPrint.back().emplace_back(getColName(i));
  }

  for (int line = firstLineToPrint; line <= lastLineToPrint; line++)
  {
    if (isComment(line))
      continue;

    toPrint.emplace_back();
    toPrint.back().emplace_back(line == (int)wordIndex ? "=>" : "");
    for (unsigned int i = 0; i < getNbColumns(); i++)
    {
      if (isExtraColumn(getColName(i)) and getColName(i) != EOSColName)
        continue;

      std::string colContent = has(i,line,0) ? getAsFeature(i, line).get() : "?";
      std::string toPrintCol = colContent;

      // The head column is displayed as the ID of the line it refers to.
      try
      {
        if (getColName(i) == headColName && toPrintCol != "_" && !toPrintCol.empty())
          if (toPrintCol != "0" && toPrintCol != "?")
            toPrintCol = has(0,std::stoi(toPrintCol),0) ? getAsFeature(idColName, std::stoi(toPrintCol)).get() : "?";
      } catch(const std::exception & e) {util::myThrow(fmt::format("toPrintCol='{}' {}", toPrintCol, e.what()));}

      toPrint.back().emplace_back(util::shrink(toPrintCol, maxWordLength));
    }
  }

  // Width of each printed column (the last cell of each row is not printed as a column).
  std::vector<std::size_t> colLength(toPrint[0].size(), 0);
  for (auto & line : toPrint)
    for (unsigned int col = 0; col < line.size()-1; col++)
      colLength[col] = std::max((int)colLength[col], util::printedLength(line[col]));

  int lengthSum = 2*getNbColumns();
  for (auto & val : colLength)
    lengthSum += val;
  std::string longLine = fmt::format("{:-<{}}", "", lengthSum);

  std::string historyStr = "";
  for (auto & h : history)
  {
    historyStr += h;
    historyStr += ",";
  }
  if (!historyStr.empty())
    historyStr.pop_back();

  // Stack entries are shown as IDs when an ID column exists, as raw indexes otherwise.
  std::string stackStr = "";
  for (auto & s : stack)
  {
    if (hasColIndex(idColName))
    {
      if (has(idColName, s, 0))
        stackStr += getAsFeature(idColName, s);
      else
        stackStr += "?";
    }
    else
      stackStr += std::to_string(s);
    stackStr += ",";
  }
  if (!stackStr.empty())
    stackStr.pop_back();

  fmt::print(dest, "{}\n", longLine);
  for (std::size_t index = characterIndex; index < util::getSize(rawInput) and index - characterIndex < lettersWindowSize; index++)
    fmt::print(dest, "{}", getLetter(index));
  if (!util::isEmpty(rawInput))
    fmt::print(dest, "\n{}\n", longLine);
  fmt::print(dest, "State={}\nwordIndex={} characterIndex={}\nhistory=({})\nstack=({})\n", state, wordIndex, characterIndex, historyStr, stackStr);
  fmt::print(dest, "{}\n", longLine);

  for (unsigned int line = 0; line < toPrint.size(); line++)
  {
    // Separator between the header row and the data rows.
    if (line == 1)
      fmt::print(dest, "{}\n", longLine);
    for (unsigned int col = 0; col < toPrint[line].size()-1; col++)
      fmt::print(dest, "{}{:>{}}{}", toPrint[line][col], "", colLength[col]-util::printedLength(toPrint[line][col]), col == toPrint[line].size()-2 ? "\n" : " ");
    // Blank line after a row whose last cell equals EOSSymbol1.
    if (toPrint[line].back() == EOSSymbol1)
      fmt::print(dest, "\n");
  }

  fmt::print(dest, "{}\n", longLine);
}

// Returns the non-empty cell with the highest hypothesis index (>= 1) of the
// given column/line, or cell 0 when all hypothesis cells are empty.
// Throws (util::myThrow) when the line/column does not exist.
Config::String & Config::getLastNotEmpty(int colIndex, int lineIndex)
{
  if (!has(colIndex, lineIndex, 0))
    util::myThrow(fmt::format("asked for line {} but last line = {}", lineIndex, getNbLines()+getFirstLineIndex()-1));

  int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  for (int i = nbHypothesesMax; i > 0; --i)
    if (!util::isEmpty(lines[baseIndex+i]))
      return lines[baseIndex+i];

  return lines[baseIndex];
}

// Like getLastNotEmpty, but never falls back on cell 0: when every hypothesis
// cell is empty, the first hypothesis cell (index 1) is returned.
Config::String & Config::getLastNotEmptyHyp(int colIndex, int lineIndex)
{
  if (!has(colIndex, lineIndex, 0))
    util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

  int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  for (int i = nbHypothesesMax; i > 0; --i)
    if (!util::isEmpty(lines[baseIndex+i]))
      return lines[baseIndex+i];

  return lines[baseIndex+1];
}

// Returns the first empty hypothesis cell (index >= 1) of the given
// column/line; when none of the cells 1..nbHypothesesMax-1 is empty, the last
// hypothesis cell is returned.
Config::String & Config::getFirstEmpty(int colIndex, int lineIndex)
{
  if (!has(colIndex, lineIndex, 0))
    util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

  int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  for (int i = 1; i < nbHypothesesMax; ++i)
    if (util::isEmpty(lines[baseIndex+i]))
      return lines[baseIndex+i];

  return lines[baseIndex+nbHypothesesMax];
}

// Name-based overload of getFirstEmpty.
Config::String & Config::getFirstEmpty(const std::string & colName, int lineIndex)
{
  return getFirstEmpty(getColIndex(colName), lineIndex);
}

// Read-only counterpart of getLastNotEmpty.
const Config::String & Config::getLastNotEmptyConst(int colIndex, int lineIndex) const
{
  if (!has(colIndex, lineIndex, 0))
    util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

  int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  for (int i = nbHypothesesMax; i > 0; --i)
    if (!util::isEmpty(lines[baseIndex+i]))
      return lines[baseIndex+i];

  return lines[baseIndex];
}

// Read-only counterpart of getLastNotEmptyHyp.
const Config::String & Config::getLastNotEmptyHypConst(int colIndex, int lineIndex) const
{
  if (!has(colIndex, lineIndex, 0))
    util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

  int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  for (int i = nbHypothesesMax; i > 0; --i)
    if (!util::isEmpty(lines[baseIndex+i]))
      return lines[baseIndex+i];

  return lines[baseIndex+1];
}

// Value of a cell as seen by feature extraction: for predicted columns only
// hypothesis cells are considered, otherwise cell 0 is the fallback.
const Config::String & Config::getAsFeature(int colIndex, int lineIndex) const
{
  if (isPredicted(getColName(colIndex)))
    return getLastNotEmptyHypConst(colIndex, lineIndex);

  return getLastNotEmptyConst(colIndex, lineIndex);
}

// Name-based overload of getAsFeature.
const Config::String & Config::getAsFeature(const std::string & colName, int lineIndex) const
{
  return getAsFeature(getColIndex(colName), lineIndex);
}

// Name-based overload of getLastNotEmpty.
Config::String & Config::getLastNotEmpty(const std::string & colName, int lineIndex)
{
  return getLastNotEmpty(getColIndex(colName), lineIndex);
}

// Name-based overload of getLastNotEmptyHyp.
Config::String & Config::getLastNotEmptyHyp(const std::string & colName, int lineIndex)
{
  return getLastNotEmptyHyp(getColIndex(colName), lineIndex);
}

// Name-based overload of getLastNotEmptyConst.
const Config::String & Config::getLastNotEmptyConst(const std::string & colName, int lineIndex) const
{
  return getLastNotEmptyConst(getColIndex(colName), lineIndex);
}

// Name-based overload of getLastNotEmptyHypConst.
const Config::String & Config::getLastNotEmptyHypConst(const std::string & colName, int lineIndex) const
{
  return getLastNotEmptyHypConst(getColIndex(colName), lineIndex);
}

// Iterator on the requested cell; `lineIndex` is absolute. No bounds checking.
Config::ValueIterator Config::getIterator(int colIndex, int lineIndex, int hypothesisIndex)
{
  return lines.begin() + getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex) + hypothesisIndex;
}

// Const iterator on the requested cell; `lineIndex` is absolute. No bounds checking.
Config::ConstValueIterator Config::getConstIterator(int colIndex, int lineIndex, int hypothesisIndex) const
{
  return lines.begin() + getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex) + hypothesisIndex;
}

// Appends a transition name to the history.
void Config::addToHistory(const std::string & transition)
{
  history.push_back(String(transition));
}

// Pushes a line index on top of the stack.
void Config::addToStack(std::size_t index)
{
  stack.push_back(index);
}

// Pops the top of the stack, remembering it as the last popped element.
// The stack must not be empty.
void Config::popStack()
{
  lastPoppedStack = getStack(0);
  stack.pop_back();
}

// Exchanges two stack elements, addressed relative to the top (0 = top).
void Config::swapStack(int relIndex1, int relIndex2)
{
  // std::size_t temporary: stack entries are std::size_t, an int could truncate.
  std::size_t tmp = getStack(relIndex1);
  getStackRef(relIndex1) = getStack(relIndex2);
  getStackRef(relIndex2) = tmp;
}

// True when `letterIndex` is a valid position in the raw input.
bool Config::hasCharacter(int letterIndex) const
{
  return letterIndex >= 0 and letterIndex < (int)util::getSize(rawInput);
}

// Letter of the raw input at `letterIndex`. No bounds checking (see hasCharacter).
util::utf8char Config::getLetter(int letterIndex) const
{
  return rawInput[letterIndex];
}

// True when cell 0 of the first column of the line starts with '#'.
bool Config::isComment(std::size_t lineIndex) const
{
  auto iter = getConstIterator(0, lineIndex, 0);
  return !iter->get().empty() and iter->get()[0] == '#';
}

// True when the first column starts with '#', either in its feature value
// (getAsFeature) or in cell 0.
bool Config::isCommentPredicted(std::size_t lineIndex) const
{
  auto & col0Pred = getAsFeature(0, lineIndex);
  auto & col0Gold = getConst(0, lineIndex, 0);
  return (!util::isEmpty(col0Pred) and col0Pred.get()[0] == '#') or (!util::isEmpty(col0Gold) and col0Gold.get()[0] == '#');
}

// True when the ID column exists and cell 0 of the ID contains '-'.
bool Config::isMultiword(std::size_t lineIndex) const
{
  return hasColIndex(idColName) && getConst(idColName, lineIndex, 0).get().find('-') != std::string::npos;
}

// Same as isMultiword, based on the feature value of the ID.
bool Config::isMultiwordPredicted(std::size_t lineIndex) const
{
  return hasColIndex(idColName) && getAsFeature(idColName, lineIndex).get().find('-') != std::string::npos;
}

// For an ID of the form "a-b" (cell 0), returns b - a.
// The ID must contain '-' (see isMultiword); this is not checked here.
int Config::getMultiwordSize(std::size_t lineIndex) const
{
  auto splited = util::split(getConst(idColName, lineIndex, 0).get(), '-');
  return std::stoi(std::string(splited[1])) - std::stoi(std::string(splited[0]));
}

// Same as getMultiwordSize, based on the feature value of the ID.
int Config::getMultiwordSizePredicted(std::size_t lineIndex) const
{
  auto splited = util::split(getAsFeature(idColName, lineIndex).get(), '-');
  return std::stoi(std::string(splited[1])) - std::stoi(std::string(splited[0]));
}

// True when the ID column exists and cell 0 of the ID contains '.'.
bool Config::isEmptyNode(std::size_t lineIndex) const
{
  return hasColIndex(idColName) && getConst(idColName, lineIndex, 0).get().find('.') != std::string::npos;
}

// Same as isEmptyNode, based on the feature value of the ID.
bool Config::isEmptyNodePredicted(std::size_t lineIndex) const
{
  return hasColIndex(idColName) && getAsFeature(idColName, lineIndex).get().find('.') != std::string::npos;
}

// A token is a line that is neither a comment, a multiword nor an empty node.
bool Config::isToken(std::size_t lineIndex) const
{
  return !isComment(lineIndex) && !isMultiword(lineIndex) && !isEmptyNode(lineIndex);
}

// Same as isToken, using the *Predicted variants of the tests.
bool Config::isTokenPredicted(std::size_t lineIndex) const
{
  return !isCommentPredicted(lineIndex) && !isMultiwordPredicted(lineIndex) && !isEmptyNodePredicted(lineIndex);
}

// Moves the word index by `relativeMovement` non-comment lines.
// On failure (a border is reached) the word index is restored and false is returned.
bool Config::moveWordIndex(int relativeMovement)
{
  int nbMovements = 0;
  int oldVal = wordIndex;
  while (nbMovements != relativeMovement)
  {
    do
    {
      relativeMovement > 0 ? wordIndex++ : wordIndex--;
      if (!has(0,wordIndex,0))
      {
        wordIndex = oldVal;
        return false;
      }
    }
    while (isComment(wordIndex));
    nbMovements += relativeMovement > 0 ? 1 : -1;
  }

  return true;
}

// Moves the word index by up to `relativeMovement` non-comment lines, stopping
// silently at the border instead of failing. If the final position is a
// comment, one step is taken in the opposite direction.
void Config::moveWordIndexRelaxed(int relativeMovement)
{
  int nbMovements = 0;
  int increment = relativeMovement > 0 ? 1 : -1;
  while (nbMovements != relativeMovement)
  {
    do
    {
      // Border reached: stay in place, the movement is still counted.
      if (!has(0,wordIndex+increment,0))
        break;
      wordIndex += increment;
    }
    while (isComment(wordIndex));
    nbMovements += relativeMovement > 0 ? 1 : -1;
  }

  if (!isComment(wordIndex))
    return;

  moveWordIndex(-increment);
}

// True when moveWordIndex(relativeMovement) would succeed; nothing is modified.
bool Config::canMoveWordIndex(int relativeMovement) const
{
  int nbMovements = 0;
  int oldVal = wordIndex;
  while (nbMovements != relativeMovement)
  {
    do
    {
      relativeMovement > 0 ? oldVal++ : oldVal--;
      if (!has(0,oldVal,0))
        return false;
    }
    while (isComment(oldVal));
    nbMovements += relativeMovement > 0 ? 1 : -1;
  }

  return true;
}

// Moves the character index by `relativeMovement`, clamped to
// [0, size of raw input]. Returns true when no clamping was needed.
bool Config::moveCharacterIndex(int relativeMovement)
{
  // Signed arithmetic: with an unsigned index, a backward move past 0 would
  // wrap around and be clamped to the end of the input instead of to 0.
  const long inputSize = static_cast<long>(util::getSize(rawInput));
  const long target = static_cast<long>(characterIndex) + relativeMovement;
  const long clamped = std::max(0L, std::min(target, inputSize));
  characterIndex = clamped;
  return clamped == target;
}

// True when moveCharacterIndex(relativeMovement) would not need clamping.
bool Config::canMoveCharacterIndex(int relativeMovement) const
{
  const long inputSize = static_cast<long>(util::getSize(rawInput));
  const long target = static_cast<long>(characterIndex) + relativeMovement;
  const long clamped = std::max(0L, std::min(target, inputSize));
  return clamped == target;
}

// True when every remaining letter of the raw input (from the character index
// on) is a separator; also true when nothing is left.
bool Config::rawInputOnlySeparatorsLeft() const
{
  for (unsigned int i = characterIndex; i < util::getSize(rawInput); i++)
    if (!util::isSeparator(rawInput[i]))
      return false;

  return true;
}

std::size_t Config::getWordIndex() const
{
  return wordIndex;
}

std::size_t Config::getCharacterIndex() const
{
  return characterIndex;
}

// History entry addressed from the end (0 = most recent). No bounds checking
// (see hasHistory).
const Config::String & Config::getHistory(int relativeIndex) const
{
  return history[history.size()-1-relativeIndex];
}

// Stack entry addressed from the top (0 = top); -1 designates the last popped
// element. No bounds checking (see hasStack).
std::size_t Config::getStack(int relativeIndex) const
{
  if (relativeIndex == -1)
    return getLastPoppedStack();

  return stack[stack.size()-1-relativeIndex];
}

// Mutable reference to a stack entry addressed from the top (0 = top).
// Unlike getStack, -1 is not supported here.
std::size_t & Config::getStackRef(int relativeIndex)
{
  return stack[stack.size()-1-relativeIndex];
}

bool Config::hasHistory(int relativeIndex) const
{
  return relativeIndex >= 0 && relativeIndex < (int)history.size();
}

// True when getStack(relativeIndex) is meaningful; for -1, the last popped
// element must designate an existing line.
bool Config::hasStack(int relativeIndex) const
{
  if (relativeIndex == -1)
    return has(0,getLastPoppedStack(),0);

  return relativeIndex >= 0 && relativeIndex < (int)stack.size();
}

Config::String Config::getState() const
{
  return state;
}

void Config::setState(const std::string state)
{
  this->state = state;
}

// True when there is no line after the current word, the stack is empty and,
// if a raw input exists, only separators are left in it.
bool Config::stateIsDone() const
{
  if (!util::isEmpty(rawInput))
    return rawInputOnlySeparatorsLeft() and !has(0, wordIndex+1, 0) and !hasStack(0);

  return !has(0, wordIndex+1, 0) and !hasStack(0);
}

// Marks the given columns as predicted (throws on an unknown column), along
// with every extra column except EOSColName.
void Config::addPredicted(const std::set<std::string> & predicted)
{
  for (auto & col : predicted)
  {
    if (!hasColIndex(col))
      util::myThrow(fmt::format("unknown column '{}'", col));
    this->predicted.insert(col);
  }

  for (auto & col : extraColumns)
    if (col != EOSColName)
      this->predicted.insert(col);
}

bool Config::isPredicted(const std::string & colName) const
{
  return predicted.count(colName) != 0;
}

int Config::getLastPoppedStack() const
{
  return lastPoppedStack;
}

int Config::getCurrentWordId() const
{
  return currentWordId;
}

void Config::setCurrentWordId(int currentWordId)
{
  this->currentWordId = currentWordId;
}

// Fills in missing ID and head values of token lines: an empty ID becomes the
// previous token's ID + 1 (or 1), and an empty head becomes "0" for the line
// whose ID is 1, or the index of the most recent line with ID 1 otherwise.
// NOTE(review): lines are addressed from 0 to getNbLines()-1, which assumes
// getFirstLineIndex() is 0 - confirm against callers.
void Config::addMissingColumns()
{
  int firstIndex = 0;

  for (unsigned int index = 0; index < getNbLines(); index++)
  {
    if (!isTokenPredicted(index))
      continue;

    if (util::isEmpty(getAsFeature(idColName, index)))
    {
      int last = 0;
      if (index > 0 and isTokenPredicted(index-1))
        last = std::stoi(getAsFeature(idColName, index-1));
      getLastNotEmptyHyp(idColName, index) = std::to_string(last+1);
    }

    int curId = std::stoi(getAsFeature(idColName, index));
    if (curId == 1)
      firstIndex = index;

    if (hasColIndex(headColName))
      if (util::isEmpty(getAsFeature(headColName, index)))
        getLastNotEmptyHyp(headColName, index) = (curId == 1) ? "0" : std::to_string(firstIndex);
  }
}

// Line index of the `relativeIndex`-th non-comment line relative to the
// current word (0 = current word or the next non-comment line, negative =
// before the current word). Returns -1 when there is no such line.
long Config::getRelativeWordIndex(int relativeIndex) const
{
  if (relativeIndex < 0)
  {
    for (int index = getWordIndex()-1, counter = 0; has(0,index,0); --index)
      if (!isCommentPredicted(index))
      {
        --counter;
        if (counter == relativeIndex)
          return index;
      }
  }
  else
  {
    for (int index = getWordIndex(), counter = 0; has(0,index,0); ++index)
      if (!isCommentPredicted(index))
      {
        if (counter == relativeIndex)
          return index;
        ++counter;
      }
  }

  return -1;
}

// Line index designated by `relativeIndex` in the buffer or in the stack.
long Config::getRelativeWordIndex(Object object, int relativeIndex) const
{
  if (object == Object::Buffer)
    return getRelativeWordIndex(relativeIndex);

  return getStack(relativeIndex);
}

// True when getRelativeWordIndex(object, relativeIndex) designates an existing line.
bool Config::hasRelativeWordIndex(Object object, int relativeIndex) const
{
  if (object == Object::Buffer)
    return has(0,getRelativeWordIndex(relativeIndex),0);

  return hasStack(relativeIndex);
}

void Config::setAppliableSplitTransitions(const std::vector<Transition *> & appliableSplitTransitions)
{
  this->appliableSplitTransitions = appliableSplitTransitions;
}

void Config::setAppliableTransitions(const std::vector<int> & appliableTransitions)
{
  this->appliableTransitions = appliableTransitions;
}

const std::vector<Transition *> & Config::getAppliableSplitTransitions() const
{
  return appliableSplitTransitions;
}

const std::vector<int> & Config::getAppliableTransitions() const
{
  return appliableTransitions;
}

// Parses "b" (Buffer) or "s" (Stack); any other value is reported through
// util::myThrow.
Config::Object Config::str2object(const std::string & s)
{
  if (s == "b")
    return Object::Buffer;
  if (s == "s")
    return Object::Stack;

  util::myThrow(fmt::format("Invalid object '{}'", s));

  // Kept so that every control path returns a value.
  return Object::Buffer;
}

bool Config::isExtraColumn(const std::string & colName) const
{
  for (auto & extraCol : extraColumns)
    if (extraCol == colName)
      return true;

  return false;
}

int Config::getLastAttached() const
{
  return lastAttached;
}

void Config::setLastAttached(int lastAttached)
{
  this->lastAttached = lastAttached;
}

std::size_t Config::getStackSize() const
{
  return stack.size();
}

// Replaces the current strategy by one built from `strategyDefinition`.
void Config::setStrategy(const std::vector<std::string> & strategyDefinition)
{
  strategy.reset(new Strategy(strategyDefinition));
}

// Returns the strategy; reports an error through util::myThrow when it was never set.
Strategy & Config::getStrategy()
{
  if (strategy.get() == nullptr)
    util::myThrow("strategy was not set");

  return *strategy.get();
}