#include "Action.hpp"
#include "Transition.hpp"

/// Build an action from its type tag and its three callbacks.
///
/// @param type      Category tag stored in the action.
/// @param apply     Callback that performs the action on a configuration.
/// @param undo      Callback meant to revert what `apply` did.
/// @param appliable Predicate telling whether `apply` may be called on a configuration.
Action::Action(Action::Type type, std::function<void(Config & config, Action & action)> apply, std::function<void(Config & config, Action & action)> undo, std::function<bool(const Config & config, const Action & action)> appliable)
{
  this->type = type;
  this->apply = apply;
  this->undo = undo;
  this->appliable = appliable;
}

/// Add lines to the configuration until the line `nbLines` after the current word index exists.
/// The undo callback is a no-op: added lines are kept.
Action Action::addLinesIfNeeded(int nbLines)
{
  auto apply = [nbLines](Config & config, Action &)
  {
    while (!config.has(0, config.getWordIndex()+nbLines, 0))
      config.addLines(1);
  };

  auto undo = [](Config &, Action &)
  {
  };

  auto appliable = [](const Config &, const Action &)
  {
    return true;
  };

  return {Type::AddLines, apply, undo, appliable};
}

/// Move the word index by `movement`; undone by moving it by `-movement`.
Action Action::moveWordIndex(int movement)
{
  auto apply = [movement](Config & config, Action &)
  {
    config.moveWordIndex(movement);
  };

  auto undo = [movement](Config & config, Action &)
  {
    config.moveWordIndex(-movement);
  };

  auto appliable = [movement](const Config & config, const Action &)
  {
    return config.canMoveWordIndex(movement);
  };

  return {Type::MoveWord, apply, undo, appliable};
}

/// Write the id range "first-last" on the current buffer word and individual ids
/// (plus the multiword marker) on the `multiwordSize` following buffer words.
Action Action::setMultiwordIds(int multiwordSize)
{
  auto apply = [multiwordSize](Config & config, Action & a)
  {
    addHypothesisRelative(Config::idColName, Config::Object::Buffer, 0, fmt::format("{}-{}", config.getCurrentWordId()+1, config.getCurrentWordId()+multiwordSize)).apply(config, a);

    for (int i = 0; i < multiwordSize; i++)
    {
      addHypothesisRelative(Config::idColName, Config::Object::Buffer, i+1, fmt::format("{}", config.getCurrentWordId()+1+i)).apply(config, a);
      addHypothesisRelative(Config::isMultiColName, Config::Object::Buffer, i+1, Config::EOSSymbol1).apply(config, a);
    }
  };

  // NOTE(review): only the id cells are cleared here; the isMultiColName cells
  // written by apply are left in place - confirm whether that is intended.
  auto undo = [multiwordSize](Config & config, Action &)
  {
    config.getLastNotEmpty(Config::idColName, config.getWordIndex()) = "";

    for (int i = 0; i < multiwordSize; i++)
      config.getLastNotEmpty(Config::idColName, config.getWordIndex()+1+i) = "";
  };

  auto appliable = [](const Config &, const Action &)
  {
    return true;
  };

  return {Type::Write, apply, undo, appliable};
}

/// Advance the character index over `consumed`, provided the upcoming letters
/// match `consumed` case-insensitively.
Action Action::consumeCharacterIndex(const util::utf8string & consumed)
{
  auto apply = [consumed](Config & config, Action &)
  {
    config.moveCharacterIndex(consumed.size());
  };

  auto undo = [consumed](Config & config, Action &)
  {
    // Convert to a signed value before negating: negating the unsigned size()
    // directly only works through wrap-around.
    config.moveCharacterIndex(-static_cast<int>(consumed.size()));
  };

  auto appliable = [consumed](const Config & config, const Action &)
  {
    if (!config.canMoveCharacterIndex(consumed.size()))
      return false;

    for (unsigned int i = 0; i < consumed.size(); i++)
    {
      if (!config.hasCharacter(config.getCharacterIndex()+i))
        return false;

      const util::utf8char & letter = config.getLetter(config.getCharacterIndex()+i);
      const util::utf8char & consumedLetter = consumed[i];

      if (util::lower(letter) != util::lower(consumedLetter))
        return false;
    }

    return true;
  };

  // This action only moves the character index, so it is tagged like
  // moveCharacterIndex and ignoreCurrentCharacter (it used to be MoveWord).
  return {Type::MoveChar, apply, undo, appliable};
}

/// Move the character index by `movement`; undone by moving it by `-movement`.
Action Action::moveCharacterIndex(int movement)
{
  auto apply = [movement](Config & config, Action &)
  {
    config.moveCharacterIndex(movement);
  };

  auto undo = [movement](Config & config, Action &)
  {
    config.moveCharacterIndex(-movement);
  };

  auto appliable = [movement](const Config & config, const Action &)
  {
    return config.canMoveCharacterIndex(movement);
  };

  return {Type::MoveChar, apply, undo, appliable};
}

/// Write `hypothesis` into the first empty cell of (`colName`, `lineIndex`).
/// Undo blanks the last non-empty cell of the same column and line.
Action Action::addHypothesis(const std::string & colName, std::size_t lineIndex, const std::string & hypothesis)
{
  auto apply = [colName, lineIndex, hypothesis](Config & config, Action &)
  {
    config.getFirstEmpty(colName, lineIndex) = hypothesis;
  };

  auto undo = [colName, lineIndex](Config & config, Action &)
  {
    config.getLastNotEmpty(colName, lineIndex) = "";
  };

  auto appliable = [](const Config &, const Action &)
  {
    return true;
  };

  return {Type::Write, apply, undo, appliable};
}

/// Insert `addition` into the '|'-separated, sorted list stored in the last
/// non-empty hypothesis of (`colName`, `lineIndex`).
/// Not appliable when the cell does not exist or already contains `addition`.
Action Action::addToHypothesis(const std::string & colName, std::size_t lineIndex, const std::string & addition)
{
  auto apply = [colName, lineIndex, addition](Config & config, Action &)
  {
    auto currentElems = util::split(std::string(config.getLastNotEmptyHypConst(colName, lineIndex)), '|');
    currentElems.emplace_back(addition);
    std::sort(currentElems.begin(), currentElems.end());
    config.getLastNotEmptyHyp(colName, lineIndex) = util::join("|", currentElems);
  };

  auto undo = [colName, lineIndex, addition](Config & config, Action &)
  {
    auto curElems = util::split(std::string(config.getLastNotEmptyHypConst(colName, lineIndex)), '|');
    std::vector<std::string> newElems;

    // Keep everything except the element that apply inserted.
    for (auto & elem : curElems)
      if (elem != addition)
        newElems.emplace_back(elem);

    config.getLastNotEmptyHyp(colName, lineIndex) = util::join("|", newElems);
  };

  auto appliable = [colName, lineIndex, addition](const Config & config, const Action &)
  {
    if (!config.has(colName, lineIndex, 0))
      return false;

    auto & current = config.getLastNotEmptyHypConst(colName, lineIndex);
    auto splited = util::split(std::string(current), '|');

    for (auto & part : splited)
      if (part == addition)
        return false;

    return true;
  };

  return {Type::Write, apply, undo, appliable};
}

/// Accumulate `addition` into the per-state entry "state=value|count" of the
/// comma-separated list stored in (`colName`, `lineIndex`).
///
/// @param mean When true the stored value is a running mean, otherwise a running sum.
Action Action::sumToHypothesis(const std::string & colName, std::size_t lineIndex, float addition, bool mean)
{
  // Shared by apply and undo: parse the cell, locate (or create) the entry of
  // the current state, let `update` modify its value and count, write it back.
  auto rewriteStateEntry = [colName, lineIndex](Config & config, const std::function<void(float &, int &)> & update)
  {
    std::string totalStr = std::string(config.getLastNotEmptyHypConst(colName, lineIndex));

    if (totalStr.empty() || totalStr == "_")
      totalStr = fmt::format("{}={}|{}", std::string(config.getState()), 0.0, 0);

    auto byStates = util::split(totalStr, ',');
    int index = -1;

    for (unsigned int i = 0; i < byStates.size(); i++)
    {
      auto state = util::split(byStates[i], '=')[0];
      if (state == config.getState())
      {
        index = i;
        break;
      }
    }

    if (index == -1)
    {
      byStates.emplace_back(fmt::format("{}={}|{}", std::string(config.getState()), 0.0, 0));
      index = byStates.size()-1;
    }

    auto splited = util::split(util::split(byStates[index], '=')[1], '|');
    float curVal = 0.0;
    int curNb = 0;

    if (splited.size() == 2)
    {
      curVal = std::stof(splited[0]);
      curNb = std::stoi(splited[1]);
    }

    update(curVal, curNb);

    byStates[index] = fmt::format("{}={}|{}", std::string(config.getState()), curVal, curNb);
    config.getLastNotEmptyHyp(colName, lineIndex) = util::join(",", byStates);
  };

  auto apply = [rewriteStateEntry, addition, mean](Config & config, Action &)
  {
    rewriteStateEntry(config, [addition, mean](float & curVal, int & curNb)
    {
      curNb += 1;

      if (mean)
      {
        // Knuth's algorithm for online mean
        float delta = addition - curVal;
        curVal += delta / curNb;
      }
      else
      {
        curVal += addition;
      }
    });
  };

  auto undo = [rewriteStateEntry, addition, mean](Config & config, Action &)
  {
    rewriteStateEntry(config, [addition, mean](float & curVal, int & curNb)
    {
      const int previousNb = curNb - 1;

      if (mean)
      {
        // Inverse of the online mean step: mean_{n-1} = (n*mean_n - x) / (n-1).
        // It must use the count from before the decrement; with no value left
        // the mean goes back to its initial 0.
        curVal = previousNb > 0 ? (curNb*curVal - addition) / previousNb : 0.0f;
      }
      else
      {
        curVal -= addition;
      }

      curNb = previousNb;
    });
  };

  auto appliable = [colName, lineIndex](const Config & config, const Action &)
  {
    return config.has(colName, lineIndex, 0);
  };

  return {Type::Write, apply, undo, appliable};
}

/// Same as addToHypothesis, with the line given relative to the stack or buffer.
Action Action::addToHypothesisRelative(const std::string & colName, Config::Object object, int relativeIndex, const std::string & addition)
{
  auto apply = [colName, object, relativeIndex, addition](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(object, relativeIndex);

    return addToHypothesis(colName, lineIndex, addition).apply(config, a);
  };

  auto undo = [colName, object, relativeIndex, addition](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(object, relativeIndex);

    // The undo of addToHypothesis removes the elements equal to its `addition`,
    // so the real value has to be forwarded (an empty string removed nothing).
    return addToHypothesis(colName, lineIndex, addition).undo(config, a);
  };

  auto appliable = [colName, object, relativeIndex, addition](const Config & config, const Action & a)
  {
    if (!config.hasRelativeWordIndex(object, relativeIndex))
      return false;

    int lineIndex = config.getRelativeWordIndex(object, relativeIndex);

    return addToHypothesis(colName, lineIndex, addition).appliable(config, a);
  };

  return {Type::Write, apply, undo, appliable};
}

/// Same as addHypothesis, with the line given relative to the stack or buffer.
/// Not appliable when the relative index does not exist.
Action Action::addHypothesisRelative(const std::string & colName, Config::Object object, int relativeIndex, const std::string & hypothesis)
{
  auto apply = [colName, object, relativeIndex, hypothesis](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(object, relativeIndex);

    return addHypothesis(colName, lineIndex, hypothesis).apply(config, a);
  };

  auto undo = [colName, object, relativeIndex](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(object, relativeIndex);

    return addHypothesis(colName, lineIndex, "").undo(config, a);
  };

  auto appliable = [colName, object, relativeIndex](const Config & config, const Action & a)
  {
    if (!config.hasRelativeWordIndex(object, relativeIndex))
      return false;

    int lineIndex = config.getRelativeWordIndex(object, relativeIndex);

    return addHypothesis(colName, lineIndex, "").appliable(config, a);
  };

  return {Type::Write, apply, undo, appliable};
}

/// Variant of addHypothesisRelative whose appliable predicate always returns true
/// (the existence of the relative index is not checked).
Action Action::addHypothesisRelativeRelaxed(const std::string & colName, Config::Object object, int relativeIndex, const std::string & hypothesis)
{
  auto apply = [colName, object, relativeIndex, hypothesis](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(object, relativeIndex);

    return addHypothesis(colName, lineIndex, hypothesis).apply(config, a);
  };

  auto undo = [colName, object, relativeIndex](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(object, relativeIndex);

    return addHypothesis(colName, lineIndex, "").undo(config, a);
  };

  auto appliable = [](const Config &, const Action &)
  {
    return true;
  };

  return {Type::Write, apply, undo, appliable};
}

/// Push the current word index on the stack when that token is predicted.
/// An entry is added to the action data only when a push happened, so that
/// undo knows whether it has something to pop.
Action Action::pushWordIndexOnStack()
{
  auto apply = [](Config & config, Action & a)
  {
    if (config.isTokenPredicted(config.getWordIndex()))
    {
      a.data.emplace_back();
      config.addToStack(config.getWordIndex());
    }
  };

  auto undo = [](Config & config, Action & a)
  {
    if (!a.data.empty())
    {
      config.popStack();
      a.data.pop_back();
    }
  };

  auto appliable = [](const Config & config, const Action &)
  {
    if (config.hasStack(0) and config.getStack(0) == config.getWordIndex())
      return false;

    if (config.hasStack(0) and !config.isTokenPredicted(config.getStack(0)))
      return false;

    return (int)config.getWordIndex() != config.getLastPoppedStack();
  };

  return {Type::Push, apply, undo, appliable};
}

/// Remove the stack element at relative position `relIndex`.
/// The removed value is saved in the action data so that undo can restore it.
Action Action::popStack(int relIndex)
{
  auto apply = [relIndex](Config & config, Action & a)
  {
    auto toSave = config.getStack(relIndex);
    a.data.push_back(std::to_string(toSave));

    // Bring the element to position 0 by successive swaps, then pop it.
    for (int i = 0; relIndex-1-i >= 0; i++)
      config.swapStack(relIndex-i, relIndex-1-i);

    config.popStack();
  };

  auto undo = [relIndex](Config & config, Action & a)
  {
    config.addToStack(std::stoi(a.data.back()));
    // Release the saved value, like the other undo callbacks do; otherwise the
    // data vector grows on every apply/undo cycle.
    a.data.pop_back();

    // Move the restored element back from position 0 to `relIndex`.
    for (int i = 0; i+1 <= relIndex; i++)
      config.swapStack(i, i+1);
  };

  auto appliable = [relIndex](const Config & config, const Action &)
  {
    return config.hasStack(relIndex) and config.getStack(relIndex) != config.getWordIndex();
  };

  return {Type::Pop, apply, undo, appliable};
}

/// Close the current word: increment the current word id, write it as the id of
/// the current buffer word and add a line when the next one is missing and the
/// raw input still has non-separator content.
Action Action::endWord()
{
  auto apply = [](Config & config, Action & a)
  {
    config.setCurrentWordId(config.getCurrentWordId()+1);
    addHypothesisRelative(Config::idColName, Config::Object::Buffer, 0, std::to_string(config.getCurrentWordId())).apply(config, a);

    if (!config.rawInputOnlySeparatorsLeft() and !config.has(0,config.getWordIndex()+1,0))
      config.addLines(1);
  };

  auto undo = [](Config & config, Action &)
  {
    config.setCurrentWordId(config.getCurrentWordId()-1);
    config.getLastNotEmpty(Config::idColName, config.getWordIndex()) = "";
  };

  auto appliable = [](const Config & config, const Action &)
  {
    if (std::string(config.getAsFeature("FORM", config.getWordIndex())).empty())
      return false;

    if (!std::string(config.getAsFeature(Config::idColName, config.getWordIndex())).empty() and config.getAsFeature(Config::isMultiColName, config.getWordIndex()) != Config::EOSSymbol1)
      return false;

    return true;
  };

  return {Type::Write, apply, undo, appliable};
}

/// Pop every element of the stack, saving them in the action data for undo.
Action Action::emptyStack()
{
  auto apply = [](Config & config, Action & a)
  {
    while (config.hasStack(0))
    {
      a.data.push_back(std::to_string(config.getStack(0)));
      config.popStack();
    }
  };

  auto undo = [](Config & config, Action & a)
  {
    while (a.data.size())
    {
      config.addToStack(std::stoi(a.data.back()));
      a.data.pop_back();
    }
  };

  auto appliable = [](const Config &, const Action &)
  {
    return true;
  };

  return {Type::Pop, apply, undo, appliable};
}

/// Skip the current character; only appliable when it is a separator.
Action Action::ignoreCurrentCharacter()
{
  auto apply = [](Config & config, Action &)
  {
    config.moveCharacterIndex(1);
  };

  auto undo = [](Config & config, Action &)
  {
    config.moveCharacterIndex(-1);
  };

  auto appliable = [](const Config & config, const Action &)
  {
    return config.hasCharacter(config.getCharacterIndex()) and util::isSeparator(config.getLetter(config.getCharacterIndex())) and config.canMoveCharacterIndex(1);
  };

  return {Type::MoveChar, apply, undo, appliable};
}

/// Check-only action: appliable when the cell (`colName`, relative word) is empty.
/// apply and undo do nothing.
Action Action::assertIsEmpty(const std::string & colName, Config::Object object, int relativeIndex)
{
  auto apply = [](Config &, Action &)
  {
  };

  auto undo = [](Config &, Action &)
  {
  };

  auto appliable = [colName, object, relativeIndex](const Config & config, const Action &)
  {
    try
    {
      if (!config.hasRelativeWordIndex(object, relativeIndex))
        return false;

      auto lineIndex = config.getRelativeWordIndex(object, relativeIndex);

      return std::string(config.getAsFeature(colName, lineIndex)).empty();
    } catch (const std::exception & e)
    {
      // Re-raise with the arguments of the assertion added to the message.
      util::myThrow(fmt::format("colName='{}' object='{}' relativeIndex='{}' {}", colName, object == Config::Object::Stack ? "Stack" : "Buffer", relativeIndex, e.what()));
    }

    return false;
  };

  return {Type::Check, apply, undo, appliable};
}

/// Check-only action: appliable when the cell (`colName`, relative word) is not empty.
/// apply and undo do nothing.
Action Action::assertIsNotEmpty(const std::string & colName, Config::Object object, int relativeIndex)
{
  auto apply = [](Config &, Action &)
  {
  };

  auto undo = [](Config &, Action &)
  {
  };

  auto appliable = [colName, object, relativeIndex](const Config & config, const Action &)
  {
    try
    {
      if (!config.hasRelativeWordIndex(object, relativeIndex))
        return false;

      auto lineIndex = config.getRelativeWordIndex(object, relativeIndex);

      return !std::string(config.getAsFeature(colName, lineIndex)).empty();
    } catch (const std::exception & e)
    {
      // Re-raise with the arguments of the assertion added to the message.
      util::myThrow(fmt::format("colName='{}' object='{}' relativeIndex='{}' {}", colName, object == Config::Object::Stack ? "Stack" : "Buffer", relativeIndex, e.what()));
    }

    return false;
  };

  return {Type::Check, apply, undo, appliable};
}

/// Append the next `n` raw input letters to column `col` of the given word.
/// When `col` is "FORM" the raw range columns of the word are updated as well.
Action Action::addCharsToCol(const std::string & col, int n, Config::Object object, int relativeIndex)
{
  auto apply = [col, n, object, relativeIndex](Config & config, Action &)
  {
    auto index = config.getRelativeWordIndex(object, relativeIndex);
    auto & curWord = config.getLastNotEmptyHyp(col, index);

    if (col == "FORM")
    {
      if (std::string(config.getAsFeature(Config::rawRangeStartColName, index)).empty())
        config.getLastNotEmptyHyp(Config::rawRangeStartColName, index) = fmt::format("{}", config.getCharacterIndex());

      config.getLastNotEmptyHyp(Config::rawRangeEndColName, index) = fmt::format("{}", config.getCharacterIndex());
      int curEndValue = std::stoi(config.getAsFeature(Config::rawRangeEndColName, index));
      config.getLastNotEmptyHyp(Config::rawRangeEndColName, index) = fmt::format("{}", curEndValue+n);
    }

    for (int i = 0; i < n; i++)
      curWord = fmt::format("{}{}", std::string(curWord), config.getLetter(config.getCharacterIndex()+i));
  };

  auto undo = [col, n, object, relativeIndex](Config & config, Action &)
  {
    auto index = config.getRelativeWordIndex(object, relativeIndex);
    auto & curWord = config.getLastNotEmptyHyp(col, index);
    auto newWord = util::splitAsUtf8(std::string(curWord));

    for (int i = 0; i < n; i++)
      newWord.pop_back();

    curWord = fmt::format("{}", newWord);

    // apply only touches the raw range columns for FORM, so undo must do the
    // same; otherwise it would parse and rewrite cells that apply never wrote.
    if (col == "FORM")
    {
      if (newWord.size() == 0)
        config.getLastNotEmptyHyp(Config::rawRangeStartColName, index) = "0";

      int curEndValue = std::stoi(config.getAsFeature(Config::rawRangeEndColName, index));
      config.getLastNotEmptyHyp(Config::rawRangeEndColName, index) = fmt::format("{}", curEndValue-n);
    }
  };

  auto appliable = [col, n, object, relativeIndex](const Config & config, const Action &)
  {
    if (!config.hasCharacter(config.getCharacterIndex()+n-1))
      return false;

    // A word cannot start with a space.
    auto firstLetter = config.getLetter(config.getCharacterIndex());
    if (firstLetter == ' ' and std::string(config.getAsFeature(col, config.getRelativeWordIndex(object, relativeIndex))).empty())
      return false;

    for (int i = 0; i < n; i++)
      if (util::isIllegal(config.getLetter(config.getCharacterIndex()+i)))
        return false;

    return true;
  };

  return {Type::Write, apply, undo, appliable};
}

/// In the sentence ending at buffer position `bufferIndex`, give a head to every
/// predicted token that has none: the headless token with the smallest line
/// index becomes the root (head "-1", deprel "root"), the others are attached to it.
Action Action::setRoot(int bufferIndex)
{
  auto apply = [bufferIndex](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(Config::Object::Buffer, bufferIndex);
    int rootIndex = -1;

    int searchStartIndex = lineIndex;
    if (searchStartIndex > 0 and config.getAsFeature(Config::idColName, lineIndex) != "1")
      searchStartIndex--;

    // Walk backwards to find the first line of the sentence.
    int firstSentIndex = lineIndex;
    for (int i = searchStartIndex; true; --i)
    {
      if (!config.has(0, i, 0))
      {
        if (i < 0)
          break;
        util::myThrow("The current sentence is too long to be completly held by the data strucure. Consider increasing SubConfig::SpanSize");
      }
      if (!config.isTokenPredicted(i))
        continue;

      if (config.getAsFeature(Config::EOSColName, i) == Config::EOSSymbol1)
        break;

      firstSentIndex = i;
    }

    // Record every headless token for undo; the last one visited is the root.
    for (int i = lineIndex; i >= firstSentIndex; --i)
    {
      if (!config.isTokenPredicted(i))
        continue;

      if (std::string(config.getAsFeature(Config::headColName, i)).empty())
      {
        rootIndex = i;
        a.data.push_back(std::to_string(i));
      }
    }

    for (int i = lineIndex; i >= firstSentIndex; --i)
    {
      if (!config.isTokenPredicted(i))
        continue;

      if (std::string(config.getAsFeature(Config::headColName, i)).empty())
      {
        if (i == rootIndex)
        {
          config.getFirstEmpty(Config::headColName, i) = "-1";
          config.getFirstEmpty(Config::deprelColName, i) = "root";
        }
        else
        {
          config.getFirstEmpty(Config::headColName, i) = std::to_string(rootIndex);
        }
      }
    }
  };

  // NOTE(review): only the heads are cleared; the "root" deprel written by apply
  // is left in place - confirm whether that is intended.
  auto undo = [](Config & config, Action & a)
  {
    while (a.data.size())
    {
      config.getLastNotEmptyHyp(Config::headColName, std::stoi(a.data.back())) = "";
      a.data.pop_back();
    }
  };

  auto appliable = [bufferIndex](const Config & config, const Action &)
  {
    int lineIndex = config.getRelativeWordIndex(Config::Object::Buffer, bufferIndex);
    return config.has(0,lineIndex,0) and config.isTokenPredicted(lineIndex) and config.getAsFeature(Config::isMultiColName, lineIndex) != Config::EOSSymbol1;
  };

  return {Type::Write, apply, undo, appliable};
}

/// Renumber the ids of the sentence ending at buffer position `bufferIndex`,
/// assign its sentence id and, when raw input is available, write the
/// '# text' / '# sent_id' metadata on its first line. Undo is not implemented.
Action Action::updateIds(int bufferIndex)
{
  auto apply = [bufferIndex](Config & config, Action &)
  {
    int lineIndex = config.getRelativeWordIndex(Config::Object::Buffer, bufferIndex);
    int firstIndexOfSentence = -1;
    int lastSentId = -1;

    // Walk backwards up to the end of the previous sentence.
    for (int i = lineIndex; true; --i)
    {
      if (!config.has(0, i, 0))
      {
        if (i < 0)
          break;
        util::myThrow("The current sentence is too long to be completly held by the data strucure. Consider increasing SubConfig::SpanSize");
      }
      if (config.isEmptyNode(i))
        continue;

      if (config.getLastNotEmptyHypConst(Config::EOSColName, i) == Config::EOSSymbol1)
      {
        lastSentId = std::stoi(config.getAsFeature(Config::sentIdColName, i));
        break;
      }

      firstIndexOfSentence = i;
    }

    if (firstIndexOfSentence < 0)
      util::myThrow("could not find any token in current sentence");

    for (int i = firstIndexOfSentence, currentId = 1; i <= lineIndex; ++i)
    {
      if (config.isEmptyNode(i))
        continue;

      // A multiword line gets a range and does not consume an id itself.
      if (config.isMultiwordPredicted(i))
        config.getFirstEmpty(Config::idColName, i) = fmt::format("{}-{}", currentId, currentId+config.getMultiwordSizePredicted(i));
      else
        config.getFirstEmpty(Config::idColName, i) = fmt::format("{}", currentId++);

      config.getFirstEmpty(Config::sentIdColName, i) = fmt::format("{}", lastSentId+1);
    }

    // Update metadata '# text = ...' and '# sent_id = X' before the sentence
    if (config.hasCharacter(0) and config.has(0,firstIndexOfSentence,0))
    {
      std::string textMetadata = "# text = ";
      int firstIndex = 0;
      int lastIndex = 0;

      try
      {
        firstIndex = std::stoi(config.getAsFeature(Config::rawRangeStartColName, firstIndexOfSentence));
      } catch (const std::exception & e)
      {
        util::myThrow(fmt::format("{} : '{}'", e.what(), std::string(config.getAsFeature(Config::rawRangeStartColName, firstIndexOfSentence))));
      }

      try
      {
        lastIndex = std::stoi(config.getAsFeature(Config::rawRangeEndColName, lineIndex));
      } catch (const std::exception & e)
      {
        util::myThrow(fmt::format("{} : '{}'", e.what(), std::string(config.getAsFeature(Config::rawRangeEndColName, lineIndex))));
      }

      for (auto i = firstIndex; i < lastIndex; i++)
        textMetadata = fmt::format("{}{}", textMetadata, config.getLetter(i));

      config.getLastNotEmptyHyp(Config::commentsColName, firstIndexOfSentence) = fmt::format("{}\n# sent_id = {}", textMetadata, std::string(config.getAsFeature(Config::sentIdColName, firstIndexOfSentence)));
    }
  };

  auto undo = [](Config &, Action &)
  {
    // TODO : undo this
  };

  auto appliable = [](const Config &, const Action &)
  {
    return true;
  };

  return {Type::Write, apply, undo, appliable};
}

/// Attach a dependent to a governor: write the governor's line index as the
/// head of the dependent, add the dependent's line index to the children of the
/// governor and remember the dependent as the last attached word.
Action Action::attach(Config::Object governorObject, int governorIndex, Config::Object dependentObject, int dependentIndex)
{
  auto apply = [governorObject, governorIndex, dependentObject, dependentIndex](Config & config, Action & a)
  {
    long govIndex = config.getRelativeWordIndex(governorObject, governorIndex);
    long depIndex = config.getRelativeWordIndex(dependentObject, dependentIndex);

    addHypothesisRelative(Config::headColName, dependentObject, dependentIndex, std::to_string(govIndex)).apply(config, a);
    addToHypothesisRelative(Config::childsColName, governorObject, governorIndex, std::to_string(depIndex)).apply(config, a);

    // Save the previous value so that undo can restore it.
    a.data.emplace_back(std::to_string(config.getLastAttached()));
    config.setLastAttached(depIndex);
  };

  auto undo = [governorObject, governorIndex, dependentObject, dependentIndex](Config & config, Action & a)
  {
    long depIndex = config.getRelativeWordIndex(dependentObject, dependentIndex);

    addHypothesisRelative(Config::headColName, dependentObject, dependentIndex, "").undo(config, a);
    // Remove the dependent from the governor's children. This used to call
    // apply with an empty string, which added an empty child instead.
    addToHypothesisRelative(Config::childsColName, governorObject, governorIndex, std::to_string(depIndex)).undo(config, a);

    config.setLastAttached(std::stoi(a.data.back()));
    a.data.pop_back();
  };

  auto appliable = [governorObject, governorIndex, dependentObject, dependentIndex](const Config & config, const Action &)
  {
    if (!config.hasRelativeWordIndex(governorObject, governorIndex) or !config.hasRelativeWordIndex(dependentObject, dependentIndex))
      return false;

    long govLineIndex = config.getRelativeWordIndex(governorObject, governorIndex);
    long depLineIndex = config.getRelativeWordIndex(dependentObject, dependentIndex);

    if (!config.isTokenPredicted(govLineIndex) or !config.isTokenPredicted(depLineIndex))
      return false;

    // Check if dep and head belongs to the same sentence
    if (config.getAsFeature(Config::sentIdColName, govLineIndex) != config.getAsFeature(Config::sentIdColName, depLineIndex))
      return false;

    // Check if dep is not already attached
    if (!std::string(config.getAsFeature(Config::headColName, depLineIndex)).empty())
      return false;

    return true;
  };

  return {Type::Write, apply, undo, appliable};
}

/// Apply the `index`-th transition of the configuration's appliable split
/// transitions. Undo is not implemented.
Action Action::split(int index)
{
  auto apply = [index](Config & config, Action &)
  {
    Transition * t = config.getAppliableSplitTransitions()[index];
    t->apply(config);
  };

  auto undo = [](Config &, Action &)
  {
    //TODO : undo this
  };

  auto appliable = [index](const Config & config, const Action &)
  {
    auto & transitions = config.getAppliableSplitTransitions();

    if (index < 0 or index >= (int)transitions.size())
      return false;

    Transition * t = transitions[index];
    return t->appliable(config);
  };

  return {Type::Write, apply, undo, appliable};
}

/// When the current word is marked as end of sentence: set the root of the
/// sentence, attach the other headless tokens to it, renumber ids, assign the
/// sentence id and empty the stack. Does nothing otherwise. Undo is not implemented.
Action Action::setRootUpdateIdsEmptyStackIfSentChanged()
{
  auto apply = [](Config & config, Action &)
  {
    int lineIndex = config.getWordIndex();
    int rootIndex = lineIndex;
    int lastSentId = -1;
    int firstIndexOfSentence = lineIndex;

    if (config.getAsFeature(Config::EOSColName, lineIndex) != Config::EOSSymbol1)
      return;

    // Walk backwards up to the end of the previous sentence; the headless
    // token with the smallest index becomes the root.
    for (int i = lineIndex-1; true; --i)
    {
      if (!config.has(0, i, 0))
      {
        if (i < 0)
          break;
        util::myThrow("The current sentence is too long to be completly held by the data strucure. Consider increasing SubConfig::SpanSize");
      }
      if (!config.isTokenPredicted(i))
        continue;

      if (config.getAsFeature(Config::EOSColName, i) == Config::EOSSymbol1)
      {
        lastSentId = std::stoi(config.getAsFeature(Config::sentIdColName, i));
        break;
      }

      if (std::string(config.getAsFeature(Config::headColName, i)).empty())
        rootIndex = i;

      firstIndexOfSentence = i;
    }

    for (int i = firstIndexOfSentence; i <= lineIndex; ++i)
    {
      if (!config.has(0, i, 0))
      {
        if (i < 0)
          break;
        util::myThrow("The current sentence is too long to be completly held by the data strucure. Consider increasing SubConfig::SpanSize");
      }
      if (!config.isTokenPredicted(i))
        continue;

      if (std::string(config.getAsFeature(Config::headColName, i)).empty())
      {
        if (i == rootIndex)
        {
          // NOTE(review): setRoot writes "-1" as the head of the root while
          // this writes "0" - confirm which convention is intended.
          config.getFirstEmpty(Config::headColName, i) = "0";
          config.getFirstEmpty(Config::deprelColName, i) = "root";
        }
        else
        {
          config.getFirstEmpty(Config::headColName, i) = std::to_string(rootIndex);
        }
      }
    }

    for (int i = firstIndexOfSentence, currentId = 1; i <= lineIndex; ++i)
    {
      if (config.isEmptyNode(i))
        continue;

      // A multiword line gets a range and does not consume an id itself.
      if (config.isMultiwordPredicted(i))
        config.getFirstEmpty(Config::idColName, i) = fmt::format("{}-{}", currentId, currentId+config.getMultiwordSizePredicted(i));
      else
        config.getFirstEmpty(Config::idColName, i) = fmt::format("{}", currentId++);

      config.getFirstEmpty(Config::sentIdColName, i) = fmt::format("{}", lastSentId+1);
    }

    while (config.hasStack(0))
      config.popStack();
  };

  auto undo = [](Config &, Action &)
  {
    //TODO undo this
  };

  auto appliable = [](const Config & config, const Action &)
  {
    int lineIndex = config.getWordIndex();
    return config.has(0,lineIndex,0);
  };

  return {Type::Write, apply, undo, appliable};
}

/// Write `value` as the dependency label of the last attached word.
Action Action::deprel(std::string value)
{
  auto apply = [value](Config & config, Action & a)
  {
    addHypothesis(Config::deprelColName, config.getLastAttached(), value).apply(config, a);
  };

  auto undo = [](Config & config, Action & a)
  {
    addHypothesis(Config::deprelColName, config.getLastAttached(), "").undo(config, a);
  };

  auto appliable = [](const Config & config, const Action &)
  {
    return config.has(0,config.getLastAttached(),0);
  };

  return {Type::Write, apply, undo, appliable};
}

/// Write into (`toCol`, to-word) the lowercased content of (`fromCol`, from-word)
/// with the suffix `toRemove` replaced by `toAdd`. When both are empty the
/// content is copied unchanged (not lowercased).
Action Action::transformSuffix(std::string fromCol, Config::Object fromObj, int fromIndex, std::string toCol, Config::Object toObj, int toIndex, util::utf8string toRemove, util::utf8string toAdd)
{
  auto apply = [fromCol, fromObj, fromIndex, toCol, toObj, toIndex, toRemove, toAdd](Config & config, Action & a)
  {
    int fromLineIndex = config.getRelativeWordIndex(fromObj, fromIndex);
    int toLineIndex = config.getRelativeWordIndex(toObj, toIndex);

    if (toRemove.empty() and toAdd.empty())
    {
      addHypothesis(toCol, toLineIndex, std::string(config.getAsFeature(fromCol, fromLineIndex))).apply(config, a);
      return;
    }

    util::utf8string res = util::splitAsUtf8(util::lower(std::string(config.getAsFeature(fromCol, fromLineIndex))));

    for (unsigned int i = 0; i < toRemove.size(); i++)
      res.pop_back();

    for (auto & letter : toAdd)
      res.push_back(letter);

    addHypothesis(toCol, toLineIndex, fmt::format("{}", res)).apply(config, a);
  };

  auto undo = [toCol, toObj, toIndex](Config & config, Action & a)
  {
    int toLineIndex = config.getRelativeWordIndex(toObj, toIndex);

    addHypothesis(toCol, toLineIndex, "").undo(config, a);
  };

  auto appliable = [fromCol, fromObj, fromIndex, toCol, toObj, toIndex, toRemove, toAdd](const Config & config, const Action & a)
  {
    if (!config.hasRelativeWordIndex(fromObj, fromIndex) or !config.hasRelativeWordIndex(toObj, toIndex))
      return false;

    int fromLineIndex = config.getRelativeWordIndex(fromObj, fromIndex);
    int toLineIndex = config.getRelativeWordIndex(toObj, toIndex);

    util::utf8string res = util::splitAsUtf8(util::lower(std::string(config.getAsFeature(fromCol, fromLineIndex))));

    if (res.size() < toRemove.size())
      return false;

    // The source must actually end with `toRemove`.
    for (unsigned int i = 0; i < toRemove.size(); i++)
    {
      if (res.back() != toRemove[toRemove.size()-1-i])
        return false;
      res.pop_back();
    }

    for (auto & letter : toAdd)
      res.push_back(letter);

    return addHypothesis(toCol, toLineIndex, fmt::format("{}", res)).appliable(config, a);
  };

  return {Type::Write, apply, undo, appliable};
}

/// Copy the content of (`fromCol`, from-word) into (`toCol`, to-word) through
/// transformSuffix with empty suffixes. Always appliable.
Action Action::copyContent(std::string fromCol, Config::Object fromObj, int fromIndex, std::string toCol, Config::Object toObj, int toIndex)
{
  auto apply = [fromCol, fromObj, fromIndex, toCol, toObj, toIndex](Config & config, Action & a)
  {
    auto empty = util::utf8string();
    transformSuffix(fromCol, fromObj, fromIndex, toCol, toObj, toIndex, empty, empty).apply(config, a);
  };

  auto undo = [toCol, toObj, toIndex, fromCol, fromObj, fromIndex](Config & config, Action & a)
  {
    auto empty = util::utf8string();
    transformSuffix(fromCol, fromObj, fromIndex, toCol, toObj, toIndex, empty, empty).undo(config, a);
  };

  auto appliable = [](const Config &, const Action &)
  {
    return true;
  };

  return {Type::Write, apply, undo, appliable};
}

/// Write the uppercased content of (`col`, word) as a new hypothesis of that cell.
Action Action::uppercase(std::string col, Config::Object obj, int index)
{
  auto apply = [col, obj, index](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(obj, index);
    auto res = util::upper(config.getAsFeature(col, lineIndex));

    addHypothesis(col, lineIndex, res).apply(config, a);
  };

  auto undo = [col, obj, index](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(obj, index);

    addHypothesis(col, lineIndex, "").undo(config, a);
  };

  auto appliable = [col, obj, index](const Config & config, const Action & a)
  {
    if (!config.hasRelativeWordIndex(obj, index))
      return false;

    int lineIndex = config.getRelativeWordIndex(obj, index);

    return addHypothesis(col, lineIndex, "").appliable(config, a);
  };

  return {Type::Write, apply, undo, appliable};
}

/// Write, as a new hypothesis of (`col`, word), its content with the letter at
/// position `inIndex` uppercased.
Action Action::uppercaseIndex(std::string col, Config::Object obj, int index, int inIndex)
{
  auto apply = [col, obj, index, inIndex](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(obj, index);
    auto res = util::splitAsUtf8(std::string(config.getAsFeature(col, lineIndex)));
    util::upper(res[inIndex]);

    addHypothesis(col, lineIndex, fmt::format("{}", res)).apply(config, a);
  };

  auto undo = [col, obj, index](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(obj, index);

    // apply wrote through addHypothesis, so the write is reverted the same way
    // as in uppercase(); the previous code rewrote the value unchanged.
    addHypothesis(col, lineIndex, "").undo(config, a);
  };

  auto appliable = [col, obj, index, inIndex](const Config & config, const Action & a)
  {
    if (!config.hasRelativeWordIndex(obj, index))
      return false;

    int lineIndex = config.getRelativeWordIndex(obj, index);

    if ((int)util::splitAsUtf8(std::string(config.getAsFeature(col, lineIndex))).size() <= inIndex)
      return false;

    return addHypothesis(col, lineIndex, "").appliable(config, a);
  };

  return {Type::Write, apply, undo, appliable};
}

/// Write the lowercased content of (`col`, word) as a new hypothesis of that cell.
Action Action::lowercase(std::string col, Config::Object obj, int index)
{
  auto apply = [col, obj, index](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(obj, index);
    auto res = util::lower(config.getAsFeature(col, lineIndex));

    addHypothesis(col, lineIndex, res).apply(config, a);
  };

  auto undo = [col, obj, index](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(obj, index);

    addHypothesis(col, lineIndex, "").undo(config, a);
  };

  auto appliable = [col, obj, index](const Config & config, const Action & a)
  {
    if (!config.hasRelativeWordIndex(obj, index))
      return false;

    int lineIndex = config.getRelativeWordIndex(obj, index);

    return addHypothesis(col, lineIndex, "").appliable(config, a);
  };

  return {Type::Write, apply, undo, appliable};
}

/// Write, as a new hypothesis of (`col`, word), its content with the letter at
/// position `inIndex` lowercased.
Action Action::lowercaseIndex(std::string col, Config::Object obj, int index, int inIndex)
{
  auto apply = [col, obj, index, inIndex](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(obj, index);
    auto res = util::splitAsUtf8(std::string(config.getAsFeature(col, lineIndex)));
    util::lower(res[inIndex]);

    addHypothesis(col, lineIndex, fmt::format("{}", res)).apply(config, a);
  };

  auto undo = [col, obj, index](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(obj, index);

    // apply wrote through addHypothesis, so the write is reverted the same way
    // as in lowercase(); the previous code rewrote the value unchanged.
    addHypothesis(col, lineIndex, "").undo(config, a);
  };

  auto appliable = [col, obj, index, inIndex](const Config & config, const Action & a)
  {
    if (!config.hasRelativeWordIndex(obj, index))
      return false;

    int lineIndex = config.getRelativeWordIndex(obj, index);

    if ((int)util::splitAsUtf8(std::string(config.getAsFeature(col, lineIndex))).size() <= inIndex)
      return false;

    return addHypothesis(col, lineIndex, "").appliable(config, a);
  };

  return {Type::Write, apply, undo, appliable};
}

/// Write the score of the chosen action into (`colName`, word). When the score
/// is the lowest float value, the cell's hypothesis 0 is written instead.
Action Action::writeScore(const std::string & colName, Config::Object object, int relativeIndex)
{
  auto apply = [colName, object, relativeIndex](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(object, relativeIndex);
    float score = config.getChosenActionScore();

    if (-score != std::numeric_limits<float>::max())
      return addHypothesis(colName, lineIndex, fmt::format("{}", score)).apply(config, a);
    else
      return addHypothesis(colName, lineIndex, config.getConst(colName, lineIndex, 0)).apply(config, a);
  };

  auto undo = [colName, object, relativeIndex](Config & config, Action & a)
  {
    int lineIndex = config.getRelativeWordIndex(object, relativeIndex);

    return addHypothesis(colName, lineIndex, "").undo(config, a);
  };

  auto appliable = [colName, object, relativeIndex](const Config & config, const Action & a)
  {
    if (!config.hasRelativeWordIndex(object, relativeIndex))
      return false;

    int lineIndex = config.getRelativeWordIndex(object, relativeIndex);

    return addHypothesis(colName, lineIndex, "").appliable(config, a);
  };

  return {Type::Write, apply, undo, appliable};
}