diff --git a/reading_machine/src/Action.cpp b/reading_machine/src/Action.cpp index c15880859ffa1e7f662a6bedd6ec2a468e246aab..75ca4fa8b3bdb56043ac10e78e89ae7e9002c2d7 100644 --- a/reading_machine/src/Action.cpp +++ b/reading_machine/src/Action.cpp @@ -80,9 +80,6 @@ Action Action::consumeCharacterIndex(const util::utf8string & consumed) { auto apply = [consumed](Config & config, Action &) { - config.getLastNotEmptyHyp(Config::rawRangeStartColName, config.getWordIndex()) = fmt::format("{}", config.getCharacterIndex()); - config.getLastNotEmptyHyp(Config::rawRangeEndColName, config.getWordIndex()) = fmt::format("{}", config.getCharacterIndex() + consumed.size()); - config.moveCharacterIndex(consumed.size()); }; @@ -586,8 +583,7 @@ Action Action::addCharsToCol(const std::string & col, int n, Config::Object obje { if (util::isEmpty(config.getAsFeature(Config::rawRangeStartColName, index))) config.getLastNotEmptyHyp(Config::rawRangeStartColName, index) = fmt::format("{}", config.getCharacterIndex()); - if (util::isEmpty(config.getAsFeature(Config::rawRangeEndColName, index))) - config.getLastNotEmptyHyp(Config::rawRangeEndColName, index) = fmt::format("{}", config.getCharacterIndex()); + config.getLastNotEmptyHyp(Config::rawRangeEndColName, index) = fmt::format("{}", config.getCharacterIndex()); int curEndValue = std::stoi(config.getAsFeature(Config::rawRangeEndColName, index)); config.getLastNotEmptyHyp(Config::rawRangeEndColName, index) = fmt::format("{}", curEndValue+n); } @@ -753,8 +749,15 @@ Action Action::updateIds(int bufferIndex) if (config.has(0,firstIndexOfSentence,0)) { std::string textMetadata = "# text = "; - for (auto i = std::stoi(config.getAsFeature(Config::rawRangeStartColName, firstIndexOfSentence)); i < std::stoi(config.getAsFeature(Config::rawRangeEndColName, lineIndex)); i++) + int firstIndex = 0; + int lastIndex = 0; + try {firstIndex = std::stoi(config.getAsFeature(Config::rawRangeStartColName, firstIndexOfSentence));} + catch (std::exception & e) {util::myThrow(fmt::format("{} : '{}'", e.what(), config.getAsFeature(Config::rawRangeStartColName, firstIndexOfSentence)));} + try {lastIndex = std::stoi(config.getAsFeature(Config::rawRangeEndColName, lineIndex));} + catch (std::exception & e) {util::myThrow(fmt::format("{} : '{}'", e.what(), config.getAsFeature(Config::rawRangeEndColName, lineIndex)));} + for (auto i = firstIndex; i < lastIndex; i++) textMetadata = fmt::format("{}{}", textMetadata, config.getLetter(i)); + config.getLastNotEmptyHyp(Config::commentsColName, firstIndexOfSentence) = fmt::format("{}\n# sent_id = {}", textMetadata, config.getAsFeature(Config::sentIdColName, firstIndexOfSentence)); } }; diff --git a/reading_machine/src/Config.cpp b/reading_machine/src/Config.cpp index 5bd1f7df5a014ad08938625451b892bb48e6153e..ffe2f3e7a1c2a13ee04133f5d8963b9a49765621 100644 --- a/reading_machine/src/Config.cpp +++ b/reading_machine/src/Config.cpp @@ -145,6 +145,8 @@ void Config::printForDebug(FILE * dest) const static constexpr int windowSize = 10; static constexpr int lettersWindowSize = 40; static constexpr int maxWordLength = 7; + // exceptions = special columns we wish to print + static std::set<std::string> exceptions{}; int firstLineToPrint = wordIndex; int lastLineToPrint = wordIndex; @@ -159,7 +161,7 @@ void Config::printForDebug(FILE * dest) const toPrint.back().emplace_back(""); for (unsigned int i = 0; i < getNbColumns(); i++) { - if (isExtraColumn(getColName(i)) and getColName(i) != EOSColName) + if ((isExtraColumn(getColName(i)) and exceptions.count(getColName(i)) == 0) and getColName(i) != EOSColName) continue; toPrint.back().emplace_back(getColName(i)); } @@ -170,7 +172,7 @@ void Config::printForDebug(FILE * dest) const toPrint.back().emplace_back(line == (int)wordIndex ? "=>" : ""); for (unsigned int i = 0; i < getNbColumns(); i++) { - if (isExtraColumn(getColName(i)) and getColName(i) != EOSColName) + if ((isExtraColumn(getColName(i)) and exceptions.count(getColName(i)) == 0) and getColName(i) != EOSColName) continue; std::string colContent = has(i,line,0) ? getAsFeature(i, line).get() : "?"; std::string toPrintCol = colContent; diff --git a/reading_machine/src/Transition.cpp b/reading_machine/src/Transition.cpp index 1b8d309fceee9527550389ff5ee6b3b66d482240..db345911e4cbaceb80f09dedd34a7998b6390c7f 100644 --- a/reading_machine/src/Transition.cpp +++ b/reading_machine/src/Transition.cpp @@ -317,7 +317,11 @@ void Transition::initSplitWord(std::vector<std::string> words) sequence.emplace_back(Action::addCharsToCol("FORM", consumedWord.size(), Config::Object::Buffer, 0)); sequence.emplace_back(Action::consumeCharacterIndex(consumedWord)); for (unsigned int i = 1; i < words.size(); i++) + { sequence.emplace_back(Action::addHypothesisRelativeRelaxed("FORM", Config::Object::Buffer, i, words[i])); + sequence.emplace_back(Action::transformSuffix(Config::rawRangeStartColName, Config::Object::Buffer, 0, Config::rawRangeStartColName, Config::Object::Buffer, i, util::utf8string(), util::utf8string())); + sequence.emplace_back(Action::transformSuffix(Config::rawRangeEndColName, Config::Object::Buffer, 0, Config::rawRangeEndColName, Config::Object::Buffer, i, util::utf8string(), util::utf8string())); + } sequence.emplace_back(Action::setMultiwordIds(words.size()-1)); costDynamic = [words](const Config & config)