From 4484489eedaf2d5235bd812254f5145b4661e197 Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Sat, 15 Feb 2020 23:31:15 +0100 Subject: [PATCH] Fixed multiple issues regarding EOS --- reading_machine/include/Action.hpp | 1 + reading_machine/include/SubConfig.hpp | 2 +- reading_machine/src/Action.cpp | 54 ++++++++++++++++++++++++++- reading_machine/src/Config.cpp | 31 +++++++++++++-- reading_machine/src/Transition.cpp | 22 +++++++++-- 5 files changed, 100 insertions(+), 10 deletions(-) diff --git a/reading_machine/include/Action.hpp b/reading_machine/include/Action.hpp index 03158ad..a20f68a 100644 --- a/reading_machine/include/Action.hpp +++ b/reading_machine/include/Action.hpp @@ -54,6 +54,7 @@ class Action static Action popStack(); static Action emptyStack(); static Action setRoot(); + static Action updateIds(); static Action attach(Object governorObject, int governorIndex, Object dependentObject, int dependentIndex); }; diff --git a/reading_machine/include/SubConfig.hpp b/reading_machine/include/SubConfig.hpp index 66fbcc0..2f1efd1 100644 --- a/reading_machine/include/SubConfig.hpp +++ b/reading_machine/include/SubConfig.hpp @@ -8,7 +8,7 @@ class SubConfig : public Config { private : - static constexpr std::size_t spanSize = 200; + static constexpr std::size_t spanSize = 800; private : diff --git a/reading_machine/src/Action.cpp b/reading_machine/src/Action.cpp index 069c3c0..a3e468d 100644 --- a/reading_machine/src/Action.cpp +++ b/reading_machine/src/Action.cpp @@ -240,7 +240,7 @@ Action Action::setRoot() if (config.getLastNotEmptyHypConst(Config::EOSColName, i) == Config::EOSSymbol1) break; - if (util::isEmpty(config.getLastNotEmptyHyp(Config::headColName, i))) + if (util::isEmpty(config.getLastNotEmptyHypConst(Config::headColName, i))) { if (i == rootIndex) { @@ -273,6 +273,57 @@ Action Action::setRoot() return {Type::Write, apply, undo, appliable}; } +Action Action::updateIds() +{ + auto apply = [](Config & config, Action & a) + { + 
int firstIndexOfSentence = -1; + for (int i = config.getWordIndex()-1; true; --i) + { + if (!config.has(0, i, 0)) + { + if (i < 0) + break; + util::myThrow("The current sentence is too long to be completly held by the data strucure. Consider increasing SubConfig::SpanSize"); + } + if (!config.isToken(i)) + continue; + + if (config.getLastNotEmptyHypConst(Config::EOSColName, i) == Config::EOSSymbol1) + break; + + firstIndexOfSentence = i; + } + + if (firstIndexOfSentence < 0) + util::myThrow("could not find any token in current sentence"); + + for (unsigned int i = firstIndexOfSentence, currentId = 1; i < config.getWordIndex(); ++i) + { + if (!config.isToken(i)) + continue; + + if (config.getLastNotEmptyHypConst(Config::EOSColName, i) == Config::EOSSymbol1) + break; + + config.getFirstEmpty(Config::idColName, i) = fmt::format("{}", currentId); + ++currentId; + } + }; + + auto undo = [](Config & config, Action & a) + { + // TODO : undo this + }; + + auto appliable = [](const Config &, const Action &) + { + return true; + }; + + return {Type::Write, apply, undo, appliable}; +} + Action Action::attach(Object governorObject, int governorIndex, Object dependentObject, int dependentIndex) { auto apply = [governorObject, governorIndex, dependentObject, dependentIndex](Config & config, Action & a) @@ -322,4 +373,3 @@ Action::Object Action::str2object(const std::string & s) util::myThrow(fmt::format("Invalid object '{}'", s)); return Object::Buffer; } - diff --git a/reading_machine/src/Config.cpp b/reading_machine/src/Config.cpp index 8e6a12f..1f4ba62 100644 --- a/reading_machine/src/Config.cpp +++ b/reading_machine/src/Config.cpp @@ -62,11 +62,31 @@ std::size_t Config::getNbLines() const void Config::print(FILE * dest) const { + std::vector<std::string> currentSequence; + std::vector<std::string> currentSequenceComments; + + auto flushCurrentSequence = [&dest, &currentSequence, &currentSequenceComments]() + { + if (currentSequence.empty() && currentSequenceComments.empty()) + return; + 
+ for (auto & comment : currentSequenceComments) + fmt::print(dest, "{}", comment); + + for (auto & line : currentSequence) + fmt::print(dest, "{}", line); + + fmt::print(dest, "\n"); + + currentSequence.clear(); + currentSequenceComments.clear(); + }; + for (unsigned int line = 0; line < getNbLines(); line++) { if (isComment(getFirstLineIndex()+line)) { - fmt::print(dest, "{}\n", getConst(0, getFirstLineIndex()+line, 0)); + currentSequenceComments.emplace_back(fmt::format("{}\n", getConst(0, getFirstLineIndex()+line, 0))); continue; } for (unsigned int i = 0; i < getNbColumns()-1; i++) @@ -76,11 +96,14 @@ void Config::print(FILE * dest) const if (valueToPrint.empty()) valueToPrint = "_"; - fmt::print(dest, "{}{}", valueToPrint, i < getNbColumns()-2 ? "\t" : "\n"); + currentSequence.emplace_back(fmt::format("{}{}", valueToPrint, i < getNbColumns()-2 ? "\t" : "\n")); } - if (getLastNotEmptyConst(EOSColName, getFirstLineIndex()+line) == EOSSymbol1) - fmt::print(dest, "\n"); + auto & eosColContent = isPredicted(EOSColName) ? 
getLastNotEmptyHypConst(EOSColName, getFirstLineIndex()+line) : getLastNotEmptyConst(EOSColName, getFirstLineIndex()+line); + if (eosColContent == EOSSymbol1) + flushCurrentSequence(); } + + flushCurrentSequence(); } void Config::printForDebug(FILE * dest) const diff --git a/reading_machine/src/Transition.cpp b/reading_machine/src/Transition.cpp index f792b9f..0489b8b 100644 --- a/reading_machine/src/Transition.cpp +++ b/reading_machine/src/Transition.cpp @@ -254,6 +254,7 @@ void Transition::initReduce() void Transition::initEOS() { sequence.emplace_back(Action::setRoot()); + sequence.emplace_back(Action::updateIds()); sequence.emplace_back(Action::addHypothesisRelative(Config::EOSColName, Action::Object::Stack, 0, Config::EOSSymbol1)); sequence.emplace_back(Action::emptyStack()); @@ -268,10 +269,25 @@ void Transition::initEOS() int cost = 0; if (config.getConst(Config::EOSColName, config.getStack(0), 0) != Config::EOSSymbol1) - ++cost; + cost += 100; - if (util::isEmpty(config.getLastNotEmptyHypConst(Config::headColName, config.getStack(0)))) - ++cost; + auto topStackIndex = config.getStack(0); + auto topStackGov = config.getConst(Config::headColName, topStackIndex, 0); + auto topStackGovPred = config.getLastNotEmptyHypConst(Config::headColName, topStackIndex); + + --cost; + for (int i = 0; config.hasStack(i); ++i) + { + if (!config.has(0, config.getStack(i), 0)) + continue; + + auto otherStackIndex = config.getStack(i); + auto stackId = config.getConst(Config::idColName, otherStackIndex, 0); + auto stackGovPred = config.getLastNotEmptyHypConst(Config::headColName, otherStackIndex); + + if (util::isEmpty(stackGovPred)) + ++cost; + } return cost; }; -- GitLab