From 1d0db7200f8fc0b6cdaecfc4123140f5c0e17e5d Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Fri, 21 Feb 2020 15:19:16 +0100
Subject: [PATCH] Now correctly updating ids of multiwords token

---
 reading_machine/include/Config.hpp |  1 +
 reading_machine/src/Action.cpp     | 10 ++++++----
 reading_machine/src/Config.cpp     | 12 ++++++++++++
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/reading_machine/include/Config.hpp b/reading_machine/include/Config.hpp
index 310e2f4..21dab0b 100644
--- a/reading_machine/include/Config.hpp
+++ b/reading_machine/include/Config.hpp
@@ -91,6 +91,7 @@ class Config
   void popStack();
   bool isComment(std::size_t lineIndex) const;
   bool isMultiword(std::size_t lineIndex) const;
+  int getMultiwordSize(std::size_t lineIndex) const;
   bool isEmptyNode(std::size_t lineIndex) const;
   bool isToken(std::size_t lineIndex) const;
   bool moveWordIndex(int relativeMovement);
diff --git a/reading_machine/src/Action.cpp b/reading_machine/src/Action.cpp
index 94bfee7..ecbcff7 100644
--- a/reading_machine/src/Action.cpp
+++ b/reading_machine/src/Action.cpp
@@ -287,7 +287,7 @@ Action Action::updateIds()
           break;
         util::myThrow("The current sentence is too long to be completly held by the data strucure. Consider increasing SubConfig::SpanSize");
       }
-      if (!config.isToken(i))
+      if (config.isComment(i) || config.isEmptyNode(i))
         continue;
 
       if (config.getLastNotEmptyHypConst(Config::EOSColName, i) == Config::EOSSymbol1)
@@ -301,14 +301,16 @@ Action Action::updateIds()
 
     for (unsigned int i = firstIndexOfSentence, currentId = 1; i <= config.getStack(0); ++i)
     {
-      if (!config.isToken(i))
+      if (config.isComment(i) || config.isEmptyNode(i))
         continue;
 
       if (config.getLastNotEmptyHypConst(Config::EOSColName, i) == Config::EOSSymbol1)
         break;
 
-      config.getFirstEmpty(Config::idColName, i) = fmt::format("{}", currentId);
-      ++currentId;
+      if (config.isMultiword(i))
+        config.getFirstEmpty(Config::idColName, i) = fmt::format("{}-{}", currentId, currentId+config.getMultiwordSize(i));
+      else
+        config.getFirstEmpty(Config::idColName, i) = fmt::format("{}", currentId++);
     }
   };
 
diff --git a/reading_machine/src/Config.cpp b/reading_machine/src/Config.cpp
index dd8ca3e..ca81d9e 100644
--- a/reading_machine/src/Config.cpp
+++ b/reading_machine/src/Config.cpp
@@ -334,6 +334,18 @@ bool Config::isMultiword(std::size_t lineIndex) const
   return hasColIndex(idColName) && getConst(idColName, lineIndex, 0).get().find('-') != std::string::npos;
 }
 
+// Size of the multiword token stored at lineIndex: for an id "first-last" this
+// is last - first (e.g. 1 for "3-4"). An id that does not split into exactly two
+// parts around '-' is reported through util::myThrow instead of being indexed.
+int Config::getMultiwordSize(std::size_t lineIndex) const
+{
+  auto bounds = util::split(getConst(idColName, lineIndex, 0).get(), '-');
+  if (bounds.size() != 2)
+    util::myThrow("Multiword id is not of the form 'first-last'");
+
+  return std::stoi(std::string(bounds[1])) - std::stoi(std::string(bounds[0]));
+}
+
 bool Config::isEmptyNode(std::size_t lineIndex) const
 {
   return hasColIndex(idColName) && getConst(idColName, lineIndex, 0).get().find('.') != std::string::npos;
-- 
GitLab