From 5fd7d4bc63f17586986ca26ab12ab1730615ea2d Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Wed, 23 Oct 2019 12:16:55 +0200 Subject: [PATCH] Improved lemmatization --- transition_machine/src/Oracle.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/transition_machine/src/Oracle.cpp b/transition_machine/src/Oracle.cpp index 097a7a3..a603532 100644 --- a/transition_machine/src/Oracle.cpp +++ b/transition_machine/src/Oracle.cpp @@ -406,6 +406,11 @@ void Oracle::createDatabase() newState = "lemmatizer_rules"; movement = 0; } + else if (previousAction == "nothing") + { + newState = "lemmatizer_lookup"; + movement = 1; + } else { newState = "lemmatizer_lookup"; @@ -676,6 +681,16 @@ void Oracle::createDatabase() const std::string & form = c.getTape("FORM")[0]; const std::string & pos = c.getTape("POS")[0]; std::string lemma; + + if (c.hasTape("ID")) + { + auto & id = c.getTape("ID")[0]; + if (!id.empty()) + { + if (util::split(id, '-').size() > 1) + return std::string("NOTHING"); + } + } if (oracle->data.count(form + "_" + pos)) lemma = oracle->data[form + "_" + pos]; -- GitLab