From 7c956acfe2d26f9561de41241cd9c2a9bcea2566 Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Wed, 10 Apr 2019 15:58:54 +0200 Subject: [PATCH] Fixed lemmatization --- maca_common/src/macaon_compute_l_rules.cpp | 9 +++++---- transition_machine/src/Oracle.cpp | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/maca_common/src/macaon_compute_l_rules.cpp b/maca_common/src/macaon_compute_l_rules.cpp index 12a1b94..d4f425e 100644 --- a/maca_common/src/macaon_compute_l_rules.cpp +++ b/maca_common/src/macaon_compute_l_rules.cpp @@ -97,7 +97,7 @@ int main(int argc, char * argv[]) File fplm(fplmFilename, "r"); char buffer[100000]; - std::map<std::string, int> rules; + std::map<std::string, std::vector<std::string> > rules; while (fscanf(fplm.getDescriptor(), "%[^\n]\n", buffer) == 1) { auto splited = split(buffer, '\t'); @@ -112,7 +112,7 @@ int main(int argc, char * argv[]) auto lemma = splited[2]; auto rule = getRule(form, lemma); - rules[rule]++; + rules[rule].emplace_back(buffer); } File rulesFile(rulesFilename, "w"); @@ -120,10 +120,11 @@ int main(int argc, char * argv[]) for (auto & it : rules) { - if (it.second >= threshold) + if ((int)it.second.size() >= threshold) fprintf(rulesFile.getDescriptor(), "%s\n", it.first.c_str()); else - fprintf(exceptionsFile.getDescriptor(), "%s\n", it.first.c_str()); + for (auto & line : it.second) + fprintf(exceptionsFile.getDescriptor(), "%s\n", line.c_str()); } return 0; diff --git a/transition_machine/src/Oracle.cpp b/transition_machine/src/Oracle.cpp index a208dda..9a33b9a 100644 --- a/transition_machine/src/Oracle.cpp +++ b/transition_machine/src/Oracle.cpp @@ -382,7 +382,6 @@ void Oracle::createDatabase() char b3[1024]; char b4[1024]; - while (fscanf(fd, "%[^\t]\t%[^\t]\t%[^\t]\t%[^\n]\n", b1, b2, b3, b4) != 4); while (fscanf(fd, "%[^\t]\t%[^\t]\t%[^\t]\t%[^\n]\n", b1, b2, b3, b4) == 4) { oracle->data[std::string(b1) + std::string("_") + b2] = b3; -- GitLab