Skip to content
Snippets Groups Projects
Commit 7c956acf authored by Franck Dary
Browse files

Fixed lemmatization

parent ca56eef3
Branches
No related tags found
No related merge requests found
...@@ -97,7 +97,7 @@ int main(int argc, char * argv[]) ...@@ -97,7 +97,7 @@ int main(int argc, char * argv[])
File fplm(fplmFilename, "r"); File fplm(fplmFilename, "r");
char buffer[100000]; char buffer[100000];
std::map<std::string, int> rules; std::map<std::string, std::vector<std::string> > rules;
while (fscanf(fplm.getDescriptor(), "%[^\n]\n", buffer) == 1) while (fscanf(fplm.getDescriptor(), "%[^\n]\n", buffer) == 1)
{ {
auto splited = split(buffer, '\t'); auto splited = split(buffer, '\t');
...@@ -112,7 +112,7 @@ int main(int argc, char * argv[]) ...@@ -112,7 +112,7 @@ int main(int argc, char * argv[])
auto lemma = splited[2]; auto lemma = splited[2];
auto rule = getRule(form, lemma); auto rule = getRule(form, lemma);
rules[rule]++; rules[rule].emplace_back(buffer);
} }
File rulesFile(rulesFilename, "w"); File rulesFile(rulesFilename, "w");
...@@ -120,10 +120,11 @@ int main(int argc, char * argv[]) ...@@ -120,10 +120,11 @@ int main(int argc, char * argv[])
for (auto & it : rules) for (auto & it : rules)
{ {
if (it.second >= threshold) if ((int)it.second.size() >= threshold)
fprintf(rulesFile.getDescriptor(), "%s\n", it.first.c_str()); fprintf(rulesFile.getDescriptor(), "%s\n", it.first.c_str());
else else
fprintf(exceptionsFile.getDescriptor(), "%s\n", it.first.c_str()); for (auto & line : it.second)
fprintf(exceptionsFile.getDescriptor(), "%s\n", line.c_str());
} }
return 0; return 0;
......
...@@ -382,7 +382,6 @@ void Oracle::createDatabase() ...@@ -382,7 +382,6 @@ void Oracle::createDatabase()
char b3[1024]; char b3[1024];
char b4[1024]; char b4[1024];
while (fscanf(fd, "%[^\t]\t%[^\t]\t%[^\t]\t%[^\n]\n", b1, b2, b3, b4) != 4);
while (fscanf(fd, "%[^\t]\t%[^\t]\t%[^\t]\t%[^\n]\n", b1, b2, b3, b4) == 4) while (fscanf(fd, "%[^\t]\t%[^\t]\t%[^\t]\t%[^\n]\n", b1, b2, b3, b4) == 4)
{ {
oracle->data[std::string(b1) + std::string("_") + b2] = b3; oracle->data[std::string(b1) + std::string("_") + b2] = b3;
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment