Skip to content
Snippets Groups Projects
Commit a88641ae authored by Franck Dary
Browse files

Fixed dynamic oracle for tokenization transitions

parent 80730f88
No related branches found
No related tags found
No related merge requests found
......@@ -197,9 +197,15 @@ void Transition::initIgnoreChar()
{
sequence.emplace_back(Action::ignoreCurrentCharacter());
costDynamic = [](const Config &)
costDynamic = [](const Config & config)
{
return 0;
auto letter = fmt::format("{}", config.getLetter(config.getCharacterIndex()));
auto goldWord = util::splitAsUtf8(config.getConst("FORM", config.getWordIndex(), 0).get());
auto curWord = util::splitAsUtf8(config.getAsFeature("FORM", config.getWordIndex()).get());
if (curWord.size() >= goldWord.size())
return 0;
return goldWord[curWord.size()] == letter ? 1 : 0;
};
costStatic = costDynamic;
......@@ -231,6 +237,9 @@ void Transition::initAddCharToWord()
if (!config.hasCharacter(config.getCharacterIndex()))
return std::numeric_limits<int>::max();
if (!config.isToken(config.getWordIndex()))
return std::numeric_limits<int>::max();
auto letter = fmt::format("{}", config.getLetter(config.getCharacterIndex()));
auto & goldWord = config.getConst("FORM", config.getWordIndex(), 0).get();
auto & curWord = config.getAsFeature("FORM", config.getWordIndex()).get();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment