Commit a88641ae authored by Franck Dary
Browse files

Fixed dynamic oracle for tokenization transitions

parent 80730f88
......@@ -197,9 +197,15 @@ void Transition::initIgnoreChar()
{
sequence.emplace_back(Action::ignoreCurrentCharacter());
costDynamic = [](const Config &)
costDynamic = [](const Config & config)
{
return 0;
auto letter = fmt::format("{}", config.getLetter(config.getCharacterIndex()));
auto goldWord = util::splitAsUtf8(config.getConst("FORM", config.getWordIndex(), 0).get());
auto curWord = util::splitAsUtf8(config.getAsFeature("FORM", config.getWordIndex()).get());
if (curWord.size() >= goldWord.size())
return 0;
return goldWord[curWord.size()] == letter ? 1 : 0;
};
costStatic = costDynamic;
......@@ -231,6 +237,9 @@ void Transition::initAddCharToWord()
if (!config.hasCharacter(config.getCharacterIndex()))
return std::numeric_limits<int>::max();
if (!config.isToken(config.getWordIndex()))
return std::numeric_limits<int>::max();
auto letter = fmt::format("{}", config.getLetter(config.getCharacterIndex()));
auto & goldWord = config.getConst("FORM", config.getWordIndex(), 0).get();
auto & curWord = config.getAsFeature("FORM", config.getWordIndex()).get();
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment