Commit e925b559 authored by Franck Dary's avatar Franck Dary
Browse files

<unk> in pretrained embeddings is used for Dict::unknownValueStr

parent fd4bc157
......@@ -45,8 +45,10 @@ void Submodule::loadPretrainedW2vEmbeddings(torch::nn::Embedding & embeddings, s
util::myThrow(fmt::format("invalid w2v line '{}' less than 2 columns", buffer));
auto dictIndex = getDict().getIndexOrInsert(splited[0]);
if (splited[0] == "<unk>")
dictIndex = getDict().getIndexOrInsert(Dict::unknownValueStr);
if (dictIndex == getDict().getIndexOrInsert(Dict::unknownValueStr) or dictIndex == getDict().getIndexOrInsert(Dict::nullValueStr) or dictIndex == getDict().getIndexOrInsert(Dict::emptyValueStr))
if (splited[0] != "<unk>" and splited[0] != Dict::unknownValueStr and (dictIndex == getDict().getIndexOrInsert(Dict::unknownValueStr) or dictIndex == getDict().getIndexOrInsert(Dict::nullValueStr) or dictIndex == getDict().getIndexOrInsert(Dict::emptyValueStr)))
continue;
if (embeddingsSize != splited.size()-1)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment