Skip to content
Snippets Groups Projects
Commit be897165 authored by Franck Dary's avatar Franck Dary
Browse files

Started to implement fasttext style embeddings

parent e756cfa9
No related branches found
No related tags found
No related merge requests found
...@@ -25,6 +25,12 @@ dynet::ParameterCollection & NeuralNetwork::getModel() ...@@ -25,6 +25,12 @@ dynet::ParameterCollection & NeuralNetwork::getModel()
dynet::Expression NeuralNetwork::featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv) dynet::Expression NeuralNetwork::featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv)
{ {
if (fv.dicts.empty())
{
fprintf(stderr, "ERROR (%s) : FeatureValue is empty, cannot get its expression. Aborting.\n", ERRINFO);
exit(1);
}
std::vector<dynet::Expression> expressions; std::vector<dynet::Expression> expressions;
for (unsigned int i = 0; i < fv.dicts.size(); i++) for (unsigned int i = 0; i < fv.dicts.size(); i++)
......
...@@ -137,6 +137,7 @@ class FeatureBank ...@@ -137,6 +137,7 @@ class FeatureBank
/// ///
/// @return An aggregate of Features representing the window. /// @return An aggregate of Features representing the window.
static FeatureModel::FeatureValue aggregateStack(Config & c, int from, const std::vector<std::string> & exceptions); static FeatureModel::FeatureValue aggregateStack(Config & c, int from, const std::vector<std::string> & exceptions);
static FeatureModel::FeatureValue fasttext(Config & c, const FeatureModel::FeatureValue & word);
}; };
#endif #endif
...@@ -106,6 +106,11 @@ std::function<FeatureModel::FeatureValue(Config &)> FeatureBank::str2func(const ...@@ -106,6 +106,11 @@ std::function<FeatureModel::FeatureValue(Config &)> FeatureBank::str2func(const
if(infos == "U") if(infos == "U")
return [access, s, tapeName, index](Config & c) return [access, s, tapeName, index](Config & c)
{return getUppercase(c, access(c, index, tapeName, s));}; {return getUppercase(c, access(c, index, tapeName, s));};
else if(infos == "fasttext")
{
return [access, s, tapeName, index](Config & c)
{return fasttext(c, access(c, index, tapeName, s));};
}
else if(infos == "LEN") else if(infos == "LEN")
return [access, s, tapeName, index](Config & c) return [access, s, tapeName, index](Config & c)
{return getLength(c, access(c, index, tapeName, s));}; {return getLength(c, access(c, index, tapeName, s));};
...@@ -514,3 +519,40 @@ FeatureModel::FeatureValue FeatureBank::aggregateStack(Config & c, int from, con ...@@ -514,3 +519,40 @@ FeatureModel::FeatureValue FeatureBank::aggregateStack(Config & c, int from, con
return result; return result;
} }
// TODO: take the word as a plain string instead of a FeatureValue, so that this also works with unknown words.
/// @brief Build a multi-valued feature from consecutive letter spans of a word (fasttext style).
///
/// The word is read from the first value of `word`. Each element appended to the
/// result is the first entry (dict, name, value, policy) of what getLetters
/// returns for one span of the word.
///
/// @param c The current Config, only forwarded to getLetters.
/// @param word The feature holding the word; only its first value and first name are read here.
///
/// @return If the word is Dict::nullValueStr, a single null-valued feature in the "letters" dict.
/// If the word has fewer symbols than the gram length, the single span [0, wordLength-1].
/// Otherwise one entry per span [i, i+gramLength-1] that fits inside the word.
FeatureModel::FeatureValue FeatureBank::fasttext(Config & c, const FeatureModel::FeatureValue & word)
{
  Dict * lettersDict = Dict::getDict("letters");
  auto policy = dictPolicy2FeaturePolicy(lettersDict->policy);

  // NOTE(review): word.values[0] and word.names[0] are indexed without checking that `word` is non-empty.
  if (word.values[0] == Dict::nullValueStr)
    return {lettersDict, word.names[0], Dict::nullValueStr, policy};

  FeatureModel::FeatureValue result;

  // Copies the first entry of a getLetters result at the end of `result`.
  auto appendFirstEntry = [&result](const FeatureModel::FeatureValue & gram)
  {
    result.dicts.emplace_back(gram.dicts[0]);
    result.names.emplace_back(gram.names[0]);
    result.values.emplace_back(gram.values[0]);
    result.policies.emplace_back(gram.policies[0]);
  };

  // Number of symbols covered by each span; currently fixed.
  const unsigned int gramLength = 2;
  const unsigned int wordLength = getNbSymbols(word.values[0]);

  if (wordLength < gramLength)
  {
    // The word is too short for a full gram : use the whole word as the only span.
    // NOTE(review): if getNbSymbols returns 0, wordLength-1 wraps around (unsigned arithmetic) - confirm how getLetters handles that.
    appendFirstEntry(getLetters(c, word, 0, wordLength-1));
    return result;
  }

  // Slide a window of gramLength symbols over the word, one symbol at a time.
  for (unsigned int first = 0; first+gramLength <= wordLength; first++)
    appendFirstEntry(getLetters(c, word, first, first+gramLength-1));

  return result;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment