diff --git a/neural_network/src/NeuralNetwork.cpp b/neural_network/src/NeuralNetwork.cpp index 75a8ccba2b03ca45fe25d7f4f68c65df14e468e1..22fb0071003880b86cf84bedb3f97e0a1ce8121a 100644 --- a/neural_network/src/NeuralNetwork.cpp +++ b/neural_network/src/NeuralNetwork.cpp @@ -25,6 +25,12 @@ dynet::ParameterCollection & NeuralNetwork::getModel() dynet::Expression NeuralNetwork::featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv) { + if (fv.dicts.empty()) + { + fprintf(stderr, "ERROR (%s) : FeatureValue is empty, cannot get its expression. Aborting.\n", ERRINFO); + exit(1); + } + std::vector<dynet::Expression> expressions; for (unsigned int i = 0; i < fv.dicts.size(); i++) diff --git a/transition_machine/include/FeatureBank.hpp b/transition_machine/include/FeatureBank.hpp index 632e6bfac787224e84c6de1f48309faa5f7101a2..dbf005c3a29b7f15323c858039045ca196930e7c 100644 --- a/transition_machine/include/FeatureBank.hpp +++ b/transition_machine/include/FeatureBank.hpp @@ -137,6 +137,7 @@ class FeatureBank /// /// @return An aggregate of Features representing the window. 
static FeatureModel::FeatureValue aggregateStack(Config & c, int from, const std::vector<std::string> & exceptions); + static FeatureModel::FeatureValue fasttext(Config & c, const FeatureModel::FeatureValue & word); }; #endif diff --git a/transition_machine/src/FeatureBank.cpp b/transition_machine/src/FeatureBank.cpp index 01d21a46df606467bfce9d212b5d3dfad3adca70..5c5ce4d010159f2a310b20f6558ef32f4739b72b 100644 --- a/transition_machine/src/FeatureBank.cpp +++ b/transition_machine/src/FeatureBank.cpp @@ -106,6 +106,11 @@ std::function<FeatureModel::FeatureValue(Config &)> FeatureBank::str2func(const if(infos == "U") return [access, s, tapeName, index](Config & c) {return getUppercase(c, access(c, index, tapeName, s));}; + else if(infos == "fasttext") + { + return [access, s, tapeName, index](Config & c) + {return fasttext(c, access(c, index, tapeName, s));}; + } else if(infos == "LEN") return [access, s, tapeName, index](Config & c) {return getLength(c, access(c, index, tapeName, s));}; @@ -514,3 +519,47 @@ FeatureModel::FeatureValue FeatureBank::aggregateStack(Config & c, int from, con return result; }
+// TODO: take the word as a string rather than a FeatureValue, so that this also works with unknown words.
+/// @brief Gathers, in a single FeatureValue, entry 0 of every getLetters() result taken over a word.
+///
+/// getLetters(c, word, i, i + gramLength - 1) is called for each i such that i + gramLength - 1 is
+/// below the symbol count of word.values[0] (gramLength is 2, so presumably once per pair of
+/// adjacent symbols). A word with fewer symbols gets the single call getLetters(c, word, 0, count - 1).
+///
+/// @param c Configuration, only forwarded to getLetters().
+/// @param word Feature to split; values[0] and names[0] are read without a size check.
+///
+/// @return The gathered entries, or a FeatureValue built from the "letters" dict, word.names[0]
+/// and Dict::nullValueStr when word.values[0] equals Dict::nullValueStr.
+FeatureModel::FeatureValue FeatureBank::fasttext(Config & c, const FeatureModel::FeatureValue & word)
+{
+  Dict * lettersDict = Dict::getDict("letters");
+  auto policy = dictPolicy2FeaturePolicy(lettersDict->policy);
+
+  if (word.values[0] == Dict::nullValueStr)
+    return {lettersDict, word.names[0], Dict::nullValueStr, policy};
+
+  const unsigned int gramLength = 2;
+  const unsigned int nbSymbols = getNbSymbols(word.values[0]);
+
+  // Too-short words are covered by one window as wide as the word itself.
+  const bool isShort = nbSymbols < gramLength;
+  const unsigned int windowSize = isShort ? nbSymbols : gramLength;
+  const unsigned int nbWindows = isShort ? 1u : nbSymbols - gramLength + 1;
+
+  FeatureModel::FeatureValue ngrams;
+
+  for (unsigned int first = 0; first < nbWindows; first++)
+  {
+    // NOTE(review): with nbSymbols == 0 the upper bound wraps around (unsigned 0 - 1);
+    // confirm that getLetters copes with it.
+    auto gram = getLetters(c, word, first, first + windowSize - 1);
+    ngrams.dicts.emplace_back(gram.dicts[0]);
+    ngrams.names.emplace_back(gram.names[0]);
+    ngrams.values.emplace_back(gram.values[0]);
+    ngrams.policies.emplace_back(gram.policies[0]);
+  }
+
+  return ngrams;
+}
+