diff --git a/decoder/src/macaon_decode.cpp b/decoder/src/macaon_decode.cpp
index 1be61fdb9e3f497c33c551be1107038241cb39e0..b005aa87fed4c5ebdd98689706976f919d95b60d 100644
--- a/decoder/src/macaon_decode.cpp
+++ b/decoder/src/macaon_decode.cpp
@@ -55,6 +55,10 @@ po::options_description getOptionsDescription()
       "The maximal size of each Dict (number of differents embeddings).")
     ("interactive", po::value<bool>()->default_value(true),
       "Is the shell interactive ? Display advancement informations")
+    ("tapeToMask", po::value<std::string>()->default_value("FORM"),
+      "The name of the Tape for which some of the elements will be masked.")
+    ("maskRate", po::value<float>()->default_value(0.0),
+      "The rate of elements of the Tape that will be masked.")
     ("lang", po::value<std::string>()->default_value("fr"),
       "Language you are working with");
 
@@ -154,6 +158,8 @@ int main(int argc, char * argv[])
   ProgramParameters::dictCapacity = vm["dictCapacity"].as<int>();
   ProgramParameters::beamSize = vm["beamSize"].as<int>();
   ProgramParameters::nbChilds = vm["nbChilds"].as<int>();
+  ProgramParameters::tapeToMask = vm["tapeToMask"].as<std::string>();
+  ProgramParameters::maskRate = vm["maskRate"].as<float>();
   ProgramParameters::optimizer = "none";
   std::string featureModels = vm["featureModels"].as<std::string>();
   if (!featureModels.empty())
diff --git a/maca_common/include/LimitedArray.hpp b/maca_common/include/LimitedArray.hpp
index 738cd16888ad0d20f8bc36140d165608082b961e..900961571f754ada3959ab5cc14cf9b07bef29c8 100644
--- a/maca_common/include/LimitedArray.hpp
+++ b/maca_common/include/LimitedArray.hpp
@@ -63,6 +63,22 @@ class LimitedArray
     data[index % data.size()].second = false;
   }
 
+  /// @brief Mark a cell as masked by setting the flag stored in its `.second` member.
+  ///
+  /// @param index Index of the cell; it is reduced modulo data.size(), so data must not be empty.
+  void maskIndex(unsigned int index)
+  {
+    data[index % data.size()].second = true;
+  }
+
+  /// @brief Clear the flag that maskIndex sets on a cell.
+  ///
+  /// @param index Index of the cell; it is reduced modulo data.size(), so data must not be empty.
+  void unmaskIndex(unsigned int index)
+  {
+    data[index % data.size()].second = false;
+  }
+
   int getLastIndex() const
   {
     return lastElementRealIndex;
diff --git a/maca_common/include/ProgramParameters.hpp b/maca_common/include/ProgramParameters.hpp
index 675843d7044069c64d69d9a1e6044abd72f37a6b..07f54550a207903db231e58bc10dfe93018c7039 100644
--- a/maca_common/include/ProgramParameters.hpp
+++ b/maca_common/include/ProgramParameters.hpp
@@ -68,6 +68,8 @@ struct ProgramParameters
   static int readSize;
   static int dictCapacity;
   static bool printOutputEntropy;
+  static std::string tapeToMask;
+  static float maskRate;
 
   private :
 
diff --git a/maca_common/src/ProgramParameters.cpp b/maca_common/src/ProgramParameters.cpp
index 593e22cf21002aa0ed882f84bc51e715c0c18968..4fab2314fb8dd9690e2b8ecb33fa3a3171728084 100644
--- a/maca_common/src/ProgramParameters.cpp
+++ b/maca_common/src/ProgramParameters.cpp
@@ -62,4 +62,6 @@ int ProgramParameters::devTapeSize;
 int ProgramParameters::readSize;
 bool ProgramParameters::printOutputEntropy;
 int ProgramParameters::dictCapacity;
+std::string ProgramParameters::tapeToMask;
+float ProgramParameters::maskRate;
 
diff --git a/trainer/src/macaon_train.cpp b/trainer/src/macaon_train.cpp
index 325ef72717b56fa545af7990756f7874391907eb..60622887416b509bf37be04b254478455b6911ff 100644
--- a/trainer/src/macaon_train.cpp
+++ b/trainer/src/macaon_train.cpp
@@ -81,6 +81,10 @@ po::options_description getOptionsDescription()
       "The size of each minibatch (in number of taining examples)")
     ("dictCapacity", po::value<int>()->default_value(30000),
       "The maximal size of each Dict (number of differents embeddings).")
+    ("tapeToMask", po::value<std::string>()->default_value("FORM"),
+      "The name of the Tape for which some of the elements will be masked.")
+    ("maskRate", po::value<float>()->default_value(0.0),
+      "The rate of elements of the Tape that will be masked.")
     ("printTime", "Print time on stderr")
     ("shuffle", po::value<bool>()->default_value(true),
       "Shuffle examples after each iteration");
@@ -289,6 +293,8 @@ int main(int argc, char * argv[])
   ProgramParameters::loss = vm["loss"].as<std::string>();
   ProgramParameters::dynamicEpoch = vm["epochd"].as<int>();
   ProgramParameters::dynamicProbability = vm["proba"].as<float>();
+  ProgramParameters::tapeToMask = vm["tapeToMask"].as<std::string>();
+  ProgramParameters::maskRate = vm["maskRate"].as<float>();
   ProgramParameters::showFeatureRepresentation = vm["showFeatureRepresentation"].as<int>();
   ProgramParameters::iterationSize = vm["iterationSize"].as<int>();
   std::string featureModels = vm["featureModels"].as<std::string>();
diff --git a/transition_machine/include/Config.hpp b/transition_machine/include/Config.hpp
index 7f6f72754948af3f82b37a9ab7b5d4050500b7d5..7e5f8a2f9e72b3cc4839e5339379f74451d7f426 100644
--- a/transition_machine/include/Config.hpp
+++ b/transition_machine/include/Config.hpp
@@ -126,6 +126,10 @@ class Config
     /// @brief Get the last tape index that will be overriden with the next read.
     int getNextOverridenRealIndex();
     void setTotalEntropy(float entropy);
+    /// @brief Mask a cell of the tape
+    ///
+    /// @param index the index to mask
+    void maskIndex(int index);
   };
 
   private :
diff --git a/transition_machine/src/Config.cpp b/transition_machine/src/Config.cpp
index f212b77be9800d68f76effdc5cff6d98d60f5bda..758716d1cdf1f90f4356cfceed6895c2c6d125c0 100644
--- a/transition_machine/src/Config.cpp
+++ b/transition_machine/src/Config.cpp
@@ -96,6 +96,15 @@ void Config::readInput()
 
       tape.addToRef(cols[i]);
       tape.addToHyp("");
+
+      // Masking only applies to the tape named by tapeToMask; the `&&` keeps the
+      // original order, so choiceWithProbability is not called for other tapes.
+      // refSize()-1 is presumably the cell just added by addToRef.
+      if (tape.getName() == ProgramParameters::tapeToMask &&
+          choiceWithProbability(ProgramParameters::maskRate))
+      {
+        tape.maskIndex(tape.refSize()-1);
+      }
     }
 
     haveRead++;
@@ -610,3 +619,8 @@ void Config::Tape::setTotalEntropy(float entropy)
   totalEntropy = entropy;
 }
 
+void Config::Tape::maskIndex(int index)
+{
+  ref.maskIndex(index);
+}
+