From fe8a3033ba93bb77869715b222231f3544bdc686 Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Thu, 28 Feb 2019 16:09:10 +0100 Subject: [PATCH] Added a program option to randomly mask part of a tape --- decoder/src/macaon_decode.cpp | 6 ++++++ maca_common/include/LimitedArray.hpp | 10 ++++++++++ maca_common/include/ProgramParameters.hpp | 2 ++ maca_common/src/ProgramParameters.cpp | 2 ++ trainer/src/macaon_train.cpp | 6 ++++++ transition_machine/include/Config.hpp | 4 ++++ transition_machine/src/Config.cpp | 9 +++++++++ 7 files changed, 39 insertions(+) diff --git a/decoder/src/macaon_decode.cpp b/decoder/src/macaon_decode.cpp index 1be61fd..b005aa8 100644 --- a/decoder/src/macaon_decode.cpp +++ b/decoder/src/macaon_decode.cpp @@ -55,6 +55,10 @@ po::options_description getOptionsDescription() "The maximal size of each Dict (number of differents embeddings).") ("interactive", po::value<bool>()->default_value(true), "Is the shell interactive ? Display advancement informations") + ("tapeToMask", po::value<std::string>()->default_value("FORM"), + "The name of the Tape for which some of the elements will be masked.") + ("maskRate", po::value<float>()->default_value(0.0), + "The rate of elements of the Tape that will be masked.") ("lang", po::value<std::string>()->default_value("fr"), "Language you are working with"); @@ -154,6 +158,8 @@ int main(int argc, char * argv[]) ProgramParameters::dictCapacity = vm["dictCapacity"].as<int>(); ProgramParameters::beamSize = vm["beamSize"].as<int>(); ProgramParameters::nbChilds = vm["nbChilds"].as<int>(); + ProgramParameters::tapeToMask = vm["tapeToMask"].as<std::string>(); + ProgramParameters::maskRate = vm["maskRate"].as<float>(); ProgramParameters::optimizer = "none"; std::string featureModels = vm["featureModels"].as<std::string>(); if (!featureModels.empty()) diff --git a/maca_common/include/LimitedArray.hpp b/maca_common/include/LimitedArray.hpp index 738cd16..9009615 100644 --- a/maca_common/include/LimitedArray.hpp +++ b/maca_common/include/LimitedArray.hpp @@ -63,6 +63,16 @@ class LimitedArray data[index % data.size()].second = false; } + void maskIndex(unsigned int index) + { + data[index % data.size()].second = true; + } + + void unmaskIndex(unsigned int index) + { + data[index % data.size()].second = false; + } + int getLastIndex() const { return lastElementRealIndex; diff --git a/maca_common/include/ProgramParameters.hpp b/maca_common/include/ProgramParameters.hpp index 675843d..07f5455 100644 --- a/maca_common/include/ProgramParameters.hpp +++ b/maca_common/include/ProgramParameters.hpp @@ -68,6 +68,8 @@ struct ProgramParameters static int readSize; static int dictCapacity; static bool printOutputEntropy; + static std::string tapeToMask; + static float maskRate; private : diff --git a/maca_common/src/ProgramParameters.cpp b/maca_common/src/ProgramParameters.cpp index 593e22c..4fab231 100644 --- a/maca_common/src/ProgramParameters.cpp +++ b/maca_common/src/ProgramParameters.cpp @@ -62,4 +62,6 @@ int ProgramParameters::devTapeSize; int ProgramParameters::readSize; bool ProgramParameters::printOutputEntropy; int ProgramParameters::dictCapacity; +std::string ProgramParameters::tapeToMask; +float ProgramParameters::maskRate; diff --git a/trainer/src/macaon_train.cpp b/trainer/src/macaon_train.cpp index 325ef72..6062288 100644 --- a/trainer/src/macaon_train.cpp +++ b/trainer/src/macaon_train.cpp @@ -81,6 +81,10 @@ po::options_description getOptionsDescription() "The size of each minibatch (in number of taining examples)") ("dictCapacity", po::value<int>()->default_value(30000), "The maximal size of each Dict (number of differents embeddings).") + ("tapeToMask", po::value<std::string>()->default_value("FORM"), + "The name of the Tape for which some of the elements will be masked.") + ("maskRate", po::value<float>()->default_value(0.0), + "The rate of elements of the Tape that will be masked.") ("printTime", "Print time on stderr") ("shuffle", po::value<bool>()->default_value(true), "Shuffle examples after each iteration"); @@ -289,6 +293,8 @@ int main(int argc, char * argv[]) ProgramParameters::loss = vm["loss"].as<std::string>(); ProgramParameters::dynamicEpoch = vm["epochd"].as<int>(); ProgramParameters::dynamicProbability = vm["proba"].as<float>(); + ProgramParameters::tapeToMask = vm["tapeToMask"].as<std::string>(); + ProgramParameters::maskRate = vm["maskRate"].as<float>(); ProgramParameters::showFeatureRepresentation = vm["showFeatureRepresentation"].as<int>(); ProgramParameters::iterationSize = vm["iterationSize"].as<int>(); std::string featureModels = vm["featureModels"].as<std::string>(); diff --git a/transition_machine/include/Config.hpp b/transition_machine/include/Config.hpp index 7f6f727..7e5f8a2 100644 --- a/transition_machine/include/Config.hpp +++ b/transition_machine/include/Config.hpp @@ -126,6 +126,10 @@ class Config /// @brief Get the last tape index that will be overriden with the next read. int getNextOverridenRealIndex(); void setTotalEntropy(float entropy); + /// @brief Mask a cell of the tape + /// + /// @param index the index to mask + void maskIndex(int index); }; private : diff --git a/transition_machine/src/Config.cpp b/transition_machine/src/Config.cpp index f212b77..758716d 100644 --- a/transition_machine/src/Config.cpp +++ b/transition_machine/src/Config.cpp @@ -96,6 +96,10 @@ void Config::readInput() tape.addToRef(cols[i]); tape.addToHyp(""); + + if (tape.getName() == ProgramParameters::tapeToMask) + if (choiceWithProbability(ProgramParameters::maskRate)) + tape.maskIndex(tape.refSize()-1); } haveRead++; @@ -610,3 +614,8 @@ void Config::Tape::setTotalEntropy(float entropy) totalEntropy = entropy; } +void Config::Tape::maskIndex(int index) +{ + ref.maskIndex(index); +} + -- GitLab