diff --git a/MLP/include/MLP.hpp b/MLP/include/MLP.hpp
index be48db2d0f2d9a74d01d90b91f7fead8d85b7551..00e55f6b67d131450424e9866c564abac5ebaa2a 100644
--- a/MLP/include/MLP.hpp
+++ b/MLP/include/MLP.hpp
@@ -95,6 +95,11 @@ class MLP
   /// @brief Must the Layer dropout rate be taken into account during the computations ? Usually it is only during the training step.
   bool dropoutActive;
 
+  /// @brief The current minibatch.
+  std::vector<FeatureModel::FeatureDescription> fds;
+  /// @brief Gold classes of the current minibatch.
+  std::vector<unsigned int> golds;
+
   private :
 
   /// @brief Add the parameters of a layer into the dynet model.
diff --git a/MLP/src/MLP.cpp b/MLP/src/MLP.cpp
index 75ccfc0d932e43af596ef646d822b45774e2043a..8852ef509c890a09e53c7ed06c06135de77074fc 100644
--- a/MLP/src/MLP.cpp
+++ b/MLP/src/MLP.cpp
@@ -199,19 +199,35 @@ std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd)
 
 void MLP::update(FeatureModel::FeatureDescription & fd, int gold)
 {
+  fds.emplace_back(fd);
+  golds.emplace_back(gold);
+
+  if ((int)fds.size() < ProgramParameters::batchSize)
+    return;
+
+  std::vector<dynet::Expression> inputs;
   dynet::ComputationGraph cg;
-  std::vector<dynet::Expression> expressions;
+  for (auto & example : fds)
+  {
+    std::vector<dynet::Expression> expressions;
 
-  for (auto & featValue : fd.values)
-    expressions.emplace_back(featValue2Expression(cg, featValue));
+    for (auto & featValue : example.values)
+      expressions.emplace_back(featValue2Expression(cg, featValue));
 
-  dynet::Expression input = dynet::concatenate(expressions);
-  dynet::Expression output = run(cg, input);
-  dynet::Expression loss = pickneglogsoftmax(output, gold);
+    dynet::Expression input = dynet::concatenate(expressions);
+    inputs.emplace_back(input);
+  }
+
+  dynet::Expression batchedInput = dynet::concatenate_to_batch(inputs);
+  dynet::Expression output = run(cg, batchedInput);
+  dynet::Expression batchedLoss = pickneglogsoftmax(output, golds);
 
-  cg.backward(loss);
+  cg.backward(dynet::sum_batches(batchedLoss));
 
   trainer->update();
+
+  fds.clear();
+  golds.clear();
 }
 
 dynet::DynetParams & MLP::getDefaultParams()
@@ -234,7 +250,7 @@ dynet::Expression MLP::featValue2Expression(dynet::ComputationGraph & cg, const
     bool isConst = (fv.policies[i] == FeatureModel::Policy::Final) || (dict->mode == Dict::Mode::OneHot);
     auto & lu = dict->getLookupParameter();
 
-    unsigned int index = dict->getValue(*fv.values[i]);
+    unsigned int index = dict->getValue(fv.values[i]);
 
     if(isConst)
      expressions.emplace_back(dynet::const_lookup(cg, lu, index));
diff --git a/error_correction/src/macaon_train_error_detector.cpp b/error_correction/src/macaon_train_error_detector.cpp
index 553edbb23720bd848889008362a6180ea9601a20..5221047f047a9f94d0debf1e45e0a05f01b692af 100644
--- a/error_correction/src/macaon_train_error_detector.cpp
+++ b/error_correction/src/macaon_train_error_detector.cpp
@@ -55,6 +55,8 @@ po::options_description getOptionsDescription()
       "Learning rate of the optimizer")
     ("seed,s", po::value<int>()->default_value(100),
       "The random seed that will initialize RNG")
+    ("batchSize", po::value<int>()->default_value(50),
+      "The size of each minibatch (in number of training examples)")
     ("nbTrain", po::value<int>()->default_value(0),
       "The number of models that will be trained, with only the random seed changing")
     ("duplicates", po::value<bool>()->default_value(true),
@@ -498,6 +500,7 @@ int main(int argc, char * argv[])
   ProgramParameters::lang = vm["lang"].as<std::string>();
   ProgramParameters::nbIter = vm["nbiter"].as<int>();
   ProgramParameters::seed = vm["seed"].as<int>();
+  ProgramParameters::batchSize = vm["batchSize"].as<int>();
   ProgramParameters::nbTrain = vm["nbTrain"].as<int>();
   ProgramParameters::removeDuplicates = vm["duplicates"].as<bool>();
   ProgramParameters::interactive = vm["interactive"].as<bool>();
diff --git a/maca_common/include/ProgramParameters.hpp b/maca_common/include/ProgramParameters.hpp
index 35dadb28698df9bdaee43ff8a19399e874dec35c..0e25ebd9a62944597544db7edad8694d6d71b9a1 100644
--- a/maca_common/include/ProgramParameters.hpp
+++ b/maca_common/include/ProgramParameters.hpp
@@ -50,6 +50,7 @@ struct ProgramParameters
   static std::string sequenceDelimiterTape;
   static std::string sequenceDelimiter;
   static std::string classifierName;
+  static int batchSize;
 
   private :
 
diff --git a/maca_common/src/ProgramParameters.cpp b/maca_common/src/ProgramParameters.cpp
index a9c1ec42f20a8bc7b93ba3c7579bf5443c60eabf..c863179ce7fdfcd47f3f5ae9fd8ef8c7e7277a5b 100644
--- a/maca_common/src/ProgramParameters.cpp
+++ b/maca_common/src/ProgramParameters.cpp
@@ -45,3 +45,4 @@ int ProgramParameters::nbTrain;
 std::string ProgramParameters::sequenceDelimiterTape;
 std::string ProgramParameters::sequenceDelimiter;
 std::string ProgramParameters::classifierName;
+int ProgramParameters::batchSize;
diff --git a/trainer/src/macaon_train.cpp b/trainer/src/macaon_train.cpp
index 4122411df4e55d1c8b33e613386221c2be77a47d..33ea8ad04179124d3af8f2f6bb2e54c0fb557791 100644
--- a/trainer/src/macaon_train.cpp
+++ b/trainer/src/macaon_train.cpp
@@ -69,6 +69,8 @@ po::options_description getOptionsDescription()
       "The name of the buffer's tape that contains the delimiter token for a sequence")
     ("sequenceDelimiter", po::value<std::string>()->default_value("1"),
       "The value of the token that act as a delimiter for sequences")
+    ("batchSize", po::value<int>()->default_value(50),
+      "The size of each minibatch (in number of training examples)")
     ("printTime", "Print time on stderr")
     ("shuffle", po::value<bool>()->default_value(true),
       "Shuffle examples after each iteration");
@@ -245,6 +247,7 @@ int main(int argc, char * argv[])
   ProgramParameters::lang = vm["lang"].as<std::string>();
   ProgramParameters::nbIter = vm["nbiter"].as<int>();
   ProgramParameters::seed = vm["seed"].as<int>();
+  ProgramParameters::batchSize = vm["batchSize"].as<int>();
   ProgramParameters::nbTrain = vm["nbTrain"].as<int>();
   ProgramParameters::removeDuplicates = vm["duplicates"].as<bool>();
   ProgramParameters::interactive = vm["interactive"].as<bool>();
diff --git a/transition_machine/include/FeatureModel.hpp b/transition_machine/include/FeatureModel.hpp
index f120e97f5c5e888fa5ea61726008b50cc353df3e..f83c8d248717f798d806149b0619263e840647bf 100644
--- a/transition_machine/include/FeatureModel.hpp
+++ b/transition_machine/include/FeatureModel.hpp
@@ -34,9 +34,9 @@ class FeatureModel
     /// @brief The Dicts that contains the values and their real vector.
     std::vector<Dict *> dicts;
     /// @brief The names of the Features that compose this FeatureValue.
-    std::vector<const std::string *> names;
+    std::vector<std::string> names;
     /// @brief The string value of the Features.
-    std::vector<const std::string *> values;
+    std::vector<std::string> values;
     /// @brief The Policy of the Features.
     std::vector<Policy> policies;
     /// @brief The real valued vector as a string.
@@ -46,7 +46,7 @@ class FeatureModel
     /// @return The real valued vector.
     std::string toString(unsigned int i);
     FeatureValue();
-    FeatureValue(Dict *, const std::string *, const std::string *, Policy);
+    FeatureValue(Dict *, const std::string &, const std::string &, Policy);
   };
 
   /// @brief The image of a Config by a FeatureModel
diff --git a/transition_machine/src/FeatureBank.cpp b/transition_machine/src/FeatureBank.cpp
index 1282d4d9c17ba3cdfe6ba1eeb560e6fa3176ff07..099232d177bde08da37612b8d781da268ebc28b0 100644
--- a/transition_machine/src/FeatureBank.cpp
+++ b/transition_machine/src/FeatureBank.cpp
@@ -164,9 +164,9 @@ FeatureModel::FeatureValue FeatureBank::actionHistory(Config & config, int index
   auto & history = config.getCurrentStateHistory();
 
   if(index < 0 || index >= (int)history.size())
-    return {dict, &featName, &Dict::nullValueStr, policy};
+    return {dict, featName, Dict::nullValueStr, policy};
 
-  return {dict, &featName, dict->getStr(history[history.size()-1-index]), policy};
+  return {dict, featName, history[history.size()-1-index], policy};
 }
 
 FeatureModel::FeatureValue FeatureBank::entropyHistory(Config & config, int index, const std::string & featName)
@@ -176,11 +176,11 @@ FeatureModel::FeatureValue FeatureBank::entropyHistory(Config & config, int inde
   auto & history = config.getCurrentStateEntropyHistory();
 
   if(index < 0 || index >= (int)history.size())
-    return {dict, &featName, &Dict::nullValueStr, policy};
+    return {dict, featName, Dict::nullValueStr, policy};
 
   std::string value = std::to_string((int)history[history.size()-1-index]);
 
-  return {dict, &featName, dict->getStr(value), policy};
+  return {dict, featName, value, policy};
 }
 
 FeatureModel::FeatureValue FeatureBank::ldep(Config & config, int index, const std::string & object, const std::string & tapeName, const std::string & featName)
@@ -194,7 +194,7 @@ FeatureModel::FeatureValue FeatureBank::ldep(Config & config, int index, const s
   if(object == "s")
   {
     if(!config.stackHasIndex(index))
-      return {dict, &featName, &Dict::nullValueStr, policy};
+      return {dict, featName, Dict::nullValueStr, policy};
 
     index = config.stackGetElem(index);
   }
@@ -204,7 +204,7 @@ FeatureModel::FeatureValue FeatureBank::ldep(Config & config, int index, const s
   }
 
   if(index < 0 || index >= (int)tape.hyp.size())
-    return {dict, &featName, &Dict::nullValueStr, policy};
+    return {dict, featName, Dict::nullValueStr, policy};
 
   int candidate = -1;
   unsigned int maxDist = 10;
@@ -217,12 +217,12 @@ FeatureModel::FeatureValue FeatureBank::ldep(Config & config, int index, const s
   }
 
   if(candidate == -1)
-    return {dict, &featName, &Dict::nullValueStr, policy};
+    return {dict, featName, Dict::nullValueStr, policy};
 
   if(tape[candidate].empty())
-    return {dict, &featName, &Dict::nullValueStr, policy};
+    return {dict, featName, Dict::nullValueStr, policy};
 
-  return {dict, &featName, &tape[candidate], policy};
+  return {dict, featName, tape[candidate], policy};
 }
 
 FeatureModel::FeatureValue FeatureBank::dist(Config & config, const std::string & object1, int index1, const std::string & object2, int index2, const std::string & featName)
@@ -235,14 +235,14 @@ FeatureModel::FeatureValue FeatureBank::dist(Config & config, const std::string
   if (object1 == "b")
   {
     if(index1 < 0 || index1 >= (int)config.tapes[0].hyp.size())
-      return {dict, &featName, &Dict::nullValueStr, policy};
+      return {dict, featName, Dict::nullValueStr, policy};
 
     elem1 = config.head + index1;
   }
   else
   {
     if(!config.stackHasIndex(index1))
-      return {dict, &featName, &Dict::nullValueStr, policy};
+      return {dict, featName, Dict::nullValueStr, policy};
 
     elem1 = config.stackGetElem(index1);
   }
@@ -250,19 +250,19 @@ FeatureModel::FeatureValue FeatureBank::dist(Config & config, const std::string
   if (object2 == "b")
   {
     if(index2 < 0 || index2 >= (int)config.tapes[0].hyp.size())
-      return {dict, &featName, &Dict::nullValueStr, policy};
+      return {dict, featName, Dict::nullValueStr, policy};
 
     elem2 = config.head + index2;
   }
   else
   {
     if(!config.stackHasIndex(index2))
-      return {dict, &featName, &Dict::nullValueStr, policy};
+      return {dict, featName, Dict::nullValueStr, policy};
 
     elem2 = config.stackGetElem(index2);
   }
 
-  return {dict, &featName, dict->getStr(std::to_string(elem1-elem2)), policy};
+  return {dict, featName, std::to_string(elem1-elem2), policy};
 }
 
 FeatureModel::FeatureValue FeatureBank::rdep(Config & config, int index, const std::string & object, const std::string & tapeName, const std::string & featName)
@@ -276,7 +276,7 @@ FeatureModel::FeatureValue FeatureBank::rdep(Config & config, int index, const s
   if(object == "s")
   {
     if(!config.stackHasIndex(index))
-      return {dict, &featName, &Dict::nullValueStr, policy};
+      return {dict, featName, Dict::nullValueStr, policy};
 
     index = config.stackGetElem(index);
   }
@@ -286,7 +286,7 @@ FeatureModel::FeatureValue FeatureBank::rdep(Config & config, int index, const s
   }
 
   if(index < 0 || index >= (int)tape.hyp.size())
-    return {dict, &featName, &Dict::nullValueStr, policy};
+    return {dict, featName, Dict::nullValueStr, policy};
 
   int candidate = -1;
   unsigned int maxDist = 10;
@@ -299,12 +299,12 @@ FeatureModel::FeatureValue FeatureBank::rdep(Config & config, int index, const s
   }
 
   if(candidate == -1)
-    return {dict, &featName, &Dict::nullValueStr, policy};
+    return {dict, featName, Dict::nullValueStr, policy};
 
   if(tape[candidate].empty())
-    return {dict, &featName, &Dict::nullValueStr, policy};
+    return {dict, featName, Dict::nullValueStr, policy};
 
-  return {dict, &featName, &tape[candidate], policy};
+  return {dict, featName, tape[candidate], policy};
 }
 
 FeatureModel::FeatureValue FeatureBank::simpleBufferAccess(Config & config, int relativeIndex, const std::string & tapeName, const std::string & featName)
@@ -316,12 +316,12 @@ FeatureModel::FeatureValue FeatureBank::simpleBufferAccess(Config & config, int
   int index = config.head + relativeIndex;
 
   if(index < 0 || index >= (int)tape.hyp.size())
-    return {dict, &featName, &Dict::nullValueStr, policy};
+    return {dict, featName, Dict::nullValueStr, policy};
 
   if(tape[index].empty())
-    return {dict, &featName, &Dict::nullValueStr, policy};
+    return {dict, featName, Dict::nullValueStr, policy};
 
-  return {dict, &featName, &tape[index], policy};
+  return {dict, featName, tape[index], policy};
 }
 
 FeatureModel::FeatureValue FeatureBank::simpleStackAccess(Config & config, int relativeIndex, const std::string & tapeName, const std::string & featName)
@@ -331,29 +331,29 @@ FeatureModel::FeatureValue FeatureBank::simpleStackAccess(Config & config, int r
   auto policy = dictPolicy2FeaturePolicy(dict->policy);
 
   if(!config.stackHasIndex(relativeIndex))
-    return {dict, &featName, &Dict::nullValueStr, policy};
+    return {dict, featName, Dict::nullValueStr, policy};
 
   int index = config.stackGetElem(relativeIndex);
 
   if(index < 0 || index >= (int)tape.hyp.size())
-    return {dict, &featName, &Dict::nullValueStr, policy};
+    return {dict, featName, Dict::nullValueStr, policy};
 
   if(tape[index].empty())
-    return {dict, &featName, &Dict::nullValueStr, policy};
+    return {dict, featName, Dict::nullValueStr, policy};
 
-  return {dict, &featName, &tape[index], policy};
+  return {dict, featName, tape[index], policy};
 }
 
 FeatureModel::FeatureValue FeatureBank::getUppercase(Config &, const FeatureModel::FeatureValue & fv)
 {
   Dict * dict = Dict::getDict("bool");
   auto policy = dictPolicy2FeaturePolicy(dict->policy);
-  bool firstLetterUppercase = isUpper((*fv.values[0])[0]);
+  bool firstLetterUppercase = isUpper(fv.values[0][0]);
 
-  if(*fv.values[0] == Dict::nullValueStr)
-    return {dict, fv.names[0], &Dict::nullValueStr, policy};
+  if(fv.values[0] == Dict::nullValueStr)
+    return {dict, fv.names[0], Dict::nullValueStr, policy};
 
-  const std::string * str = dict->getStr(firstLetterUppercase ? std::string("true") : std::string("false"));
+  std::string str = firstLetterUppercase ? std::string("true") : std::string("false");
 
   return {dict, fv.names[0], str, policy};
 }
@@ -362,16 +362,16 @@ FeatureModel::FeatureValue FeatureBank::getLength(Config &, const FeatureModel::
 {
   Dict * dict = Dict::getDict("int");
   auto policy = dictPolicy2FeaturePolicy(dict->policy);
-  int len = lengthPrinted(*fv.values[0]);
+  int len = lengthPrinted(fv.values[0]);
 
-  if(*fv.values[0] == Dict::nullValueStr)
-    return {dict, fv.names[0], &Dict::nullValueStr, policy};
+  if(fv.values[0] == Dict::nullValueStr)
+    return {dict, fv.names[0], Dict::nullValueStr, policy};
 
   int limit = 7;
   if (len > limit)
     len = limit;
 
-  const std::string * str = dict->getStr(std::to_string(len));
+  std::string str = std::to_string(len);
 
   return {dict, fv.names[0], str, policy};
 }
@@ -381,13 +381,13 @@ FeatureModel::FeatureValue FeatureBank::getLetters(Config &, const FeatureModel:
   Dict * dict = Dict::getDict("letters");
   auto policy = dictPolicy2FeaturePolicy(dict->policy);
 
-  if(*fv.values[0] == Dict::nullValueStr)
-    return {dict, fv.names[0], &Dict::nullValueStr, policy};
+  if(fv.values[0] == Dict::nullValueStr)
+    return {dict, fv.names[0], Dict::nullValueStr, policy};
 
   if(from < 0)
-    from = fv.values[0]->size() + from;
+    from = fv.values[0].size() + from;
   if(to < 0)
-    to = fv.values[0]->size() + to;
+    to = fv.values[0].size() + to;
 
   if(to < from)
   {
@@ -397,13 +397,13 @@ FeatureModel::FeatureValue FeatureBank::getLetters(Config &, const FeatureModel:
   std::string letters;
 
   for(int i = from; i <= to; i++)
-    if(i >= 0 && i < (int)(*fv.values[0]).size())
-      letters.push_back((*fv.values[0])[i]);
+    if(i >= 0 && i < (int)(fv.values[0]).size())
+      letters.push_back(fv.values[0][i]);
 
   if(letters.empty())
-    return {dict, fv.names[0], &Dict::nullValueStr, policy};
+    return {dict, fv.names[0], Dict::nullValueStr, policy};
 
-  const std::string * str = dict->getStr(letters);
+  std::string str = letters;
 
   return {dict, fv.names[0], str, policy};
 }
@@ -422,7 +422,6 @@ FeatureModel::FeatureValue FeatureBank::aggregateBuffer(Config & c, int from, in
     Dict * dict = c.bd.getDictOfLine(tape.name);
     auto policy = dictPolicy2FeaturePolicy(dict->policy);
     bool ignored = false;
-    std::string * featName = &tape.name;
     for (auto & except : exceptions)
       if (except == tape.name)
       {
@@ -435,11 +434,12 @@ FeatureModel::FeatureValue FeatureBank::aggregateBuffer(Config & c, int from, in
     for (int i = from; i <= to; i++)
     {
       int index = c.head + i;
+      std::string featName = "b."+std::to_string(i)+"."+tape.name;
       if(index < 0 || index >= (int)tape.hyp.size())
       {
         result.dicts.emplace_back(dict);
         result.names.emplace_back(featName);
-        result.values.emplace_back(&Dict::nullValueStr);
+        result.values.emplace_back(Dict::nullValueStr);
         result.policies.emplace_back(policy);
         continue;
       }
@@ -447,14 +447,14 @@ FeatureModel::FeatureValue FeatureBank::aggregateBuffer(Config & c, int from, in
       {
         result.dicts.emplace_back(dict);
         result.names.emplace_back(featName);
-        result.values.emplace_back(&Dict::nullValueStr);
+        result.values.emplace_back(Dict::nullValueStr);
         result.policies.emplace_back(policy);
         continue;
       }
 
       result.dicts.emplace_back(dict);
       result.names.emplace_back(featName);
-      result.values.emplace_back(&tape[index]);
+      result.values.emplace_back(tape[index]);
       result.policies.emplace_back(policy);
     }
   }
@@ -471,7 +471,6 @@ FeatureModel::FeatureValue FeatureBank::aggregateStack(Config & c, int from, con
     Dict * dict = c.bd.getDictOfLine(tape.name);
     auto policy = dictPolicy2FeaturePolicy(dict->policy);
     bool ignored = false;
-    std::string * featName = &tape.name;
     for (auto & except : exceptions)
       if (except == tape.name)
       {
@@ -483,11 +482,12 @@ FeatureModel::FeatureValue FeatureBank::aggregateStack(Config & c, int from, con
 
     for (int i = 0; i >= from; i--)
     {
+      std::string featName = "s."+std::to_string(i)+"."+tape.name;
       if(!c.stackHasIndex(i))
       {
         result.dicts.emplace_back(dict);
         result.names.emplace_back(featName);
-        result.values.emplace_back(&Dict::nullValueStr);
+        result.values.emplace_back(Dict::nullValueStr);
         result.policies.emplace_back(policy);
         continue;
       }
@@ -496,14 +496,14 @@ FeatureModel::FeatureValue FeatureBank::aggregateStack(Config & c, int from, con
       {
         result.dicts.emplace_back(dict);
         result.names.emplace_back(featName);
-        result.values.emplace_back(&Dict::nullValueStr);
+        result.values.emplace_back(Dict::nullValueStr);
         result.policies.emplace_back(policy);
         continue;
       }
 
       result.dicts.emplace_back(dict);
       result.names.emplace_back(featName);
-      result.values.emplace_back(&tape[index]);
+      result.values.emplace_back(tape[index]);
       result.policies.emplace_back(policy);
     }
   }
diff --git a/transition_machine/src/FeatureModel.cpp b/transition_machine/src/FeatureModel.cpp
index f123e67a018ce64ba92c7c576532db0ace67bb1d..c65b6b4220f5f8cc9f43dad96c9575a3717c0bc4 100644
--- a/transition_machine/src/FeatureModel.cpp
+++ b/transition_machine/src/FeatureModel.cpp
@@ -51,7 +51,7 @@ std::string FeatureModel::FeatureValue::toString(unsigned int i)
   }
 
   Dict * dict = dicts[i];
-  unsigned int index = dict->getValue(*values[i]);
+  unsigned int index = dict->getValue(values[i]);
 
   float * realVector = (*dict->getLookupParameter().values())[index].batch_ptr(0);
   unsigned int dim = dict->getDimension();
@@ -71,8 +71,8 @@ std::string FeatureModel::FeatureDescription::toString()
   for(auto featValue : values)
     for (unsigned int i = 0; i < featValue.dicts.size(); i++)
     {
-      int size1 = lengthPrinted(*featValue.names[i]);
-      int size2 = lengthPrinted(" " + std::string(*featValue.values[i]));
+      int size1 = lengthPrinted(featValue.names[i]);
+      int size2 = lengthPrinted(" " + std::string(featValue.values[i]));
       int size3 = lengthPrinted(" " + featValue.toString(i));
 
       columnSizes[0] = std::max(columnSizes[0], size1);
@@ -92,10 +92,10 @@ std::string FeatureModel::FeatureDescription::toString()
   for(auto featValue : values)
     for (unsigned int i = 0; i < featValue.dicts.size(); i++)
     {
-      column = *featValue.names[i];
+      column = featValue.names[i];
       while ((int)lengthPrinted(column) < columnSizes[0]){column.push_back(' ');}
       res += column;
-      column = " " + std::string(*featValue.values[i]);
+      column = " " + std::string(featValue.values[i]);
       while ((int)lengthPrinted(column) < columnSizes[1]){column.push_back(' ');}
       res += column;
       column = " " + featValue.toString(i);
@@ -121,7 +121,7 @@ const char * FeatureModel::policy2str(Policy policy)
   return "null";
 }
 
-FeatureModel::FeatureValue::FeatureValue(Dict * dict, const std::string * name, const std::string * value, Policy policy)
+FeatureModel::FeatureValue::FeatureValue(Dict * dict, const std::string & name, const std::string & value, Policy policy)
 {
   dicts.emplace_back(dict);
   names.emplace_back(name);