Skip to content
Snippets Groups Projects
Commit 86f4b611 authored by Franck Dary's avatar Franck Dary
Browse files

Minibatching is now working

parent 36a24359
Branches
No related tags found
No related merge requests found
......@@ -95,6 +95,11 @@ class MLP
/// @brief Whether the Layer dropout rate must be taken into account during the computations. Usually this is only the case during the training step.
bool dropoutActive;
/// @brief The current minibatch.
std::vector<FeatureModel::FeatureDescription> fds;
/// @brief The gold classes of the current minibatch.
std::vector<unsigned int> golds;
private :
/// @brief Add the parameters of a layer into the dynet model.
......
......@@ -199,19 +199,35 @@ std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd)
void MLP::update(FeatureModel::FeatureDescription & fd, int gold)
{
fds.emplace_back(fd);
golds.emplace_back(gold);
if ((int)fds.size() < ProgramParameters::batchSize)
return;
std::vector<dynet::Expression> inputs;
dynet::ComputationGraph cg;
for (auto & example : fds)
{
std::vector<dynet::Expression> expressions;
for (auto & featValue : fd.values)
for (auto & featValue : example.values)
expressions.emplace_back(featValue2Expression(cg, featValue));
dynet::Expression input = dynet::concatenate(expressions);
dynet::Expression output = run(cg, input);
dynet::Expression loss = pickneglogsoftmax(output, gold);
inputs.emplace_back(input);
}
cg.backward(loss);
dynet::Expression batchedInput = dynet::concatenate_to_batch(inputs);
dynet::Expression output = run(cg, batchedInput);
dynet::Expression batchedLoss = pickneglogsoftmax(output, golds);
cg.backward(dynet::sum_batches(batchedLoss));
trainer->update();
fds.clear();
golds.clear();
}
dynet::DynetParams & MLP::getDefaultParams()
......@@ -234,7 +250,7 @@ dynet::Expression MLP::featValue2Expression(dynet::ComputationGraph & cg, const
bool isConst = (fv.policies[i] == FeatureModel::Policy::Final) || (dict->mode == Dict::Mode::OneHot);
auto & lu = dict->getLookupParameter();
unsigned int index = dict->getValue(*fv.values[i]);
unsigned int index = dict->getValue(fv.values[i]);
if(isConst)
expressions.emplace_back(dynet::const_lookup(cg, lu, index));
......
......@@ -55,6 +55,8 @@ po::options_description getOptionsDescription()
"Learning rate of the optimizer")
("seed,s", po::value<int>()->default_value(100),
"The random seed that will initialize RNG")
("batchSize", po::value<int>()->default_value(50),
"The size of each minibatch (in number of taining examples)")
("nbTrain", po::value<int>()->default_value(0),
"The number of models that will be trained, with only the random seed changing")
("duplicates", po::value<bool>()->default_value(true),
......@@ -498,6 +500,7 @@ int main(int argc, char * argv[])
ProgramParameters::lang = vm["lang"].as<std::string>();
ProgramParameters::nbIter = vm["nbiter"].as<int>();
ProgramParameters::seed = vm["seed"].as<int>();
ProgramParameters::batchSize = vm["batchSize"].as<int>();
ProgramParameters::nbTrain = vm["nbTrain"].as<int>();
ProgramParameters::removeDuplicates = vm["duplicates"].as<bool>();
ProgramParameters::interactive = vm["interactive"].as<bool>();
......
......@@ -50,6 +50,7 @@ struct ProgramParameters
static std::string sequenceDelimiterTape;
static std::string sequenceDelimiter;
static std::string classifierName;
static int batchSize;
private :
......
......@@ -45,3 +45,4 @@ int ProgramParameters::nbTrain;
std::string ProgramParameters::sequenceDelimiterTape;
std::string ProgramParameters::sequenceDelimiter;
std::string ProgramParameters::classifierName;
int ProgramParameters::batchSize;
......@@ -69,6 +69,8 @@ po::options_description getOptionsDescription()
"The name of the buffer's tape that contains the delimiter token for a sequence")
("sequenceDelimiter", po::value<std::string>()->default_value("1"),
"The value of the token that act as a delimiter for sequences")
("batchSize", po::value<int>()->default_value(50),
"The size of each minibatch (in number of taining examples)")
("printTime", "Print time on stderr")
("shuffle", po::value<bool>()->default_value(true),
"Shuffle examples after each iteration");
......@@ -245,6 +247,7 @@ int main(int argc, char * argv[])
ProgramParameters::lang = vm["lang"].as<std::string>();
ProgramParameters::nbIter = vm["nbiter"].as<int>();
ProgramParameters::seed = vm["seed"].as<int>();
ProgramParameters::batchSize = vm["batchSize"].as<int>();
ProgramParameters::nbTrain = vm["nbTrain"].as<int>();
ProgramParameters::removeDuplicates = vm["duplicates"].as<bool>();
ProgramParameters::interactive = vm["interactive"].as<bool>();
......
......@@ -34,9 +34,9 @@ class FeatureModel
/// @brief The Dicts that contain the values and their real-valued vectors.
std::vector<Dict *> dicts;
/// @brief The names of the Features that compose this FeatureValue.
std::vector<const std::string *> names;
std::vector<std::string> names;
/// @brief The string value of the Features.
std::vector<const std::string *> values;
std::vector<std::string> values;
/// @brief The Policy of the Features.
std::vector<Policy> policies;
/// @brief The real valued vector as a string.
......@@ -46,7 +46,7 @@ class FeatureModel
/// @return The real valued vector.
std::string toString(unsigned int i);
FeatureValue();
FeatureValue(Dict *, const std::string *, const std::string *, Policy);
FeatureValue(Dict *, const std::string &, const std::string &, Policy);
};
/// @brief The image of a Config by a FeatureModel
......
......@@ -164,9 +164,9 @@ FeatureModel::FeatureValue FeatureBank::actionHistory(Config & config, int index
auto & history = config.getCurrentStateHistory();
if(index < 0 || index >= (int)history.size())
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
return {dict, &featName, dict->getStr(history[history.size()-1-index]), policy};
return {dict, featName, history[history.size()-1-index], policy};
}
FeatureModel::FeatureValue FeatureBank::entropyHistory(Config & config, int index, const std::string & featName)
......@@ -176,11 +176,11 @@ FeatureModel::FeatureValue FeatureBank::entropyHistory(Config & config, int inde
auto & history = config.getCurrentStateEntropyHistory();
if(index < 0 || index >= (int)history.size())
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
std::string value = std::to_string((int)history[history.size()-1-index]);
return {dict, &featName, dict->getStr(value), policy};
return {dict, featName, value, policy};
}
FeatureModel::FeatureValue FeatureBank::ldep(Config & config, int index, const std::string & object, const std::string & tapeName, const std::string & featName)
......@@ -194,7 +194,7 @@ FeatureModel::FeatureValue FeatureBank::ldep(Config & config, int index, const s
if(object == "s")
{
if(!config.stackHasIndex(index))
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
index = config.stackGetElem(index);
}
......@@ -204,7 +204,7 @@ FeatureModel::FeatureValue FeatureBank::ldep(Config & config, int index, const s
}
if(index < 0 || index >= (int)tape.hyp.size())
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
int candidate = -1;
unsigned int maxDist = 10;
......@@ -217,12 +217,12 @@ FeatureModel::FeatureValue FeatureBank::ldep(Config & config, int index, const s
}
if(candidate == -1)
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
if(tape[candidate].empty())
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
return {dict, &featName, &tape[candidate], policy};
return {dict, featName, tape[candidate], policy};
}
FeatureModel::FeatureValue FeatureBank::dist(Config & config, const std::string & object1, int index1, const std::string & object2, int index2, const std::string & featName)
......@@ -235,14 +235,14 @@ FeatureModel::FeatureValue FeatureBank::dist(Config & config, const std::string
if (object1 == "b")
{
if(index1 < 0 || index1 >= (int)config.tapes[0].hyp.size())
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
elem1 = config.head + index1;
}
else
{
if(!config.stackHasIndex(index1))
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
elem1 = config.stackGetElem(index1);
}
......@@ -250,19 +250,19 @@ FeatureModel::FeatureValue FeatureBank::dist(Config & config, const std::string
if (object2 == "b")
{
if(index2 < 0 || index2 >= (int)config.tapes[0].hyp.size())
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
elem2 = config.head + index2;
}
else
{
if(!config.stackHasIndex(index2))
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
elem2 = config.stackGetElem(index2);
}
return {dict, &featName, dict->getStr(std::to_string(elem1-elem2)), policy};
return {dict, featName, std::to_string(elem1-elem2), policy};
}
FeatureModel::FeatureValue FeatureBank::rdep(Config & config, int index, const std::string & object, const std::string & tapeName, const std::string & featName)
......@@ -276,7 +276,7 @@ FeatureModel::FeatureValue FeatureBank::rdep(Config & config, int index, const s
if(object == "s")
{
if(!config.stackHasIndex(index))
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
index = config.stackGetElem(index);
}
......@@ -286,7 +286,7 @@ FeatureModel::FeatureValue FeatureBank::rdep(Config & config, int index, const s
}
if(index < 0 || index >= (int)tape.hyp.size())
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
int candidate = -1;
unsigned int maxDist = 10;
......@@ -299,12 +299,12 @@ FeatureModel::FeatureValue FeatureBank::rdep(Config & config, int index, const s
}
if(candidate == -1)
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
if(tape[candidate].empty())
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
return {dict, &featName, &tape[candidate], policy};
return {dict, featName, tape[candidate], policy};
}
FeatureModel::FeatureValue FeatureBank::simpleBufferAccess(Config & config, int relativeIndex, const std::string & tapeName, const std::string & featName)
......@@ -316,12 +316,12 @@ FeatureModel::FeatureValue FeatureBank::simpleBufferAccess(Config & config, int
int index = config.head + relativeIndex;
if(index < 0 || index >= (int)tape.hyp.size())
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
if(tape[index].empty())
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
return {dict, &featName, &tape[index], policy};
return {dict, featName, tape[index], policy};
}
FeatureModel::FeatureValue FeatureBank::simpleStackAccess(Config & config, int relativeIndex, const std::string & tapeName, const std::string & featName)
......@@ -331,29 +331,29 @@ FeatureModel::FeatureValue FeatureBank::simpleStackAccess(Config & config, int r
auto policy = dictPolicy2FeaturePolicy(dict->policy);
if(!config.stackHasIndex(relativeIndex))
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
int index = config.stackGetElem(relativeIndex);
if(index < 0 || index >= (int)tape.hyp.size())
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
if(tape[index].empty())
return {dict, &featName, &Dict::nullValueStr, policy};
return {dict, featName, Dict::nullValueStr, policy};
return {dict, &featName, &tape[index], policy};
return {dict, featName, tape[index], policy};
}
FeatureModel::FeatureValue FeatureBank::getUppercase(Config &, const FeatureModel::FeatureValue & fv)
{
Dict * dict = Dict::getDict("bool");
auto policy = dictPolicy2FeaturePolicy(dict->policy);
bool firstLetterUppercase = isUpper((*fv.values[0])[0]);
bool firstLetterUppercase = isUpper(fv.values[0][0]);
if(*fv.values[0] == Dict::nullValueStr)
return {dict, fv.names[0], &Dict::nullValueStr, policy};
if(fv.values[0] == Dict::nullValueStr)
return {dict, fv.names[0], Dict::nullValueStr, policy};
const std::string * str = dict->getStr(firstLetterUppercase ? std::string("true") : std::string("false"));
std::string str = firstLetterUppercase ? std::string("true") : std::string("false");
return {dict, fv.names[0], str, policy};
}
......@@ -362,16 +362,16 @@ FeatureModel::FeatureValue FeatureBank::getLength(Config &, const FeatureModel::
{
Dict * dict = Dict::getDict("int");
auto policy = dictPolicy2FeaturePolicy(dict->policy);
int len = lengthPrinted(*fv.values[0]);
int len = lengthPrinted(fv.values[0]);
if(*fv.values[0] == Dict::nullValueStr)
return {dict, fv.names[0], &Dict::nullValueStr, policy};
if(fv.values[0] == Dict::nullValueStr)
return {dict, fv.names[0], Dict::nullValueStr, policy};
int limit = 7;
if (len > limit)
len = limit;
const std::string * str = dict->getStr(std::to_string(len));
std::string str = std::to_string(len);
return {dict, fv.names[0], str, policy};
}
......@@ -381,13 +381,13 @@ FeatureModel::FeatureValue FeatureBank::getLetters(Config &, const FeatureModel:
Dict * dict = Dict::getDict("letters");
auto policy = dictPolicy2FeaturePolicy(dict->policy);
if(*fv.values[0] == Dict::nullValueStr)
return {dict, fv.names[0], &Dict::nullValueStr, policy};
if(fv.values[0] == Dict::nullValueStr)
return {dict, fv.names[0], Dict::nullValueStr, policy};
if(from < 0)
from = fv.values[0]->size() + from;
from = fv.values[0].size() + from;
if(to < 0)
to = fv.values[0]->size() + to;
to = fv.values[0].size() + to;
if(to < from)
{
......@@ -397,13 +397,13 @@ FeatureModel::FeatureValue FeatureBank::getLetters(Config &, const FeatureModel:
std::string letters;
for(int i = from; i <= to; i++)
if(i >= 0 && i < (int)(*fv.values[0]).size())
letters.push_back((*fv.values[0])[i]);
if(i >= 0 && i < (int)(fv.values[0]).size())
letters.push_back(fv.values[0][i]);
if(letters.empty())
return {dict, fv.names[0], &Dict::nullValueStr, policy};
return {dict, fv.names[0], Dict::nullValueStr, policy};
const std::string * str = dict->getStr(letters);
std::string str = letters;
return {dict, fv.names[0], str, policy};
}
......@@ -422,7 +422,6 @@ FeatureModel::FeatureValue FeatureBank::aggregateBuffer(Config & c, int from, in
Dict * dict = c.bd.getDictOfLine(tape.name);
auto policy = dictPolicy2FeaturePolicy(dict->policy);
bool ignored = false;
std::string * featName = &tape.name;
for (auto & except : exceptions)
if (except == tape.name)
{
......@@ -435,11 +434,12 @@ FeatureModel::FeatureValue FeatureBank::aggregateBuffer(Config & c, int from, in
for (int i = from; i <= to; i++)
{
int index = c.head + i;
std::string featName = "b."+std::to_string(i)+"."+tape.name;
if(index < 0 || index >= (int)tape.hyp.size())
{
result.dicts.emplace_back(dict);
result.names.emplace_back(featName);
result.values.emplace_back(&Dict::nullValueStr);
result.values.emplace_back(Dict::nullValueStr);
result.policies.emplace_back(policy);
continue;
}
......@@ -447,14 +447,14 @@ FeatureModel::FeatureValue FeatureBank::aggregateBuffer(Config & c, int from, in
{
result.dicts.emplace_back(dict);
result.names.emplace_back(featName);
result.values.emplace_back(&Dict::nullValueStr);
result.values.emplace_back(Dict::nullValueStr);
result.policies.emplace_back(policy);
continue;
}
result.dicts.emplace_back(dict);
result.names.emplace_back(featName);
result.values.emplace_back(&tape[index]);
result.values.emplace_back(tape[index]);
result.policies.emplace_back(policy);
}
}
......@@ -471,7 +471,6 @@ FeatureModel::FeatureValue FeatureBank::aggregateStack(Config & c, int from, con
Dict * dict = c.bd.getDictOfLine(tape.name);
auto policy = dictPolicy2FeaturePolicy(dict->policy);
bool ignored = false;
std::string * featName = &tape.name;
for (auto & except : exceptions)
if (except == tape.name)
{
......@@ -483,11 +482,12 @@ FeatureModel::FeatureValue FeatureBank::aggregateStack(Config & c, int from, con
for (int i = 0; i >= from; i--)
{
std::string featName = "s."+std::to_string(i)+"."+tape.name;
if(!c.stackHasIndex(i))
{
result.dicts.emplace_back(dict);
result.names.emplace_back(featName);
result.values.emplace_back(&Dict::nullValueStr);
result.values.emplace_back(Dict::nullValueStr);
result.policies.emplace_back(policy);
continue;
}
......@@ -496,14 +496,14 @@ FeatureModel::FeatureValue FeatureBank::aggregateStack(Config & c, int from, con
{
result.dicts.emplace_back(dict);
result.names.emplace_back(featName);
result.values.emplace_back(&Dict::nullValueStr);
result.values.emplace_back(Dict::nullValueStr);
result.policies.emplace_back(policy);
continue;
}
result.dicts.emplace_back(dict);
result.names.emplace_back(featName);
result.values.emplace_back(&tape[index]);
result.values.emplace_back(tape[index]);
result.policies.emplace_back(policy);
}
}
......
......@@ -51,7 +51,7 @@ std::string FeatureModel::FeatureValue::toString(unsigned int i)
}
Dict * dict = dicts[i];
unsigned int index = dict->getValue(*values[i]);
unsigned int index = dict->getValue(values[i]);
float * realVector = (*dict->getLookupParameter().values())[index].batch_ptr(0);
unsigned int dim = dict->getDimension();
......@@ -71,8 +71,8 @@ std::string FeatureModel::FeatureDescription::toString()
for(auto featValue : values)
for (unsigned int i = 0; i < featValue.dicts.size(); i++)
{
int size1 = lengthPrinted(*featValue.names[i]);
int size2 = lengthPrinted(" " + std::string(*featValue.values[i]));
int size1 = lengthPrinted(featValue.names[i]);
int size2 = lengthPrinted(" " + std::string(featValue.values[i]));
int size3 = lengthPrinted(" " + featValue.toString(i));
columnSizes[0] = std::max(columnSizes[0], size1);
......@@ -92,10 +92,10 @@ std::string FeatureModel::FeatureDescription::toString()
for(auto featValue : values)
for (unsigned int i = 0; i < featValue.dicts.size(); i++)
{
column = *featValue.names[i];
column = featValue.names[i];
while ((int)lengthPrinted(column) < columnSizes[0]){column.push_back(' ');}
res += column;
column = " " + std::string(*featValue.values[i]);
column = " " + std::string(featValue.values[i]);
while ((int)lengthPrinted(column) < columnSizes[1]){column.push_back(' ');}
res += column;
column = " " + featValue.toString(i);
......@@ -121,7 +121,7 @@ const char * FeatureModel::policy2str(Policy policy)
return "null";
}
FeatureModel::FeatureValue::FeatureValue(Dict * dict, const std::string * name, const std::string * value, Policy policy)
FeatureModel::FeatureValue::FeatureValue(Dict * dict, const std::string & name, const std::string & value, Policy policy)
{
dicts.emplace_back(dict);
names.emplace_back(name);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment