Skip to content
Snippets Groups Projects
Commit f7b0f0f3 authored by Franck Dary
Browse files

Added a way to not use sequences when shuffling

parent c9a28275
No related branches found
No related tags found
No related merge requests found
......@@ -80,7 +80,7 @@ po::options_description getTrainOptionsDescription()
("randomParameters", po::value<bool>()->default_value(true),
"When activated, the parameters will be randomly initialized")
("sequenceDelimiterTape", po::value<std::string>()->default_value("EOS"),
"The name of the buffer's tape that contains the delimiter token for a sequence")
"The name of the buffer's tape that contains the delimiter token for a sequence, or 0 not to use sequences")
("sequenceDelimiter", po::value<std::string>()->default_value("1"),
"The value of the token that act as a delimiter for sequences")
("batchSize", po::value<int>()->default_value(50),
......
......@@ -327,9 +327,20 @@ LimitedStack<float> & Config::getCurrentStateEntropyHistory()
void Config::shuffle(const std::string & delimiterTape, const std::string & delimiter)
{
auto & tape = getTape(delimiterTape);
std::vector< std::pair<unsigned int, unsigned int> > delimiters;
if (delimiterTape == "0")
{
unsigned int previousIndex = 0;
for (int i = 0; i < tapes[0].refSize(); i++)
{
delimiters.emplace_back(previousIndex, i);
previousIndex = i+1;
}
}
else
{
auto & tape = getTape(delimiterTape);
unsigned int previousIndex = 0;
for (int i = 0; i < tape.refSize(); i++)
if (tape.getRef(i-head) == delimiter)
......@@ -337,14 +348,15 @@ void Config::shuffle(const std::string & delimiterTape, const std::string & deli
delimiters.emplace_back(previousIndex, i);
previousIndex = i+1;
}
}
if (delimiters.empty())
{
fprintf(stderr, "ERROR (%s) : Requested to shuffle based on tape \'%s\' with \'%s\' as a delimiter, but none as been found. Aborting.\n", ERRINFO, delimiterTape.c_str(), delimiter.c_str());
exit(1);
fprintf(stderr, "WARNING (%s) : Requested to shuffle based on tape \'%s\' with \'%s\' as a delimiter, but none has been found. Aborting.\n", ERRINFO, delimiterTape.c_str(), delimiter.c_str());
return;
}
std::pair<unsigned int, unsigned int> suffix = {delimiters.back().second+1, tape.refSize()-1};
std::pair<unsigned int, unsigned int> suffix = {delimiters.back().second+1, tapes[0].refSize()-1};
std::random_shuffle(delimiters.begin(), delimiters.end());
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment