Newer
Older
/// Builds the module from its textual definition.
///
/// The definition must match, in this order (optionally separated by
/// whitespace):
///   Column{..} NbElem{..} Buffer{..} Stack{..} <SubModule>{..} In{..} Out{..}
///
/// - Column    : passed to getFunction(); the part after the last ':' is kept
///               as the column name.
/// - NbElem    : integer stored in maxNbElements.
/// - Buffer    : space-separated integers appended to focusedBuffer.
/// - Stack     : space-separated integers appended to focusedStack.
/// - SubModule : "LSTM", "GRU" or "Concat"; its braces hold four
///               space-separated values, in order: bidirectional, num_layers,
///               dropout, complete.
/// - In / Out  : integer sizes handed to the submodule (Concat only uses In).
///
/// Every failure is reported through util::myThrow: a definition that does not
/// match the pattern, a value that cannot be parsed, a submodule argument list
/// that is too short, or an unknown submodule type.
///
/// NOTE(review): `name` is not used anywhere in the visible body — confirm
/// whether it is supposed to be registered on the module here.
FocusedColumnModuleImpl::FocusedColumnModuleImpl(std::string name, const std::string & definition)
{
  std::regex regex("(?:(?:\\s|\\t)*)Column\\{(.*)\\}(?:(?:\\s|\\t)*)NbElem\\{(.*)\\}(?:(?:\\s|\\t)*)Buffer\\{(.*)\\}(?:(?:\\s|\\t)*)Stack\\{(.*)\\}(?:(?:\\s|\\t)*)(\\S+)\\{(.*)\\}(?:(?:\\s|\\t)*)In\\{(.*)\\}(?:(?:\\s|\\t)*)Out\\{(.*)\\}(?:(?:\\s|\\t)*)");
  if (!util::doIfNameMatch(regex, definition, [this,&definition](auto sm)
        {
          try
          {
            func = getFunction(sm.str(1));
            column = util::split(sm.str(1), ':').back();
            maxNbElements = std::stoi(sm.str(2));

            for (auto & index : util::split(sm.str(3), ' '))
              focusedBuffer.emplace_back(std::stoi(index));

            for (auto & index : util::split(sm.str(4), ' '))
              focusedStack.emplace_back(std::stoi(index));

            auto subModuleType = sm.str(5);
            auto subModuleArguments = util::split(sm.str(6), ' ');

            // The four options below are read unconditionally; check the count
            // first so a short list is reported instead of reading out of bounds.
            if (subModuleArguments.size() < 4)
              util::myThrow(fmt::format("expected 4 submodule arguments, got {}", subModuleArguments.size()));

            auto options = MyModule::ModuleOptions(true)
              .bidirectional(std::stoi(subModuleArguments[0]))
              .num_layers(std::stoi(subModuleArguments[1]))
              .dropout(std::stof(subModuleArguments[2]))
              .complete(std::stoi(subModuleArguments[3]));

            inSize = std::stoi(sm.str(7));
            int outSize = std::stoi(sm.str(8));

            if (subModuleType == "LSTM")
              myModule = register_module("myModule", LSTM(inSize, outSize, options));
            else if (subModuleType == "GRU")
              myModule = register_module("myModule", GRU(inSize, outSize, options));
            else if (subModuleType == "Concat")
              myModule = register_module("myModule", Concat(inSize));
            else
              util::myThrow(fmt::format("unknown submodule type '{}'", subModuleType));

          // Re-report any parsing error together with the offending definition.
          } catch (const std::exception & e) {util::myThrow(fmt::format("{} in '{}'",e.what(),definition));}
        }))
    util::myThrow(fmt::format("invalid definition '{}'", definition));
}
/// Runs the submodule once per focused position and joins the results.
///
/// For the i-th focused position (buffer positions first, then stack
/// positions), a slice of `maxNbElements` entries is taken from `input` along
/// dimension 1, starting at `firstInputIndex + i*maxNbElements`. The slice is
/// embedded with `wordEmbeddings` and passed to `myModule`.
///
/// @param input tensor sliced along dimension 1
///              (presumably (batch, features) indices — TODO confirm).
/// @return the per-position outputs concatenated along dimension 1.
torch::Tensor FocusedColumnModuleImpl::forward(torch::Tensor input)
{
  std::vector<torch::Tensor> outputs;
  for (unsigned int i = 0; i < focusedBuffer.size()+focusedStack.size(); i++)
    outputs.emplace_back(myModule->forward(wordEmbeddings(input.narrow(1, firstInputIndex+i*maxNbElements, maxNbElements))));

  // NOTE(review): dimension 1 is chosen to agree with narrow(1, ...) above and
  // with getOutputSize() being a per-position sum — confirm against callers.
  return torch::cat(outputs, 1);
}
std::size_t FocusedColumnModuleImpl::getOutputSize()
return (focusedBuffer.size()+focusedStack.size())*myModule->getOutputSize(maxNbElements);
/// Number of input slots this module consumes: `maxNbElements` for every
/// focused position, counting buffer and stack positions together.
std::size_t FocusedColumnModuleImpl::getInputSize()
{
  const auto nbFocusedPositions = focusedBuffer.size() + focusedStack.size();
  return nbFocusedPositions * maxNbElements;
}
void FocusedColumnModuleImpl::addToContext(std::vector<std::vector<long>> & context, const Config & config)
std::vector<long> focusedIndexes;
for (int index : focusedBuffer)
focusedIndexes.emplace_back(config.getRelativeWordIndex(index));
for (int index : focusedStack)
if (config.hasStack(index))
focusedIndexes.emplace_back(config.getStack(index));
else
focusedIndexes.emplace_back(-1);
for (auto & contextElement : context)
{
for (auto index : focusedIndexes)
{
if (index == -1)
{
for (int i = 0; i < maxNbElements; i++)
contextElement.emplace_back(dict.getIndexOrInsert(Dict::nullValueStr));
continue;
}
std::vector<std::string> elements;
if (column == "FORM")
{
auto asUtf8 = util::splitAsUtf8(func(config.getAsFeature(column, index).get()));
for (int i = 0; i < maxNbElements; i++)
if (i < (int)asUtf8.size())
elements.emplace_back(fmt::format("{}", asUtf8[i]));
else
elements.emplace_back(Dict::nullValueStr);
}
else if (column == "FEATS")
{
auto splited = util::split(func(config.getAsFeature(column, index).get()), '|');
for (int i = 0; i < maxNbElements; i++)
if (i < (int)splited.size())
elements.emplace_back(fmt::format("FEATS({})", splited[i]));
else
elements.emplace_back(Dict::nullValueStr);
}
else if (column == "ID")
{
if (config.isTokenPredicted(index))
elements.emplace_back("ID(TOKEN)");
else if (config.isMultiwordPredicted(index))
elements.emplace_back("ID(MULTIWORD)");
else if (config.isEmptyNodePredicted(index))
elements.emplace_back("ID(EMPTYNODE)");
}
Franck Dary
committed
else if (column == "EOS")
{
bool isEOS = func(config.getAsFeature(Config::EOSColName, index)) == Config::EOSSymbol1;
Franck Dary
committed
elements.emplace_back(fmt::format("EOS({})", isEOS));
}
elements.emplace_back(func(config.getAsFeature(column, index)));
}
if ((int)elements.size() != maxNbElements)
util::myThrow(fmt::format("elements.size ({}) != maxNbElements ({})", elements.size(), maxNbElements));
for (auto & element : elements)
contextElement.emplace_back(dict.getIndexOrInsert(element));
}
}
}
Franck Dary
committed
void FocusedColumnModuleImpl::registerEmbeddings()
Franck Dary
committed
{
wordEmbeddings = register_module("embeddings", torch::nn::Embedding(torch::nn::EmbeddingOptions(getDict().size(), inSize)));