#include "FocusedColumnModule.hpp" FocusedColumnModuleImpl::FocusedColumnModuleImpl(std::string name, const std::string & definition) { setName(name); std::regex regex("(?:(?:\\s|\\t)*)Column\\{(.*)\\}(?:(?:\\s|\\t)*)NbElem\\{(.*)\\}(?:(?:\\s|\\t)*)Buffer\\{(.*)\\}(?:(?:\\s|\\t)*)Stack\\{(.*)\\}(?:(?:\\s|\\t)*)(\\S+)\\{(.*)\\}(?:(?:\\s|\\t)*)In\\{(.*)\\}(?:(?:\\s|\\t)*)Out\\{(.*)\\}(?:(?:\\s|\\t)*)"); if (!util::doIfNameMatch(regex, definition, [this,&definition](auto sm) { try { func = getFunction(sm.str(1)); column = util::split(sm.str(1), ':').back(); maxNbElements = std::stoi(sm.str(2)); for (auto & index : util::split(sm.str(3), ' ')) focusedBuffer.emplace_back(std::stoi(index)); for (auto & index : util::split(sm.str(4), ' ')) focusedStack.emplace_back(std::stoi(index)); auto subModuleType = sm.str(5); auto subModuleArguments = util::split(sm.str(6), ' '); auto options = MyModule::ModuleOptions(true) .bidirectional(std::stoi(subModuleArguments[0])) .num_layers(std::stoi(subModuleArguments[1])) .dropout(std::stof(subModuleArguments[2])) .complete(std::stoi(subModuleArguments[3])); inSize = std::stoi(sm.str(7)); int outSize = std::stoi(sm.str(8)); if (subModuleType == "LSTM") myModule = register_module("myModule", LSTM(inSize, outSize, options)); else if (subModuleType == "GRU") myModule = register_module("myModule", GRU(inSize, outSize, options)); else if (subModuleType == "Concat") myModule = register_module("myModule", Concat(inSize)); else util::myThrow(fmt::format("unknown sumodule type '{}'", subModuleType)); } catch (std::exception & e) {util::myThrow(fmt::format("{} in '{}'",e.what(),definition));} })) util::myThrow(fmt::format("invalid definition '{}'", definition)); } torch::Tensor FocusedColumnModuleImpl::forward(torch::Tensor input) { std::vector<torch::Tensor> outputs; for (unsigned int i = 0; i < focusedBuffer.size()+focusedStack.size(); i++) outputs.emplace_back(myModule->forward(wordEmbeddings(input.narrow(1, firstInputIndex+i*maxNbElements, maxNbElements)))); return torch::cat(outputs, 1); } std::size_t FocusedColumnModuleImpl::getOutputSize() { return (focusedBuffer.size()+focusedStack.size())*myModule->getOutputSize(maxNbElements); } std::size_t FocusedColumnModuleImpl::getInputSize() { return (focusedBuffer.size()+focusedStack.size()) * maxNbElements; } void FocusedColumnModuleImpl::addToContext(std::vector<std::vector<long>> & context, const Config & config) { auto & dict = getDict(); std::vector<long> focusedIndexes; for (int index : focusedBuffer) focusedIndexes.emplace_back(config.getRelativeWordIndex(index)); for (int index : focusedStack) if (config.hasStack(index)) focusedIndexes.emplace_back(config.getStack(index)); else focusedIndexes.emplace_back(-1); for (auto & contextElement : context) { for (auto index : focusedIndexes) { if (index == -1) { for (int i = 0; i < maxNbElements; i++) contextElement.emplace_back(dict.getIndexOrInsert(Dict::nullValueStr)); continue; } std::vector<std::string> elements; if (column == "FORM") { auto asUtf8 = util::splitAsUtf8(func(config.getAsFeature(column, index).get())); for (int i = 0; i < maxNbElements; i++) if (i < (int)asUtf8.size()) elements.emplace_back(fmt::format("{}", asUtf8[i])); else elements.emplace_back(Dict::nullValueStr); } else if (column == "FEATS") { auto splited = util::split(func(config.getAsFeature(column, index).get()), '|'); for (int i = 0; i < maxNbElements; i++) if (i < (int)splited.size()) elements.emplace_back(fmt::format("FEATS({})", splited[i])); else elements.emplace_back(Dict::nullValueStr); } 
else if (column == "ID") { if (config.isTokenPredicted(index)) elements.emplace_back("ID(TOKEN)"); else if (config.isMultiwordPredicted(index)) elements.emplace_back("ID(MULTIWORD)"); else if (config.isEmptyNodePredicted(index)) elements.emplace_back("ID(EMPTYNODE)"); } else if (column == "EOS") { bool isEOS = func(config.getAsFeature(Config::EOSColName, index)) == Config::EOSSymbol1; elements.emplace_back(fmt::format("EOS({})", isEOS)); } else { elements.emplace_back(func(config.getAsFeature(column, index))); } if ((int)elements.size() != maxNbElements) util::myThrow(fmt::format("elements.size ({}) != maxNbElements ({})", elements.size(), maxNbElements)); for (auto & element : elements) contextElement.emplace_back(dict.getIndexOrInsert(element)); } } } void FocusedColumnModuleImpl::registerEmbeddings() { wordEmbeddings = register_module("embeddings", torch::nn::Embedding(torch::nn::EmbeddingOptions(getDict().size(), inSize))); }