#include "DistanceModule.hpp" DistanceModuleImpl::DistanceModuleImpl(std::string name, const std::string & definition) { setName(name); std::regex regex("(?:(?:\\s|\\t)*)FromBuffer\\{(.*)\\}(?:(?:\\s|\\t)*)FromStack\\{(.*)\\}(?:(?:\\s|\\t)*)ToBuffer\\{(.*)\\}(?:(?:\\s|\\t)*)ToStack\\{(.*)\\}(?:(?:\\s|\\t)*)Threshold\\{(.*)\\}(?:(?:\\s|\\t)*)(\\S+)\\{(.*)\\}(?:(?:\\s|\\t)*)In\\{(.*)\\}(?:(?:\\s|\\t)*)Out\\{(.*)\\}(?:(?:\\s|\\t)*)"); if (!util::doIfNameMatch(regex, definition, [this,&definition](auto sm) { try { for (auto & index : util::split(sm.str(1), ' ')) fromBuffer.emplace_back(std::stoi(index)); for (auto & index : util::split(sm.str(2), ' ')) fromStack.emplace_back(std::stoi(index)); for (auto & index : util::split(sm.str(3), ' ')) toBuffer.emplace_back(std::stoi(index)); for (auto & index : util::split(sm.str(4), ' ')) toStack.emplace_back(std::stoi(index)); threshold = std::stoi(sm.str(5)); auto subModuleType = sm.str(6); auto subModuleArguments = util::split(sm.str(7), ' '); auto options = MyModule::ModuleOptions(true) .bidirectional(std::stoi(subModuleArguments[0])) .num_layers(std::stoi(subModuleArguments[1])) .dropout(std::stof(subModuleArguments[2])) .complete(std::stoi(subModuleArguments[3])); inSize = std::stoi(sm.str(8)); int outSize = std::stoi(sm.str(9)); if (subModuleType == "LSTM") myModule = register_module("myModule", LSTM(inSize, outSize, options)); else if (subModuleType == "GRU") myModule = register_module("myModule", GRU(inSize, outSize, options)); else if (subModuleType == "Concat") myModule = register_module("myModule", Concat(inSize)); else util::myThrow(fmt::format("unknown sumodule type '{}'", subModuleType)); } catch (std::exception & e) {util::myThrow(fmt::format("{} in '{}'",e.what(),definition));} })) util::myThrow(fmt::format("invalid definition '{}'", definition)); } torch::Tensor DistanceModuleImpl::forward(torch::Tensor input) { return myModule->forward(wordEmbeddings(input.narrow(1, firstInputIndex, getInputSize()))); } std::size_t DistanceModuleImpl::getOutputSize() { return myModule->getOutputSize(getInputSize()); } std::size_t DistanceModuleImpl::getInputSize() { return (fromBuffer.size()+fromStack.size()) * (toBuffer.size()+toStack.size()); } void DistanceModuleImpl::addToContext(std::vector<std::vector<long>> & context, const Config & config) { auto & dict = getDict(); std::vector<long> fromIndexes, toIndexes; for (int index : fromBuffer) fromIndexes.emplace_back(config.getRelativeWordIndex(index)); for (int index : fromStack) if (config.hasStack(index)) fromIndexes.emplace_back(config.getStack(index)); else fromIndexes.emplace_back(-1); for (int index : toBuffer) toIndexes.emplace_back(config.getRelativeWordIndex(index)); for (int index : toStack) if (config.hasStack(index)) toIndexes.emplace_back(config.getStack(index)); else toIndexes.emplace_back(-1); for (auto & contextElement : context) { for (auto from : fromIndexes) for (auto to : toIndexes) { if (from == -1 or to == -1) { contextElement.emplace_back(dict.getIndexOrInsert(Dict::nullValueStr)); continue; } long dist = std::abs(config.getRelativeDistance(from, to)); if (dist <= threshold) contextElement.emplace_back(dict.getIndexOrInsert(fmt::format("distance({})", dist))); else contextElement.emplace_back(dict.getIndexOrInsert(Dict::unknownValueStr)); } } } void DistanceModuleImpl::registerEmbeddings() { wordEmbeddings = register_module("embeddings", torch::nn::Embedding(torch::nn::EmbeddingOptions(getDict().size(), inSize))); }