#include "UppercaseRateModule.hpp"
#include "NeuralNetwork.hpp"

// Builds the module from a textual definition of the form
//   Buffer{<indexes>} Stack{<indexes>} <Type>{<arg0> <arg1> <arg2> <arg3>} Out{<size>}
// where:
//   - the Buffer and Stack groups are space-separated integers stored in
//     focusedBuffer / focusedStack,
//   - <Type> is one of "LSTM", "GRU" or "Concat",
//   - the four arguments are parsed, in order, as bidirectional (int),
//     num_layers (int), dropout (float) and complete (int),
//   - Out is the output size handed to the LSTM / GRU submodule.
// Any std::exception raised while parsing is re-thrown through util::myThrow
// with the offending definition appended. util::myThrow is also called with
// "invalid definition" when util::doIfNameMatch returns false.
UppercaseRateModuleImpl::UppercaseRateModuleImpl(std::string name, const std::string & definition)
{
  setName(name);

  std::regex regex("(?:(?:\\s|\\t)*)Buffer\\{(.*)\\}(?:(?:\\s|\\t)*)Stack\\{(.*)\\}(?:(?:\\s|\\t)*)(\\S+)\\{(.*)\\}(?:(?:\\s|\\t)*)Out\\{(.*)\\}(?:(?:\\s|\\t)*)");

  if (!util::doIfNameMatch(regex, definition, [this,&definition](auto sm)
        {
          try
          {
            for (const auto & index : util::split(sm.str(1), ' '))
              focusedBuffer.emplace_back(std::stoi(index));

            for (const auto & index : util::split(sm.str(2), ' '))
              focusedStack.emplace_back(std::stoi(index));

            auto subModuleType = sm.str(3);
            auto subModuleArguments = util::split(sm.str(4), ' ');

            // The four arguments are read by position below; indexing past the
            // end of the split result would be undefined behaviour, so reject
            // short argument lists explicitly instead.
            constexpr std::size_t nbExpectedArguments = 4;
            if (subModuleArguments.size() < nbExpectedArguments)
              util::myThrow(fmt::format("expected {} submodule arguments, got {}", nbExpectedArguments, subModuleArguments.size()));

            auto options = MyModule::ModuleOptions(true)
              .bidirectional(std::stoi(subModuleArguments[0]))
              .num_layers(std::stoi(subModuleArguments[1]))
              .dropout(std::stof(subModuleArguments[2]))
              .complete(std::stoi(subModuleArguments[3]));

            int outSize = std::stoi(sm.str(5));

            // Every submodule is built with an input feature size of 1.
            if (subModuleType == "LSTM")
              myModule = register_module("myModule", LSTM(1, outSize, options));
            else if (subModuleType == "GRU")
              myModule = register_module("myModule", GRU(1, outSize, options));
            else if (subModuleType == "Concat")
              myModule = register_module("myModule", Concat(1));
            else
              util::myThrow(fmt::format("unknown sumodule type '{}'", subModuleType));
          } catch (const std::exception & e) {util::myThrow(fmt::format("{} in '{}'",e.what(),definition));}
        }))
    util::myThrow(fmt::format("invalid definition '{}'", definition));
}

// Runs the submodule on this module's slice of the input.
// Takes getInputSize() columns of `input` along dimension 1, starting at
// firstInputIndex, views that memory as doubles, converts to float, appends a
// trailing dimension of size 1, and reshapes the submodule output to
// {input.size(0), -1}.
torch::Tensor UppercaseRateModuleImpl::forward(torch::Tensor input)
{
  auto context = input.narrow(1, firstInputIndex, getInputSize());

  // The slice's storage is re-read as kDouble regardless of the dtype of `input`.
  // NOTE(review): presumably the context stores the rates as raw double bits — confirm against addToContext.
  auto values = torch::from_blob(context.data_ptr(), context.sizes(), context.strides(), torch::TensorOptions(torch::kDouble).requires_grad(false).device(NeuralNetworkImpl::device)).to(torch::kFloat).unsqueeze(-1).clone();

  return myModule->forward(values).reshape({input.size(0), -1});
}

// Output size reported by the submodule for getInputSize() input elements.
std::size_t UppercaseRateModuleImpl::getOutputSize()
{
  return myModule->getOutputSize(getInputSize());
}

// Number of context slots used by this module: one per focused buffer index
// plus one per focused stack index.
std::size_t UppercaseRateModuleImpl::getInputSize()
{
  return focusedBuffer.size() + focusedStack.size();
}

// Writes, for every focused buffer / stack element, the proportion of
// uppercase letters of its "FORM" feature into `context`, starting at
// firstInputIndex. The value is -1.0 when the element has no valid word index
// or when its form is empty.
void UppercaseRateModuleImpl::addToContext(torch::Tensor & context, const Config & config)
{
  std::vector<long> focusedIndexes;
  focusedIndexes.reserve(focusedBuffer.size() + focusedStack.size());

  for (int index : focusedBuffer)
    focusedIndexes.emplace_back(config.getRelativeWordIndex(index));

  // Stack positions the configuration does not have are recorded as -1.
  for (int index : focusedStack)
    if (config.hasStack(index))
      focusedIndexes.emplace_back(config.getStack(index));
    else
      focusedIndexes.emplace_back(-1);

  int insertIndex = 0;
  for (auto index : focusedIndexes)
  {
    double res = -1.0;
    if (index >= 0)
    {
      auto word = util::splitAsUtf8(config.getAsFeature("FORM", index).get());
      int nbUpper = 0;
      for (auto & letter : word)
        if (util::isUppercase(letter))
          nbUpper++;
      if (word.size() > 0)
        res = 1.0*nbUpper/word.size();
    }

    //TODO : Check if this works
    // NOTE(review): forward() re-reads this slot as a raw double; if `context`
    // is not a double tensor this assignment converts the value instead of
    // storing its bits — confirm the dtype of `context`.
    context[firstInputIndex+insertIndex] = res;
    insertIndex++;
  }
}

// This module registers no embeddings.
void UppercaseRateModuleImpl::registerEmbeddings()
{
}