ContextModuleImpl::ContextModuleImpl(std::string name, const std::string & definition)
{
setName(name); // record this submodule's name (assumed helper from the Submodule base class)
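// The definition string must follow this layout (derived from the regex below):
// Targets{...} Columns{...} <SubModuleType>{...} In{...} Out{...} w2v{...}
// e.g. (hypothetical values): Targets{b.0 s.0} Columns{FORM} LSTM{1 1 0.3 1} In{64} Out{128} w2v{}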
std::regex regex("(?:(?:\\s|\\t)*)Targets\\{(.*)\\}(?:(?:\\s|\\t)*)Columns\\{(.*)\\}(?:(?:\\s|\\t)*)(\\S+)\\{(.*)\\}(?:(?:\\s|\\t)*)In\\{(.*)\\}(?:(?:\\s|\\t)*)Out\\{(.*)\\}(?:(?:\\s|\\t)*)w2v\\{(.*)\\}(?:(?:\\s|\\t)*)");
if (!util::doIfNameMatch(regex, definition, [this,&definition](auto sm)
{
try
{
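// Parse the targets: each one is of the form 'object.index' or 'object.index.childIndex'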
for (auto & target : util::split(sm.str(1), ' '))
{
auto splited = util::split(target, '.');
if (splited.size() != 2 and splited.size() != 3)
util::myThrow(fmt::format("invalid target '{}' expected 'object.index(.childIndex)'", target));
targets.emplace_back(std::make_tuple(Config::str2object(splited[0]), std::stoi(splited[1]), splited.size() == 3 ? std::optional<int>(std::stoi(splited[2])) : std::optional<int>()));
}
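// Parse the columns; each element may carry a transformation prefix ('function:COLUMN'),
// hence the split on ':' below to recover the bare column name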
auto funcColumns = util::split(sm.str(2), ' ');
columns.clear();
for (auto & funcCol : funcColumns)
{
functions.emplace_back() = getFunction(funcCol);
columns.emplace_back(util::split(funcCol, ':').back());
}
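// Submodule type (capture group 3) and its space-separated arguments (group 4):
// bidirectional, number of layers, dropout, complete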
auto subModuleType = sm.str(3);
auto subModuleArguments = util::split(sm.str(4), ' ');
auto options = MyModule::ModuleOptions(true)
.bidirectional(std::stoi(subModuleArguments[0]))
.num_layers(std::stoi(subModuleArguments[1]))
.dropout(std::stof(subModuleArguments[2]))
.complete(std::stoi(subModuleArguments[3]));
inSize = std::stoi(sm.str(5));
int outSize = std::stoi(sm.str(6));
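// Instantiate the requested submodule; Concat only needs the embedding size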
if (subModuleType == "LSTM")
myModule = register_module("myModule", LSTM(columns.size()*inSize, outSize, options));
else if (subModuleType == "GRU")
myModule = register_module("myModule", GRU(columns.size()*inSize, outSize, options));
else if (subModuleType == "Concat")
myModule = register_module("myModule", Concat(inSize));
else
util::myThrow(fmt::format("unknown sumodule type '{}'", subModuleType));
w2vFile = sm.str(7); // capture group 7: path to the pretrained word2vec embeddings
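// A non-empty w2v file closes the dictionary (no new entries) and flags the embeddings as pretrained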
if (!w2vFile.empty())
{
getDict().loadWord2Vec(w2vFile);
getDict().setState(Dict::State::Closed);
dictSetPretrained(true);
}
} catch (std::exception & e) {util::myThrow(fmt::format("{} in '{}'",e.what(),definition));}
}))
util::myThrow(fmt::format("invalid definition '{}'", definition));
}
std::size_t ContextModuleImpl::getOutputSize()
{
return myModule->getOutputSize(targets.size());
}
std::size_t ContextModuleImpl::getInputSize()
{
return columns.size()*(targets.size());
}
void ContextModuleImpl::addToContext(std::vector<std::vector<long>> & context, const Config & config)
{
auto & dict = getDict();
std::vector<long> contextIndexes;
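// Resolve each target to an absolute word index; -1 marks a target absent from the configuration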
for (auto & target : targets)
if (config.hasRelativeWordIndex(std::get<0>(target), std::get<1>(target)))
{
int baseIndex = config.getRelativeWordIndex(std::get<0>(target), std::get<1>(target));
if (!std::get<2>(target))
contextIndexes.emplace_back(baseIndex);
else
{
int childIndex = *std::get<2>(target);
auto childs = util::split(config.getAsFeature(Config::childsColName, baseIndex).get(), '|');
if (childIndex >= 0 and childIndex < (int)childs.size())
contextIndexes.emplace_back(std::stoi(childs[childIndex]));
else if (childIndex < 0 and ((int)childs.size())+childIndex >= 0)
contextIndexes.emplace_back(std::stoi(childs[childs.size()+childIndex]));
else
contextIndexes.emplace_back(-1);
}
}
else
contextIndexes.emplace_back(-1);
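// For every resolved word and every column, append the dictionary index of the
// (possibly transformed) feature value to each context; -1 yields the null value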
for (auto index : contextIndexes)
for (unsigned int colIndex = 0; colIndex < columns.size(); colIndex++)
{
auto & col = columns[colIndex];
if (index == -1)
{
for (auto & contextElement : context)
contextElement.push_back(dict.getIndexOrInsert(Dict::nullValueStr));
}
else
{
int dictIndex = dict.getIndexOrInsert(functions[colIndex](config.getAsFeature(col, index)));
for (auto & contextElement : context)
contextElement.push_back(dictIndex);
}
}
}
torch::Tensor ContextModuleImpl::forward(torch::Tensor input)
{
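// Embed this module's slice of the input (getInputSize() values starting at firstInputIndex)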
auto context = wordEmbeddings(input.narrow(1, firstInputIndex, getInputSize()));
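// Regroup: (batch, words*columns, embSize) -> (batch, words, columns*embSize)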
context = context.view({context.size(0), context.size(1)/(int)columns.size(), (int)columns.size()*context.size(2)});
return myModule->forward(context);
}
void ContextModuleImpl::registerEmbeddings()
{
wordEmbeddings = register_module("embeddings", torch::nn::Embedding(torch::nn::EmbeddingOptions(getDict().size(), inSize)));
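// Initialize the embedding table from the pretrained word2vec file, if one was given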
loadPretrainedW2vEmbeddings(wordEmbeddings, w2vFile);
}