#include "DepthLayerTreeEmbeddingModule.hpp"
// Builds the module from its textual definition.
//
// `definition` must match, in this order (optionally separated by whitespace):
//   Columns{c1 c2 ...} Buffer{i ...} Stack{i ...} LayerSizes{n ...} <Type>{a0 a1 a2 a3} In{n} Out{n}
// where:
//   - Columns    : space-separated column names, stored in `columns`;
//   - Buffer     : integers appended to `focusedBuffer`;
//   - Stack      : integers appended to `focusedStack`;
//   - LayerSizes : integers appended to `maxElemPerDepth` (one per depth);
//   - <Type>     : "LSTM", "GRU" or "Concat"; its four arguments are parsed as
//                  bidirectional (int), num_layers (int), dropout (float) and complete (int);
//   - In / Out   : integers used as the embedding size (`inSize`) and the submodule output size.
//
// One submodule named "<depth>_<Type>" is registered per entry of `maxElemPerDepth`.
//
// Any parsing error is reported through util::myThrow together with the offending
// definition; a definition that does not match the pattern at all is reported as invalid.
DepthLayerTreeEmbeddingModuleImpl::DepthLayerTreeEmbeddingModuleImpl(std::string name, const std::string & definition)
{
  setName(name);
  std::regex regex("(?:(?:\\s|\\t)*)Columns\\{(.*)\\}(?:(?:\\s|\\t)*)Buffer\\{(.*)\\}(?:(?:\\s|\\t)*)Stack\\{(.*)\\}(?:(?:\\s|\\t)*)LayerSizes\\{(.*)\\}(?:(?:\\s|\\t)*)(\\S+)\\{(.*)\\}(?:(?:\\s|\\t)*)In\\{(.*)\\}(?:(?:\\s|\\t)*)Out\\{(.*)\\}(?:(?:\\s|\\t)*)");
  if (!util::doIfNameMatch(regex, definition, [this,&definition](auto sm)
        {
          try
          {
            columns = util::split(sm.str(1), ' ');

            for (auto & index : util::split(sm.str(2), ' '))
              focusedBuffer.emplace_back(std::stoi(index));

            for (auto & index : util::split(sm.str(3), ' '))
              focusedStack.emplace_back(std::stoi(index));

            for (auto & elem : util::split(sm.str(4), ' '))
              maxElemPerDepth.emplace_back(std::stoi(elem));

            auto subModuleType = sm.str(5);
            auto subModuleArguments = util::split(sm.str(6), ' ');

            // The four positional arguments are indexed below; refuse a shorter
            // list explicitly instead of reading past the end of the vector.
            if (subModuleArguments.size() < 4)
              util::myThrow(fmt::format("expected 4 submodule arguments, got {}", subModuleArguments.size()));

            auto options = MyModule::ModuleOptions(true)
              .bidirectional(std::stoi(subModuleArguments[0]))
              .num_layers(std::stoi(subModuleArguments[1]))
              .dropout(std::stof(subModuleArguments[2]))
              .complete(std::stoi(subModuleArguments[3]));

            inSize = std::stoi(sm.str(7));
            int outSize = std::stoi(sm.str(8));

            for (unsigned int i = 0; i < maxElemPerDepth.size(); i++)
            {
              // Registered name of the submodule handling depth `i`.
              std::string subModuleName = fmt::format("{}_{}", i, subModuleType);
              if (subModuleType == "LSTM")
                depthModules.emplace_back(register_module(subModuleName, LSTM(columns.size()*inSize, outSize, options)));
              else if (subModuleType == "GRU")
                depthModules.emplace_back(register_module(subModuleName, GRU(columns.size()*inSize, outSize, options)));
              else if (subModuleType == "Concat")
                depthModules.emplace_back(register_module(subModuleName, Concat(inSize)));
              else
                util::myThrow(fmt::format("unknown submodule type '{}'", subModuleType));
            }

          } catch (const std::exception & e) {util::myThrow(fmt::format("{} in '{}'",e.what(),definition));}
        }))
    util::myThrow(fmt::format("invalid definition '{}'", definition));
}

// Embeds this module's slice of `input` (columns [firstInputIndex, firstInputIndex+getInputSize())
// along dimension 1), then, for every focused element and every depth, feeds the matching
// chunk of embeddings to that depth's submodule. All submodule outputs are concatenated
// along dimension 1.
torch::Tensor DepthLayerTreeEmbeddingModuleImpl::forward(torch::Tensor input)
{
  auto embedded = wordEmbeddings(input.narrow(1, firstInputIndex, getInputSize()));

  std::vector<torch::Tensor> results;

  // Position, along dimension 1 of `embedded`, of the next chunk to consume.
  int cursor = 0;

  for (unsigned int focusedIndex = 0; focusedIndex < focusedBuffer.size()+focusedStack.size(); focusedIndex++)
  {
    for (unsigned int depth = 0; depth < maxElemPerDepth.size(); depth++)
    {
      // One entry per (element, column) pair at this depth.
      auto chunk = embedded.narrow(1, cursor, maxElemPerDepth[depth]*columns.size());

      // Group the columns of a same element together on the last dimension.
      auto perElement = chunk.view({embedded.size(0), maxElemPerDepth[depth], static_cast<long>(columns.size())*embedded.size(2)});

      results.emplace_back(depthModules[depth]->forward(perElement));
      cursor += maxElemPerDepth[depth]*columns.size();
    }
  }

  return torch::cat(results, 1);
}

std::size_t DepthLayerTreeEmbeddingModuleImpl::getOutputSize()
70
71
{
  std::size_t outputSize = 0;
Franck Dary's avatar
Franck Dary committed
72

73
  for (unsigned int depth = 0; depth < maxElemPerDepth.size(); depth++)
74
    outputSize += depthModules[depth]->getOutputSize(maxElemPerDepth[depth]);
75

76
  return outputSize*(focusedBuffer.size()+focusedStack.size());
77
78
}

std::size_t DepthLayerTreeEmbeddingModuleImpl::getInputSize()
80
{
81
82
83
84
  int inputSize = 0;
  for (int maxElem : maxElemPerDepth)
    inputSize += (focusedBuffer.size()+focusedStack.size())*maxElem*columns.size();
  return inputSize;
Franck Dary's avatar
Franck Dary committed
85
86
}

void DepthLayerTreeEmbeddingModuleImpl::addToContext(std::vector<std::vector<long>> & context, const Config & config)
Franck Dary's avatar
Franck Dary committed
88
{
Franck Dary's avatar
Franck Dary committed
89
  auto & dict = getDict();
90
91
92
93
94
95
96
97
98
99
100
101
102
103
  std::vector<long> focusedIndexes;

  for (int index : focusedBuffer)
    focusedIndexes.emplace_back(config.getRelativeWordIndex(index));

  for (int index : focusedStack)
    if (config.hasStack(index))
      focusedIndexes.emplace_back(config.getStack(index));
    else
      focusedIndexes.emplace_back(-1);

  for (auto & contextElement : context)
    for (auto index : focusedIndexes)
    {
104
      std::vector<std::string> childs{std::to_string(index)};
Franck Dary's avatar
Franck Dary committed
105

106
107
108
109
110
111
112
113
114
115
      for (unsigned int depth = 0; depth < maxElemPerDepth.size(); depth++)
      {
        std::vector<std::string> newChilds;
        for (auto & child : childs)
          if (config.has(Config::childsColName, std::stoi(child), 0))
          {
            auto val = util::split(config.getAsFeature(Config::childsColName, std::stoi(child)).get(), '|');
            newChilds.insert(newChilds.end(), val.begin(), val.end());
          }
        childs = newChilds;
116

117
118
        for (int i = 0; i < maxElemPerDepth[depth]; i++)
          for (auto & col : columns)
119
            if (i < (int)newChilds.size() and config.has(col, std::stoi(newChilds[i]), 0))
120
              contextElement.emplace_back(dict.getIndexOrInsert(config.getAsFeature(col,std::stoi(newChilds[i])), col));
121
            else
122
              contextElement.emplace_back(dict.getIndexOrInsert(Dict::nullValueStr, col));
123
      }
124
    }
Franck Dary's avatar
Franck Dary committed
125
126
}

void DepthLayerTreeEmbeddingModuleImpl::registerEmbeddings()
128
{
129
  wordEmbeddings = register_module("embeddings", WordEmbeddings(getDict().size(), inSize));
130
131
}