Newer
Older
#include "RTLSTMNetwork.hpp"
// Configures the context window and registers every sub-module and learned
// parameter of the network.
//
// nbOutputs       : output size of the final linear layer.
// leftBorder      : forwarded to setLeftBorder().
// rightBorder     : forwarded to setRightBorder().
// nbStackElements : forwarded to setNbStackElements().
//
// The registration order below is kept deliberately: it determines the order
// in which parameters are created (and therefore randomly initialised).
RTLSTMNetworkImpl::RTLSTMNetworkImpl(int nbOutputs, int leftBorder, int rightBorder, int nbStackElements)
{
  constexpr int wordDim = 30;          // size of one word embedding
  constexpr int biLstmHiddenDim = 128; // hidden size of each direction of the sequence LSTM
  constexpr int treeDim = 256;         // size of one tree representation

  // The setters run first: the layer sizes below read the members they configure.
  setLeftBorder(leftBorder);
  setRightBorder(rightBorder);
  setNbStackElements(nbStackElements);

  // Embedding table with 50000 rows of wordDim values each.
  torch::nn::EmbeddingOptions embeddingOptions(50000, wordDim);
  wordEmbeddings = register_module("word_embeddings", torch::nn::Embedding(embeddingOptions));

  // Feed-forward head: one tree representation per focused buffer/stack element goes in.
  const auto nbFocusedElements = focusedBufferIndexes.size()+focusedStackIndexes.size();
  linear1 = register_module("linear1", torch::nn::Linear(treeDim*nbFocusedElements, hiddenSize));
  linear2 = register_module("linear2", torch::nn::Linear(hiddenSize, nbOutputs));

  // Bidirectional LSTM over the concatenated column embeddings of each context element.
  torch::nn::LSTMOptions vectorLstmOptions(wordDim*columns.size(), biLstmHiddenDim);
  vectorLstmOptions.batch_first(true).bidirectional(true);
  vectorBiLSTM = register_module("vector_lstm", torch::nn::LSTM(vectorLstmOptions));

  // Unidirectional LSTM whose inputs are a bi-LSTM output (2*biLstmHiddenDim)
  // concatenated with a vector of size treeDim.
  torch::nn::LSTMOptions treeLstmOptions(treeDim+2*biLstmHiddenDim, treeDim);
  treeLstmOptions.batch_first(true).bidirectional(false);
  treeLSTM = register_module("tree_lstm", torch::nn::LSTM(treeLstmOptions));

  // Learned vectors of size treeDim.
  S = register_parameter("S", torch::randn(treeDim));
  nullTree = register_parameter("null_tree", torch::randn(treeDim));
}
// Computes the output scores for a single (non-batched) input.
//
// After squeezing, `input` must be a flat 1-D tensor. It is read as four
// consecutive sections:
//   1. focusedIndexes : one entry per focused buffer/stack element; -1 selects nullTree.
//   2. computeOrder   : leftBorder+rightBorder+1 entries giving the order in which
//                       tree representations are built; the first -1 ends the list.
//   3. childs         : maxNbChilds entries per context element; the first -1 ends
//                       that element's child list.
//   4. wordIndexes    : columns.size() embedding indexes per context element.
//
// Returns the result of linear2(relu(linear1(x))) where x is the concatenation
// of the tree representations of the focused elements.
//
// Errors are reported through util::myThrow when the squeezed input is not 1-D
// or when an index read from it falls outside the context window.
torch::Tensor RTLSTMNetworkImpl::forward(torch::Tensor input)
{
  input = input.squeeze();
  if (input.dim() != 1)
    util::myThrow(fmt::format("Does not support batched input (dim()={})", input.dim()));

  // Slice the flat input into its four sections.
  const int64_t contextSize = leftBorder+rightBorder+1;
  auto focusedIndexes = input.narrow(0, 0, focusedBufferIndexes.size()+focusedStackIndexes.size());
  auto computeOrder = input.narrow(0, focusedIndexes.size(0), contextSize);
  auto childsFlat = input.narrow(0, focusedIndexes.size(0)+computeOrder.size(0), maxNbChilds*(leftBorder+rightBorder+1));
  auto childs = torch::reshape(childsFlat, {computeOrder.size(0), maxNbChilds});
  auto wordIndexes = input.narrow(0, focusedIndexes.size(0)+computeOrder.size(0)+childsFlat.size(0), columns.size()*(leftBorder+rightBorder+1));

  // Embed every column value, then merge the columns of each context element into one row.
  auto baseEmbeddings = wordEmbeddings(wordIndexes);
  const int64_t nbColumns = static_cast<int64_t>(columns.size());
  auto concatBaseEmbeddings = torch::reshape(baseEmbeddings, {baseEmbeddings.size(0)/nbColumns, baseEmbeddings.size(1)*nbColumns}).unsqueeze(0);

  // Only the batch dimension is removed: a plain squeeze() would also drop the
  // sequence dimension when the context window holds a single element.
  auto vectorRepresentations = vectorBiLSTM(concatBaseEmbeddings).output.squeeze(0);

  // Every element starts as the null tree and is overwritten once its subtree is computed.
  std::vector<torch::Tensor> treeRepresentations(vectorRepresentations.size(0), nullTree);

  // Indices come from tensor data; reject anything that would index outside the vector above.
  const int64_t nbTrees = static_cast<int64_t>(treeRepresentations.size());
  const auto checkIndex = [nbTrees](int index, const char * what)
  {
    if (index < 0 || index >= nbTrees)
      util::myThrow(fmt::format("{} index {} is outside of the context (size {})", what, index, nbTrees));
  };

  for (int64_t i = 0; i < computeOrder.size(0); i++)
  {
    const int index = computeOrder[i].item<int>();
    if (index == -1)
      break;
    checkIndex(index, "computeOrder");

    const auto headVector = vectorRepresentations[index];

    // The sequence given to the tree LSTM starts with (head, S) and continues
    // with one (head, child tree) entry per child.
    std::vector<torch::Tensor> inputVector;
    inputVector.emplace_back(torch::cat({headVector, S}, 0));
    for (int64_t childIndex = 0; childIndex < childs.size(1); childIndex++)
    {
      const int child = childs[index][childIndex].item<int>();
      if (child == -1)
        break;
      checkIndex(child, "child");
      inputVector.emplace_back(torch::cat({headVector, treeRepresentations[child]}, 0));
    }

    auto lstmInput = torch::stack(inputVector, 0).unsqueeze(0);
    // Keep the output of the last step of the sequence, without the batch dimension.
    treeRepresentations[index] = treeLSTM(lstmInput).output.select(1, -1).squeeze(0);
  }

  // Gather the representation of each focused element (nullTree when absent).
  std::vector<torch::Tensor> focusedTrees;
  focusedTrees.reserve(focusedIndexes.size(0));
  for (int64_t i = 0; i < focusedIndexes.size(0); i++)
  {
    const int index = focusedIndexes[i].item<int>();
    if (index == -1)
    {
      focusedTrees.emplace_back(nullTree);
      continue;
    }
    checkIndex(index, "focused");
    focusedTrees.emplace_back(treeRepresentations[index]);
  }

  auto representation = torch::cat(focusedTrees, 0);
  return linear2(torch::relu(linear1(representation)));
}
std::vector<long> RTLSTMNetworkImpl::extractContext(Config & config, Dict & dict) const
{
std::vector<long> contextIndexes;
std::stack<int> leftContext;
for (int index = config.getWordIndex()-1; config.has(0,index,0) && (int)leftContext.size() < leftBorder; --index)
if (config.isToken(index))
leftContext.push(index);
while ((int)contextIndexes.size() < leftBorder-(int)leftContext.size())
contextIndexes.emplace_back(-1);
while (!leftContext.empty())
{
contextIndexes.emplace_back(leftContext.top());
leftContext.pop();
}
for (int index = config.getWordIndex(); config.has(0,index,0) && (int)contextIndexes.size() < leftBorder+rightBorder+1; ++index)
if (config.isToken(index))
contextIndexes.emplace_back(index);
while ((int)contextIndexes.size() < leftBorder+rightBorder+1)
contextIndexes.emplace_back(-1);
std::map<long, long> indexInContext;
for (auto & l : contextIndexes)
indexInContext.emplace(std::make_pair(l, indexInContext.size()));
std::vector<long> headOf;
for (auto & l : contextIndexes)
{
if (l == -1)
headOf.push_back(-1);
else
{
auto & head = config.getAsFeature(Config::headColName, l);
if (util::isEmpty(head) or head == "_")
headOf.push_back(-1);
else if (indexInContext.count(std::stoi(head)))
headOf.push_back(std::stoi(head));
else
headOf.push_back(-1);
}
}
std::vector<std::vector<long>> childs(headOf.size());
for (unsigned int i = 0; i < headOf.size(); i++)
if (headOf[i] != -1)
childs[indexInContext[headOf[i]]].push_back(contextIndexes[i]);
std::vector<long> treeComputationOrder;
std::vector<bool> treeIsComputed(contextIndexes.size(), false);
std::function<void(long)> depthFirst;
depthFirst = [&config, &depthFirst, &indexInContext, &treeComputationOrder, &treeIsComputed, &childs](long root)
{
if (!indexInContext.count(root))
return;
if (treeIsComputed[indexInContext[root]])
return;
for (auto child : childs[indexInContext[root]])
depthFirst(child);
treeIsComputed[indexInContext[root]] = true;
treeComputationOrder.push_back(indexInContext[root]);
};
for (auto & l : focusedBufferIndexes)
if (contextIndexes[leftBorder+l] != -1)
depthFirst(contextIndexes[leftBorder+l]);
for (auto & l : focusedStackIndexes)
if (config.hasStack(l))
depthFirst(config.getStack(l));
std::vector<long> context;
for (auto & c : focusedBufferIndexes)
context.push_back(leftBorder+c);
for (auto & c : focusedStackIndexes)
if (config.hasStack(c) && indexInContext.count(config.getStack(c)))
context.push_back(indexInContext[config.getStack(c)]);
else
context.push_back(-1);
for (auto & c : treeComputationOrder)
context.push_back(c);
while (context.size() < contextIndexes.size()+focusedBufferIndexes.size()+focusedStackIndexes.size())
context.push_back(-1);
for (auto & c : childs)
{
for (unsigned int i = 0; i < maxNbChilds; i++)
if (i < c.size())
context.push_back(indexInContext[c[i]]);
else
context.push_back(-1);
}
for (auto & l : contextIndexes)
for (auto & col : columns)
if (l == -1)
context.push_back(dict.getIndexOrInsert(Dict::nullValueStr));
else
context.push_back(dict.getIndexOrInsert(config.getAsFeature(col, l)));
return context;