Skip to content
Snippets Groups Projects
Commit fd4bc157 authored by Franck Dary
Browse files

Action EOS adds text metadata

parent 2b550c5a
Branches
Tags
No related merge requests found
......@@ -39,6 +39,7 @@ class Action
public :
static Action addLinesIfNeeded(int nbLines);
// Builds an action which, when config.hasCharacter(0) holds and the raw input
// does not consist only of separators, makes sure the three lines following
// the current word index exist and writes "#" into column 0 of the first two.
// The undo part of the returned action is currently a no-op.
static Action addMetadataLinesIfNeeded();
static Action moveWordIndex(int movement);
static Action moveCharacterIndex(int movement);
static Action addHypothesis(const std::string & colName, std::size_t lineIndex, const std::string & hypothesis);
......
......@@ -50,6 +50,7 @@ class Config
Utf8String rawInput;
std::size_t wordIndex{0};
std::size_t characterIndex{0};
// Raw-input position recorded as the start of the current sentence; exposed
// through getCurrentSentenceStartRawInput / setCurrentSentenceStartRawInput.
// Action::updateIds reads characters from this position up to the character
// index to build the '# text = ...' line, then stores the character index here.
std::size_t currentSentenceStartRawInput{0};
String state{"NONE"};
boost::circular_buffer<String> history{10};
boost::circular_buffer<std::size_t> stack{50};
......@@ -164,6 +165,8 @@ class Config
bool isExtraColumn(const std::string & colName) const;
void setStrategy(const std::vector<std::string> & strategyDefinition);
Strategy & getStrategy();
// Returns the stored currentSentenceStartRawInput value.
std::size_t getCurrentSentenceStartRawInput() const;
// Overwrites currentSentenceStartRawInput with the given value.
void setCurrentSentenceStartRawInput(std::size_t value);
};
#endif
......@@ -29,6 +29,39 @@ Action Action::addLinesIfNeeded(int nbLines)
return {Type::AddLines, apply, undo, appliable};
}
/// Creates the action that reserves placeholder comment lines after the
/// current word.
///
/// When applied, the action does nothing if config.hasCharacter(0) is false or
/// if only separators remain in the raw input. Otherwise it checks the three
/// lines following the current word index, adding one line for each missing
/// one, and writes "#" into column 0 of the first two of them.
/// NOTE(review): these "#" lines are presumably the ones Action::updateIds
/// later overwrites with '# sent_id = ...' and '# text = ...' - confirm.
///
/// The action is always appliable; undoing it is not implemented yet.
Action Action::addMetadataLinesIfNeeded()
{
  auto apply = [](Config & config, Action &)
  {
    // Same short-circuit order as two successive guards: the separators check
    // is only evaluated when a character is available.
    const bool nothingToDo = !config.hasCharacter(0) || config.rawInputOnlySeparatorsLeft();
    if (nothingToDo)
      return;

    // Make sure the three lines right after the current word exist.
    for (int offset = 1; offset <= 3; ++offset)
    {
      if (!config.has(0, config.getWordIndex()+offset, 0))
        config.addLines(1);
    }

    // Mark the first two of them with the comment placeholder.
    for (int offset = 1; offset <= 2; ++offset)
      config.getLastNotEmptyHyp(0, config.getWordIndex()+offset) = "#";
  };

  auto undo = [](Config &, Action &)
  {
    //TODO undo this
  };

  auto appliable = [](const Config &, const Action &)
  {
    return true;
  };

  return {Type::AddLines, apply, undo, appliable};
}
Action Action::moveWordIndex(int movement)
{
auto apply = [movement](Config & config, Action &)
......@@ -588,7 +621,7 @@ Action Action::updateIds(int bufferIndex)
break;
util::myThrow("The current sentence is too long to be completly held by the data strucure. Consider increasing SubConfig::SpanSize");
}
if (config.isComment(i) || config.isEmptyNode(i))
if (config.isCommentPredicted(i) || config.isEmptyNode(i))
continue;
if (config.getLastNotEmptyHypConst(Config::EOSColName, i) == Config::EOSSymbol1)
......@@ -605,7 +638,7 @@ Action Action::updateIds(int bufferIndex)
for (int i = firstIndexOfSentence, currentId = 1; i <= lineIndex; ++i)
{
if (config.isComment(i) || config.isEmptyNode(i))
if (config.isCommentPredicted(i) || config.isEmptyNode(i))
continue;
if (config.isMultiwordPredicted(i))
......@@ -615,6 +648,22 @@ Action Action::updateIds(int bufferIndex)
config.getFirstEmpty(Config::sentIdColName, i) = fmt::format("{}", lastSentId+1);
}
// Update metadata '# text = ...' and '# sent_id = X' before the sentence
if (config.hasCharacter(0))
{
if (config.has(0,firstIndexOfSentence-1,0) and config.isCommentPredicted(firstIndexOfSentence-1))
{
std::string textMetadata = "# text = ";
for (auto i = config.getCurrentSentenceStartRawInput(); i < config.getCharacterIndex(); i++)
textMetadata = fmt::format("{}{}", textMetadata, config.getLetter(i));
config.getLastNotEmptyHyp(0, firstIndexOfSentence-1) = textMetadata;
}
if (config.has(0,firstIndexOfSentence-2,0) and config.isCommentPredicted(firstIndexOfSentence-2))
config.getLastNotEmptyHyp(0, firstIndexOfSentence-2) = fmt::format("# sent_id = {}", config.getAsFeature(Config::sentIdColName, firstIndexOfSentence));
config.setCurrentSentenceStartRawInput(config.getCharacterIndex());
}
};
auto undo = [](Config & config, Action & a)
......@@ -774,7 +823,7 @@ Action Action::setRootUpdateIdsEmptyStackIfSentChanged()
for (int i = firstIndexOfSentence, currentId = 1; i <= lineIndex; ++i)
{
if (config.isComment(i) || config.isEmptyNode(i))
if (config.isCommentPredicted(i) || config.isEmptyNode(i))
continue;
if (config.isMultiwordPredicted(i))
......
......@@ -116,6 +116,7 @@ void BaseConfig::readTSVInput(std::string_view tsvFilename)
get(EOSColName, getNbLines()-1, 0) = EOSSymbol0;
get(isMultiColName, getNbLines()-1, 0) = EOSSymbol0;
get(0, getNbLines()-1, 0) = std::string(line);
getLastNotEmptyHyp(0, getNbLines()-1) = std::string(line);
continue;
}
......@@ -174,6 +175,7 @@ BaseConfig::BaseConfig(std::string mcd, std::string_view tsvFilename, std::strin
if (!has(0,wordIndex,0))
{
addComment();
addComment();
addLines(1);
}
......
......@@ -111,9 +111,9 @@ void Config::print(FILE * dest) const
for (unsigned int line = 0; line < getNbLines(); line++)
{
if (isComment(getFirstLineIndex()+line))
if (isCommentPredicted(getFirstLineIndex()+line))
{
currentSequenceComments.emplace_back(fmt::format("{}\n", getConst(0, getFirstLineIndex()+line, 0)));
currentSequenceComments.emplace_back(fmt::format("{}\n", getLastNotEmptyHypConst(0, getFirstLineIndex()+line)));
continue;
}
for (unsigned int i = 0; i < getNbColumns()-1; i++)
......@@ -171,7 +171,7 @@ void Config::printForDebug(FILE * dest) const
for (int line = firstLineToPrint; line <= lastLineToPrint; line++)
{
if (isComment(line))
if (isCommentPredicted(line))
continue;
toPrint.emplace_back();
toPrint.back().emplace_back(line == (int)wordIndex ? "=>" : "");
......@@ -451,6 +451,7 @@ bool Config::moveWordIndex(int relativeMovement)
{
int nbMovements = 0;
int oldVal = wordIndex;
while (nbMovements != relativeMovement)
{
do
......@@ -462,7 +463,7 @@ bool Config::moveWordIndex(int relativeMovement)
return false;
}
}
while (isComment(wordIndex));
while (isCommentPredicted(wordIndex));
nbMovements += relativeMovement > 0 ? 1 : -1;
}
......@@ -481,11 +482,11 @@ void Config::moveWordIndexRelaxed(int relativeMovement)
break;
wordIndex += increment;
}
while (isComment(wordIndex));
while (isCommentPredicted(wordIndex));
nbMovements += relativeMovement > 0 ? 1 : -1;
}
if (!isComment(wordIndex))
if (!isCommentPredicted(wordIndex))
return;
moveWordIndex(-increment);
......@@ -503,7 +504,7 @@ bool Config::canMoveWordIndex(int relativeMovement) const
if (!has(0,oldVal,0))
return false;
}
while (isComment(oldVal));
while (isCommentPredicted(oldVal));
nbMovements += relativeMovement > 0 ? 1 : -1;
}
......@@ -784,3 +785,13 @@ Strategy & Config::getStrategy()
return *strategy.get();
}
std::size_t Config::getCurrentSentenceStartRawInput() const
{
return currentSentenceStartRawInput;
}
void Config::setCurrentSentenceStartRawInput(std::size_t value)
{
currentSentenceStartRawInput = value;
}
......@@ -700,6 +700,7 @@ void Transition::initReduce_relaxed()
void Transition::initEOS(int bufferIndex)
{
sequence.emplace_back(Action::addMetadataLinesIfNeeded());
sequence.emplace_back(Action::setRoot(bufferIndex));
sequence.emplace_back(Action::updateIds(bufferIndex));
sequence.emplace_back(Action::addHypothesisRelative(Config::EOSColName, Config::Object::Buffer, bufferIndex, Config::EOSSymbol1));
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment