Newer
Older
// Copy constructor: duplicates the state of `other` member by member.
// The strategy is cloned into a fresh Strategy object rather than shared.
Config::Config(const Config & other)
{
  this->lines = other.lines;
  this->predicted = other.predicted;
  this->lastPoppedStack = other.lastPoppedStack;
  this->lastAttached = other.lastAttached;
  this->currentWordId = other.currentWordId;
  this->appliableSplitTransitions = other.appliableSplitTransitions;
  this->appliableTransitions = other.appliableTransitions;
  // Only clone the strategy when the source actually has one, to avoid
  // dereferencing a null pointer.
  if (other.strategy)
    this->strategy.reset(new Strategy(*other.strategy));
  this->rawInput = other.rawInput;
  this->wordIndex = other.wordIndex;
  this->characterIndex = other.characterIndex;
  this->state = other.state;
  this->history = other.history;
  this->stack = other.stack;
  // Previously `this->extraColumns = this->extraColumns;` (a self-assignment),
  // so the copy never received the extra columns of `other`.
  this->extraColumns = other.extraColumns;
}
std::size_t Config::getIndexOfLine(int lineIndex) const
return lineIndex * getNbColumns() * (nbHypothesesMax+1);
std::size_t Config::getIndexOfCol(int colIndex) const
return colIndex * (nbHypothesesMax+1);
void Config::addLines(unsigned int nbLines)
lines.resize(lines.size() + nbLines*getNbColumns()*(nbHypothesesMax+1));
Franck Dary
committed
// Appends one line to the storage and marks it as a comment by writing "#"
// in column 0, both in slot 0 and in the slot returned by getLastNotEmptyHyp.
void Config::addComment()
{
  const auto cellsPerLine = getNbColumns()*(nbHypothesesMax+1);
  lines.resize(lines.size() + cellsPerLine);

  // The line that was just appended is the last one.
  const int appendedLine = getNbLines()-1;
  get(0, appendedLine, 0) = "#";
  getLastNotEmptyHyp(0, appendedLine) = "#";
}
void Config::resizeLines(unsigned int nbLines)
lines.resize(nbLines*getNbColumns()*(nbHypothesesMax+1));
// Tells whether the cell (colIndex, lineIndex, hypothesisIndex) exists.
// Lines are addressed in the range [getFirstLineIndex(), getFirstLineIndex()+getNbLines()).
bool Config::has(int colIndex, int lineIndex, int hypothesisIndex) const
{
  // Column must be one of the known columns.
  if (colIndex < 0 || colIndex >= (int)getNbColumns())
    return false;

  // Line must fall inside the currently stored range of lines.
  const int firstLine = (int)getFirstLineIndex();
  if (lineIndex < firstLine || lineIndex >= firstLine + (int)getNbLines())
    return false;

  // Valid hypothesis slots are 0 .. nbHypothesesMax.
  return hypothesisIndex >= 0 && hypothesisIndex < nbHypothesesMax+1;
}
// Name-based variant of has(): false when the column name is unknown,
// otherwise the result of the index-based check.
bool Config::has(const std::string & colName, int lineIndex, int hypothesisIndex) const
{
  if (!hasColIndex(colName))
    return false;

  return has(getColIndex(colName), lineIndex, hypothesisIndex);
}
// Name-based accessor: resolves `colName` with getColIndex() and returns a
// mutable reference to the cell through the index-based get().
Config::String & Config::get(const std::string & colName, int lineIndex, int hypothesisIndex)
{
  return get(getColIndex(colName), lineIndex, hypothesisIndex);
}
// Name-based read-only accessor: resolves `colName` with getColIndex() and
// delegates to the index-based getConst().
const Config::String & Config::getConst(const std::string & colName, int lineIndex, int hypothesisIndex) const
{
  const auto resolvedCol = getColIndex(colName);
  return getConst(resolvedCol, lineIndex, hypothesisIndex);
}
// Mutable reference to the cell (colIndex, lineIndex, hypothesisIndex).
// No bounds check is done here; the iterator is dereferenced directly.
Config::String & Config::get(int colIndex, int lineIndex, int hypothesisIndex)
{
  auto cell = getIterator(colIndex, lineIndex, hypothesisIndex);
  return *cell;
}
// Read-only reference to the cell (colIndex, lineIndex, hypothesisIndex).
// No bounds check is done here; the iterator is dereferenced directly.
const Config::String & Config::getConst(int colIndex, int lineIndex, int hypothesisIndex) const
{
  auto cell = getConstIterator(colIndex, lineIndex, hypothesisIndex);
  return *cell;
}
// Number of lines currently stored: total number of cells divided by the
// number of cells one line occupies.
std::size_t Config::getNbLines() const
{
  // The offset of the column just past the last one equals the size of a line.
  const std::size_t cellsPerLine = getIndexOfCol(getNbColumns());
  return lines.size() / cellsPerLine;
}
{
std::vector<std::string> currentSequence;
std::vector<std::string> currentSequenceComments;
auto flushCurrentSequence = [&dest, ¤tSequence, ¤tSequenceComments]()
{
if (currentSequence.empty() && currentSequenceComments.empty())
return;
for (auto & comment : currentSequenceComments)
fmt::print(dest, "{}", comment);
for (auto & line : currentSequence)
fmt::print(dest, "{}", line);
fmt::print(dest, "\n");
currentSequence.clear();
currentSequenceComments.clear();
};
for (unsigned int line = 0; line < getNbLines(); line++)
{
currentSequenceComments.emplace_back(fmt::format("{}\n", getConst(0, getFirstLineIndex()+line, 0)));
for (unsigned int i = 0; i < getNbColumns()-1; i++)
if (isExtraColumn(getColName(i)) and getColName(i) != EOSColName)
{
if (i == getNbColumns()-2)
currentSequence.back().back() = '\n';
continue;
}
Franck Dary
committed
auto & colContent = getAsFeature(i, getFirstLineIndex()+line);
std::string valueToPrint = colContent;
try
{
if (getColName(i) == headColName)
if (valueToPrint != "0")
Franck Dary
committed
valueToPrint = getAsFeature(idColName, std::stoi(valueToPrint));
if (valueToPrint.empty())
valueToPrint = "_";
currentSequence.emplace_back(fmt::format("{}{}", valueToPrint, i < getNbColumns()-2 ? "\t" : "\n"));
Franck Dary
committed
auto & eosColContent = getAsFeature(EOSColName, getFirstLineIndex()+line);
if (eosColContent == EOSSymbol1)
flushCurrentSequence();
}
}
void Config::printForDebug(FILE * dest) const
{
static constexpr int maxWordLength = 7;
int firstLineToPrint = wordIndex;
int lastLineToPrint = wordIndex;
while (wordIndex-firstLineToPrint < windowSize and has(0, firstLineToPrint-1, 0))
--firstLineToPrint;
while (lastLineToPrint - wordIndex < windowSize and has(0, lastLineToPrint+1, 0))
++lastLineToPrint;
std::vector<std::vector<std::string>> toPrint;
toPrint.emplace_back();
toPrint.back().emplace_back("");
for (unsigned int i = 0; i < getNbColumns(); i++)
if (isExtraColumn(getColName(i)) and getColName(i) != EOSColName)
for (int line = firstLineToPrint; line <= lastLineToPrint; line++)
{
toPrint.emplace_back();
toPrint.back().emplace_back(line == (int)wordIndex ? "=>" : "");
for (unsigned int i = 0; i < getNbColumns(); i++)
if (isExtraColumn(getColName(i)) and getColName(i) != EOSColName)
std::string colContent = has(i,line,0) ? getAsFeature(i, line).get() : "?";
std::string toPrintCol = colContent;
try
{
if (getColName(i) == headColName && toPrintCol != "_" && !toPrintCol.empty())
if (toPrintCol != "0" && toPrintCol != "?")
toPrintCol = has(0,std::stoi(toPrintCol),0) ? getAsFeature(idColName, std::stoi(toPrintCol)).get() : "?";
Franck Dary
committed
} catch(std::exception & e) {util::myThrow(fmt::format("toPrintCol='{}' {}", toPrintCol, e.what()));}
toPrint.back().emplace_back(util::shrink(toPrintCol, maxWordLength));
}
std::vector<std::size_t> colLength(toPrint[0].size(), 0);
for (auto & line : toPrint)
for (unsigned int col = 0; col < line.size()-1; col++)
colLength[col] = std::max((int)colLength[col], util::printedLength(line[col]));
int lengthSum = 2*getNbColumns();
for (auto & val : colLength)
lengthSum += val;
std::string longLine = fmt::format("{:-<{}}", "", lengthSum);
std::string historyStr = "";
for (auto & h : history)
historyStr += h;
historyStr += ",";
}
if (!historyStr.empty())
historyStr.pop_back();
Franck Dary
committed
{
if (has(idColName, s, 0))
Franck Dary
committed
stackStr += getAsFeature(idColName, s);
Franck Dary
committed
else
stackStr += "?";
}
else
stackStr += std::to_string(s);
stackStr += ",";
}
if (!stackStr.empty())
stackStr.pop_back();
for (std::size_t index = characterIndex; index < util::getSize(rawInput) and index - characterIndex < lettersWindowSize; index++)
if (!util::isEmpty(rawInput))
fmt::print(dest, "State={}\nwordIndex={} characterIndex={}\nhistory=({})\nstack=({})\n", state, wordIndex, characterIndex, historyStr, stackStr);
fmt::print(dest, "{}\n", longLine);
for (unsigned int line = 0; line < toPrint.size(); line++)
{
if (line == 1)
fmt::print(dest, "{}\n", longLine);
for (unsigned int col = 0; col < toPrint[line].size()-1; col++)
fmt::print(dest, "{}{:>{}}{}", toPrint[line][col], "", colLength[col]-util::printedLength(toPrint[line][col]), col == toPrint[line].size()-2 ? "\n" : " ");
fmt::print(dest, "\n");
}
fmt::print(dest, "{}\n", longLine);
// Returns the highest-numbered non-empty hypothesis slot (nbHypothesesMax down
// to 1) of the given cell, or slot 0 when all of those slots are empty.
// Throws through util::myThrow when the cell does not exist.
Config::String & Config::getLastNotEmpty(int colIndex, int lineIndex)
{
  if (!has(colIndex, lineIndex, 0))
    util::myThrow(fmt::format("asked for line {} but last line = {}", lineIndex, getNbLines()+getFirstLineIndex()-1));

  const int cellStart = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  // Walk down from the last slot until a non-empty one is found or slot 0 is reached.
  int slot = nbHypothesesMax;
  while (slot > 0 && util::isEmpty(lines[cellStart+slot]))
    --slot;

  return lines[cellStart+slot];
}
// Returns the highest-numbered non-empty hypothesis slot (nbHypothesesMax down
// to 1) of the given cell, or slot 1 when all of those slots are empty.
// Throws through util::myThrow when the cell does not exist.
Config::String & Config::getLastNotEmptyHyp(int colIndex, int lineIndex)
{
  if (!has(colIndex, lineIndex, 0))
    util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

  const int cellStart = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  // Walk down from the last slot until a non-empty one is found.
  int slot = nbHypothesesMax;
  while (slot > 0 && util::isEmpty(lines[cellStart+slot]))
    --slot;

  // Slot 0 is never returned here: fall back to slot 1 instead.
  return lines[cellStart + (slot > 0 ? slot : 1)];
}
// Returns the first empty hypothesis slot of the given cell, or the last slot
// (nbHypothesesMax) when no empty slot is found before it.
// Throws through util::myThrow when the cell does not exist.
Config::String & Config::getFirstEmpty(int colIndex, int lineIndex)
{
  if (!has(colIndex, lineIndex, 0))
    util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

  int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  // NOTE(review): the loop header was missing (`i` was used undeclared).
  // Restored as a scan of slots 1 .. nbHypothesesMax-1, mirroring the sibling
  // accessors that treat slots 1..nbHypothesesMax as hypotheses — confirm the start index.
  for (int i = 1; i < nbHypothesesMax; ++i)
    if (util::isEmpty(lines[baseIndex+i]))
      return lines[baseIndex+i];

  return lines[baseIndex+nbHypothesesMax];
}
// Name-based variant: resolves `colName` with getColIndex() and delegates to
// the index-based getFirstEmpty().
Config::String & Config::getFirstEmpty(const std::string & colName, int lineIndex)
{
  const auto resolvedCol = getColIndex(colName);
  return getFirstEmpty(resolvedCol, lineIndex);
}
// Read-only counterpart of getLastNotEmpty(): highest-numbered non-empty slot
// among nbHypothesesMax..1, or slot 0 when all of those are empty.
// Throws through util::myThrow when the cell does not exist.
const Config::String & Config::getLastNotEmptyConst(int colIndex, int lineIndex) const
{
  if (!has(colIndex, lineIndex, 0))
    util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

  const int cellStart = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  // Walk down from the last slot until a non-empty one is found or slot 0 is reached.
  int slot = nbHypothesesMax;
  while (slot > 0 && util::isEmpty(lines[cellStart+slot]))
    --slot;

  return lines[cellStart+slot];
}
// Read-only counterpart of getLastNotEmptyHyp(): highest-numbered non-empty
// slot among nbHypothesesMax..1, or slot 1 when all of those are empty.
// Throws through util::myThrow when the cell does not exist.
const Config::String & Config::getLastNotEmptyHypConst(int colIndex, int lineIndex) const
{
  if (!has(colIndex, lineIndex, 0))
    util::myThrow(fmt::format("asked for line {} but nbLines = {}", lineIndex, getNbLines()));

  int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex);

  // Scan hypothesis slots from the last one down to slot 1.
  for (int i = nbHypothesesMax; i > 0; --i)
    if (!util::isEmpty(lines[baseIndex+i]))
      return lines[baseIndex+i];

  // Every hypothesis slot is empty: return slot 1.
  return lines[baseIndex+1];
}
// Value of a cell as it should be read: for a predicted column the value comes
// from getLastNotEmptyHypConst(), for any other column from getLastNotEmptyConst().
const Config::String & Config::getAsFeature(int colIndex, int lineIndex) const
{
  const bool columnIsPredicted = isPredicted(getColName(colIndex));

  return columnIsPredicted ? getLastNotEmptyHypConst(colIndex, lineIndex)
                           : getLastNotEmptyConst(colIndex, lineIndex);
}
// Name-based variant: resolves `colName` with getColIndex() and delegates to
// the index-based getAsFeature().
const Config::String & Config::getAsFeature(const std::string & colName, int lineIndex) const
{
  return getAsFeature(getColIndex(colName), lineIndex);
}
// Name-based variant: resolves `colName` with getColIndex() and delegates to
// the index-based getLastNotEmpty().
Config::String & Config::getLastNotEmpty(const std::string & colName, int lineIndex)
{
  const auto resolvedCol = getColIndex(colName);
  return getLastNotEmpty(resolvedCol, lineIndex);
}
// Name-based variant: resolves `colName` with getColIndex() and delegates to
// the index-based getLastNotEmptyHyp().
Config::String & Config::getLastNotEmptyHyp(const std::string & colName, int lineIndex)
{
  const auto resolvedCol = getColIndex(colName);
  return getLastNotEmptyHyp(resolvedCol, lineIndex);
}
// Name-based variant: resolves `colName` with getColIndex() and delegates to
// the index-based getLastNotEmptyConst().
const Config::String & Config::getLastNotEmptyConst(const std::string & colName, int lineIndex) const
{
  const auto resolvedCol = getColIndex(colName);
  return getLastNotEmptyConst(resolvedCol, lineIndex);
}
// Name-based variant: resolves `colName` with getColIndex() and delegates to
// the index-based getLastNotEmptyHypConst().
const Config::String & Config::getLastNotEmptyHypConst(const std::string & colName, int lineIndex) const
{
  const auto resolvedCol = getColIndex(colName);
  return getLastNotEmptyHypConst(resolvedCol, lineIndex);
}
// Mutable iterator on the cell (colIndex, lineIndex, hypothesisIndex).
// `lineIndex` is made relative to getFirstLineIndex() before computing the offset.
Config::ValueIterator Config::getIterator(int colIndex, int lineIndex, int hypothesisIndex)
{
  auto position = lines.begin();
  position = position + getIndexOfLine(lineIndex-getFirstLineIndex());
  position = position + getIndexOfCol(colIndex);
  position = position + hypothesisIndex;
  return position;
}
// Read-only iterator on the cell (colIndex, lineIndex, hypothesisIndex).
// `lineIndex` is made relative to getFirstLineIndex() before computing the offset.
Config::ConstValueIterator Config::getConstIterator(int colIndex, int lineIndex, int hypothesisIndex) const
{
  return lines.begin() + getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex) + hypothesisIndex;
}
void Config::addToHistory(const std::string & transition)
{
history.push_back(String(transition));
}
// Pushes `index` on top of the stack (the top is the last element).
void Config::addToStack(std::size_t index)
{
  stack.emplace_back(index);
}
void Config::swapStack(int relIndex1, int relIndex2)
{
int tmp = getStack(relIndex1);
getStackRef(relIndex1) = getStack(relIndex2);
return letterIndex >= 0 and letterIndex < (int)util::getSize(rawInput);
}
// Character of the raw input at position `letterIndex`.
// No bounds check is performed here.
util::utf8char Config::getLetter(int letterIndex) const
{
  return rawInput[letterIndex];
}
// A line is a comment when slot 0 of its column 0 is a non-empty string
// starting with '#'.
bool Config::isComment(std::size_t lineIndex) const
{
  auto firstCell = getConstIterator(0, lineIndex, 0);

  if (firstCell->get().empty())
    return false;

  return firstCell->get()[0] == '#';
}
Franck Dary
committed
bool Config::isCommentPredicted(std::size_t lineIndex) const
{
Franck Dary
committed
auto & col0Pred = getAsFeature(0, lineIndex);
auto & col0Gold = getConst(0, lineIndex, 0);
return (!util::isEmpty(col0Pred) and col0Pred.get()[0] == '#') or (!util::isEmpty(col0Gold) and col0Gold.get()[0] == '#');
Franck Dary
committed
}
// A line is a multiword when the id column exists and slot 0 of its id
// contains a '-'.
bool Config::isMultiword(std::size_t lineIndex) const
{
  return hasColIndex(idColName) && getConst(idColName, lineIndex, 0).get().find('-') != std::string::npos;
}
Franck Dary
committed
// Same test as isMultiword(), but the id is read through getAsFeature().
bool Config::isMultiwordPredicted(std::size_t lineIndex) const
{
  if (!hasColIndex(idColName))
    return false;

  const auto dashPosition = getAsFeature(idColName, lineIndex).get().find('-');
  return dashPosition != std::string::npos;
}
// Difference between the two numbers of an id of the form "a-b" (b - a),
// read from slot 0 of the id column. The id is split on '-' and both parts
// are accessed without checking that they exist.
int Config::getMultiwordSize(std::size_t lineIndex) const
{
  auto bounds = util::split(getConst(idColName, lineIndex, 0).get(), '-');

  const int upper = std::stoi(std::string(bounds[1]));
  const int lower = std::stoi(std::string(bounds[0]));

  return upper - lower;
}
Franck Dary
committed
// Same computation as getMultiwordSize(), but the id is read through
// getAsFeature(). Both parts of the split id are accessed without checking
// that they exist.
int Config::getMultiwordSizePredicted(std::size_t lineIndex) const
{
  auto bounds = util::split(getAsFeature(idColName, lineIndex).get(), '-');

  const int upper = std::stoi(std::string(bounds[1]));
  const int lower = std::stoi(std::string(bounds[0]));

  return upper - lower;
}
// A line is an empty node when the id column exists and slot 0 of its id
// contains a '.'.
bool Config::isEmptyNode(std::size_t lineIndex) const
{
  return hasColIndex(idColName) && getConst(idColName, lineIndex, 0).get().find('.') != std::string::npos;
}
Franck Dary
committed
// Same test as isEmptyNode(), but the id is read through getAsFeature().
bool Config::isEmptyNodePredicted(std::size_t lineIndex) const
{
  if (!hasColIndex(idColName))
    return false;

  const auto dotPosition = getAsFeature(idColName, lineIndex).get().find('.');
  return dotPosition != std::string::npos;
}
// A line is a token when it is neither a comment, nor a multiword, nor an
// empty node.
bool Config::isToken(std::size_t lineIndex) const
{
  return !(isComment(lineIndex) || isMultiword(lineIndex) || isEmptyNode(lineIndex));
}
Franck Dary
committed
// Same as isToken(), using the *Predicted variants of the three tests.
bool Config::isTokenPredicted(std::size_t lineIndex) const
{
  return !(isCommentPredicted(lineIndex) || isMultiwordPredicted(lineIndex) || isEmptyNodePredicted(lineIndex));
}
bool Config::moveWordIndex(int relativeMovement)
{
int nbMovements = 0;
while (nbMovements != relativeMovement)
{
do
{
relativeMovement > 0 ? wordIndex++ : wordIndex--;
if (!has(0,wordIndex,0))
{
wordIndex = oldVal;
return false;
}
}
nbMovements += relativeMovement > 0 ? 1 : -1;
}
return true;
// Best-effort version of moveWordIndex(): performs up to `relativeMovement`
// steps, skipping comment lines, and simply stops advancing when the next line
// does not exist. If wordIndex ends up on a comment line, one step is taken
// in the opposite direction with moveWordIndex().
void Config::moveWordIndexRelaxed(int relativeMovement)
{
  // Direction of a single step; a zero movement counts as backward.
  const int step = relativeMovement > 0 ? 1 : -1;

  for (int done = 0; done != relativeMovement; done += step)
  {
    do
    {
      // Leaves only the do/while: the step is still counted by the outer loop.
      if (!has(0,wordIndex+step,0))
        break;
      wordIndex += step;
    }
    while (isComment(wordIndex));
  }

  if (isComment(wordIndex))
    moveWordIndex(-step);
}
bool Config::canMoveWordIndex(int relativeMovement) const
int nbMovements = 0;
int oldVal = wordIndex;
while (nbMovements != relativeMovement)
relativeMovement > 0 ? oldVal++ : oldVal--;
if (!has(0,oldVal,0))
return false;
nbMovements += relativeMovement > 0 ? 1 : -1;
// Moves characterIndex by `relativeMovement`, clamped to
// [0, util::getSize(rawInput)]. Returns true only when the full movement was
// possible, i.e. when no clamping occurred.
bool Config::moveCharacterIndex(int relativeMovement)
{
  // The target is computed in signed arithmetic. Adding a negative movement
  // to an unsigned index wraps around to a huge value, which the previous
  // min/max clamp turned into "end of input" instead of 0.
  const long wanted = (long)characterIndex + relativeMovement;
  const long upperBound = (long)util::getSize(rawInput);

  characterIndex = std::max(0L, std::min(wanted, upperBound));

  return (long)characterIndex == wanted;
}
// Tells whether characterIndex could be moved by `relativeMovement` without
// being clamped. characterIndex itself is not modified.
bool Config::canMoveCharacterIndex(int relativeMovement) const
{
  const auto upperBound = util::getSize(rawInput);
  const auto unclamped = characterIndex+relativeMovement;

  const int target = std::max(0, (int)std::min(unclamped, upperBound));
  const int expected = (int)characterIndex + relativeMovement;

  return target == expected;
}
for (unsigned int i = characterIndex; i < util::getSize(rawInput); i++)
if (!util::isSeparator(rawInput[i]))
return false;
return true;
}
std::size_t Config::getWordIndex() const
{
return wordIndex;
}
std::size_t Config::getCharacterIndex() const
{
return characterIndex;
}
// History entry counted from the end: relativeIndex 0 is the most recent one.
// No bounds check is performed; see hasHistory().
const Config::String & Config::getHistory(int relativeIndex) const
{
  const auto position = history.size()-1-relativeIndex;
  return history[position];
}
// Stack element counted from the top: relativeIndex 0 is the top.
// The special value -1 returns getLastPoppedStack() instead.
// No bounds check is performed; see hasStack().
std::size_t Config::getStack(int relativeIndex) const
{
  const bool wantsLastPopped = (relativeIndex == -1);
  if (wantsLastPopped)
    return getLastPoppedStack();

  const auto position = stack.size()-1-relativeIndex;
  return stack[position];
}
// Mutable reference to the stack element counted from the top
// (relativeIndex 0 is the top). No bounds check is performed.
std::size_t & Config::getStackRef(int relativeIndex)
{
  const auto position = stack.size()-1-relativeIndex;
  return stack[position];
}
// Tells whether getHistory(relativeIndex) designates an existing entry.
bool Config::hasHistory(int relativeIndex) const
{
  return relativeIndex >= 0 && relativeIndex < (int)history.size();
}
// Tells whether getStack(relativeIndex) designates a usable element.
// For the special value -1, the answer is whether the line given by
// getLastPoppedStack() exists.
bool Config::hasStack(int relativeIndex) const
{
  if (relativeIndex == -1)
    return has(0,getLastPoppedStack(),0);

  return relativeIndex >= 0 && relativeIndex < (int)stack.size();
}
// Copy of the current state.
Config::String Config::getState() const
{
  return this->state;
}
// Replaces the current state by `state`.
void Config::setState(const std::string state)
{
  auto & storedState = this->state;
  storedState = state;
}
if (!util::isEmpty(rawInput))
return rawInputOnlySeparatorsLeft() and !has(0, wordIndex+1, 0) and !hasStack(0);
return !has(0, wordIndex+1, 0) and !hasStack(0);
// Registers the given column names as predicted. An unknown column name
// triggers util::myThrow; names processed before it have already been inserted.
// Every extra column except EOSColName is then registered as predicted too.
void Config::addPredicted(const std::set<std::string> & predicted)
{
  for (const auto & requested : predicted)
  {
    if (!hasColIndex(requested))
      util::myThrow(fmt::format("unknown column '{}'", requested));

    this->predicted.insert(requested);
  }

  for (const auto & extra : extraColumns)
  {
    if (extra == EOSColName)
      continue;

    this->predicted.insert(extra);
  }
}
// Tells whether `colName` has been registered as a predicted column.
bool Config::isPredicted(const std::string & colName) const
{
  return predicted.find(colName) != predicted.end();
}
int Config::getLastPoppedStack() const
{
return lastPoppedStack;
}
int Config::getCurrentWordId() const
{
return currentWordId;
}
// Replaces the stored currentWordId by the given value.
void Config::setCurrentWordId(int currentWordId)
{
  auto & storedId = this->currentWordId;
  storedId = currentWordId;
}
Franck Dary
committed
// Fills in empty id and head values on every line for which
// isTokenPredicted() is true:
//  - an empty id becomes the id of the previous line + 1 when that line is
//    also a token, and 1 otherwise;
//  - when the head column exists, an empty head becomes "0" for the line whose
//    id is 1, and otherwise the index of the most recent line whose id is 1.
void Config::addMissingColumns()
{
  // Index of the most recent line whose id is 1.
  int firstIndex = 0;

  for (unsigned int index = 0; index < getNbLines(); index++)
  {
    if (!isTokenPredicted(index))
      continue;

    if (util::isEmpty(getAsFeature(idColName, index)))
    {
      int last = 0;
      if (index > 0 and isTokenPredicted(index-1))
        last = std::stoi(getAsFeature(idColName, index-1));
      getLastNotEmptyHyp(idColName, index) = std::to_string(last+1);
    }

    int curId = std::stoi(getAsFeature(idColName, index));
    if (curId == 1)
      firstIndex = index;

    if (hasColIndex(headColName))
      if (util::isEmpty(getAsFeature(headColName, index)))
        getLastNotEmptyHyp(headColName, index) = (curId == 1) ? "0" : std::to_string(firstIndex);
  }
}
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
// Line index of the word located `relativeIndex` words away from wordIndex,
// ignoring lines for which isCommentPredicted() is true. Index 0 designates
// the first non-comment line at or after wordIndex; negative values count
// backward from the line before wordIndex. Returns -1 when no such line exists.
long Config::getRelativeWordIndex(int relativeIndex) const
{
  if (relativeIndex < 0)
  {
    // Backward scan, starting just before the current word.
    int seen = 0;
    int candidate = getWordIndex()-1;
    while (has(0,candidate,0))
    {
      if (!isCommentPredicted(candidate) && --seen == relativeIndex)
        return candidate;
      --candidate;
    }
    return -1;
  }

  // Forward scan, starting on the current word.
  int seen = 0;
  int candidate = getWordIndex();
  while (has(0,candidate,0))
  {
    if (!isCommentPredicted(candidate))
    {
      if (seen == relativeIndex)
        return candidate;
      ++seen;
    }
    ++candidate;
  }

  return -1;
}
// Resolves a relative index either against the buffer (Object::Buffer) or,
// for any other object, against the stack through getStack().
long Config::getRelativeWordIndex(Object object, int relativeIndex) const
{
  const bool onBuffer = (object == Object::Buffer);
  return onBuffer ? getRelativeWordIndex(relativeIndex) : getStack(relativeIndex);
}
// Tells whether the relative index designates an existing element: for the
// buffer, the resolved line must exist; for any other object, hasStack() decides.
bool Config::hasRelativeWordIndex(Object object, int relativeIndex) const
{
  if (object != Object::Buffer)
    return hasStack(relativeIndex);

  const auto resolvedLine = getRelativeWordIndex(relativeIndex);
  return has(0,resolvedLine,0);
}
// Replaces the stored list of appliable split transitions by a copy of the argument.
void Config::setAppliableSplitTransitions(const std::vector<Transition *> & appliableSplitTransitions)
{
  auto & stored = this->appliableSplitTransitions;
  stored = appliableSplitTransitions;
}
// Replaces the stored list of appliable transitions by a copy of the argument.
void Config::setAppliableTransitions(const std::vector<int> & appliableTransitions)
{
  auto & stored = this->appliableTransitions;
  stored = appliableTransitions;
}
const std::vector<Transition *> & Config::getAppliableSplitTransitions() const
{
return appliableSplitTransitions;
}
const std::vector<int> & Config::getAppliableTransitions() const
{
return appliableTransitions;
}
// Converts "b" to Object::Buffer and "s" to Object::Stack.
// Any other string is reported through util::myThrow.
Config::Object Config::str2object(const std::string & s)
{
  const bool isBuffer = (s == "b");
  const bool isStack = (s == "s");

  if (!isBuffer && !isStack)
    util::myThrow(fmt::format("Invalid object '{}'", s));

  // Buffer is also the value returned after the error report, as a fallback.
  return (!isBuffer && isStack) ? Object::Stack : Object::Buffer;
}
// Tells whether `colName` is one of the names listed in extraColumns.
bool Config::isExtraColumn(const std::string & colName) const
{
  bool found = false;

  for (auto it = extraColumns.begin(); !found && it != extraColumns.end(); ++it)
    found = (*it == colName);

  return found;
}
int Config::getLastAttached() const
{
return lastAttached;
}
// Replaces the stored lastAttached by the given value.
void Config::setLastAttached(int lastAttached)
{
  auto & storedValue = this->lastAttached;
  storedValue = lastAttached;
}
// Number of elements currently on the stack.
std::size_t Config::getStackSize() const
{
  const std::size_t count = stack.size();
  return count;
}
void Config::setStrategy(const std::vector<std::string> & strategyDefinition)