Something went wrong on our end
Select Git revision
-
Franck Dary authoredFranck Dary authored
Config.cpp 23.00 KiB
/*Copyright (c) 2019 Alexis Nasr && Franck Dary
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.*/
#include "Config.hpp"
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <random>
#include "File.hpp"
#include "ProgramParameters.hpp"
#include "Action.hpp"
#include "ProgramOutput.hpp"
#include "utf8.hpp"
/// Builds a configuration bound to `bd`: creates one tape per line declared
/// in `bd`, initialises every head/counter, then loads the input through
/// readInput().
///
/// @param bd            Description of the tapes (names, known/unknown).
/// @param inputFilename Path of the input file to read.
Config::Config(BD & bd, const std::string inputFilename) : bd(bd), hashHistory(HISTORY_SIZE), pastActions(HISTORY_SIZE)
{
  // Nothing has been printed nor popped yet.
  outputFile = nullptr;
  lastIndexPrinted = -1;
  stackHistory = -1;

  // Every head starts at the beginning of the input; words are counted from 1.
  head = 0;
  rawInputHead = 0;
  rawInputHeadIndex = 0;
  currentWordIndex = 1;
  totalEntropy = 0;

  this->inputFilename = inputFilename;
  inputAllRead = false;

  for (int line = 0; line < bd.getNbLines(); ++line)
    tapes.emplace_back(bd.getNameOfLine(line), bd.lineIsKnown(line));

  readInput();
}
/// Copy constructor. Duplicates the state listed below and gives the copy its
/// own File object, copy-constructed from the one owned by `other`.
Config::Config(const Config & other) : bd(other.bd), hashHistory(other.hashHistory), pastActions(other.pastActions)
{
  // Parser state.
  currentStateName = other.currentStateName;
  stack = other.stack;
  stackHistory = other.stackHistory;
  head = other.head;
  currentWordIndex = other.currentWordIndex;

  // Histories and scores.
  actionHistory = other.actionHistory;
  entropyHistory = other.entropyHistory;
  totalEntropy = other.totalEntropy;

  // Tapes and raw input.
  tapes = other.tapes;
  rawInput = other.rawInput;
  rawInputHead = other.rawInputHead;
  rawInputHeadIndex = other.rawInputHeadIndex;

  // Input / output bookkeeping.
  inputFilename = other.inputFilename;
  inputAllRead = other.inputAllRead;
  outputFile = other.outputFile;
  lastIndexPrinted = other.lastIndexPrinted;

  // The file handle is not shared: the copy owns a fresh File.
  file.reset(new File(*other.file));
}
/// Creates a tape whose reference and hypothesis buffers are sized with
/// ProgramParameters::readSize and filled with Dict::unknownValueStr.
///
/// @param name    Name of the tape.
/// @param isKnown Whether operator[] / set() address the reference (true)
///                or the hypothesis (false).
Config::Tape::Tape(const std::string & name, bool isKnown) : ref(ProgramParameters::readSize, Dict::unknownValueStr), hyp(ProgramParameters::readSize, std::make_pair(Dict::unknownValueStr, 0.0))
{
  this->name = name;
  this->isKnown = isKnown;
  head = 0;
  totalEntropy = 0.0;
}
/// Tells whether `bd` declares a line named `name`.
bool Config::hasTape(const std::string & name)
{
  const bool declared = bd.hasLineOfName(name);
  return declared;
}
/// Returns the tape stored at the line index that `bd` associates with `name`.
Config::Tape & Config::getTape(const std::string & name)
{
  const auto lineIndex = bd.getLineOfName(name);
  return tapes[lineIndex];
}
/// Returns the tape stored at the line index that `bd` associates with input
/// column `col`.
Config::Tape & Config::getTapeByInputCol(int col)
{
  const auto lineIndex = bd.getLineOfInputCol(col);
  return tapes[lineIndex];
}
/// Loads the whole input file, once (no-op when `inputAllRead` is already set).
///
/// In raw-input mode (ProgramParameters::rawInput) the file is read character
/// by character into `rawInput` and a single empty cell is pushed on every
/// tape. Otherwise the file is read line by line into `inputContent`, grouped
/// by sentence, and fillTapesWithInput() is called.
///
/// Aborts the program if a line is not valid UTF-8.
void Config::readInput()
{
  if (inputAllRead)
    return;

  if (ProgramParameters::rawInput)
  {
    file.reset(new File(inputFilename, "r"));
    while (!file->isFinished())
      rawInput += file->getChar();

    inputAllRead = true;

    for (auto & tape : tapes)
    {
      tape.addToRef("");
      tape.addToHyp("");
    }

    return;
  }

  if (!file.get())
    file.reset(new File(inputFilename, "r"));
  FILE * fd = file->getDescriptor();

  char buffer[100000];
  buffer[0] = '\0';
  int lineIndex = 0;

  while (!std::feof(fd))
  {
    if (buffer != std::fgets(buffer, sizeof buffer, fd))
      break;

    // Compute the length once; guard against an empty string so that
    // `length-1` can never wrap around and index out of bounds.
    auto length = std::strlen(buffer);
    if (length > 0 && buffer[length-1] == '\n')
      buffer[--length] = '\0';

    lineIndex++;

    if (!utf8::is_valid(buffer, buffer+length))
    {
      fprintf(stderr, "ERROR (%s) : input (%s) line %d is not toally utf-8 formated. Aborting.\n", ERRINFO, inputFilename.c_str(), lineIndex);
      exit(1);
    }

    // Lines of at most 3 characters separate sentences: open a new sentence
    // unless the current one is still empty.
    if (length <= 3)
    {
      if (inputContent.empty() || !inputContent.back().empty())
        inputContent.emplace_back();
      continue;
    }

    auto splited = util::split(buffer, '=');
    if (splited[0] == "# sent_id ")
    {
      // A sentence id also opens a new sentence (the line itself is kept).
      if (inputContent.empty() || !inputContent.back().empty())
        inputContent.emplace_back();
    }
    else
    {
      // Other comment lines are dropped, except the "# text " one.
      std::string prefix = splited[0];
      if (buffer[0] == '#' && prefix != "# text ")
        continue;
    }

    if (inputContent.empty())
      inputContent.emplace_back();
    inputContent.back().emplace_back(buffer);
  }

  inputAllRead = true;
  fillTapesWithInput();
}
void Config::fillTapesWithInput()
{
rawInput = "";
std::vector<std::string> cols;
unsigned int usualColsSize = 0;
auto & ids = getTape("ID");
bool hasGov = hasTape("GOV");
auto & govs = hasGov ? getTape("GOV") : ids;
for (auto & sentence : inputContent)
{
int sentenceStartIndex = ids.refSize();
for (unsigned int wordIndex = 0; wordIndex < sentence.size(); wordIndex++)
{
auto & word = sentence[wordIndex];
if (util::split(word, '=')[0] == "# text ")
{
std::string prefix = rawInput.empty() ? "" : " ";
if (util::choiceWithProbability(0.3))
prefix = "\n";
else if (util::choiceWithProbability(0.3))
prefix = "";
if (rawInput.empty())
prefix = "";
rawInput += prefix + std::string(word.begin()+9, word.end());
continue;
}
else if (word[0] == '#')
continue;
cols = util::split(word, '\t');
if (!usualColsSize)
usualColsSize = cols.size();
if (cols.size() != usualColsSize)
{
fprintf(stderr, "ERROR (%s) : input (%s) line %d has %lu columns instead of %u. Aborting.\n", ERRINFO, inputFilename.c_str(), tapes[0].size(), cols.size(), usualColsSize);
exit(1);
}
for(unsigned int i = 0; i < cols.size(); i++)
if(bd.hasLineOfInputCol(i))
{
auto & tape = getTapeByInputCol(i);
tape.addToRef(cols[i]);
tape.addToHyp("");
if (tape.getName() == ProgramParameters::tapeToMask)
if (util::choiceWithProbability(ProgramParameters::maskRate))
tape.maskIndex(tape.refSize()-1);
if (tape.getName() == ProgramParameters::sequenceDelimiterTape)
{
fprintf(stderr, "ERROR (%s) : Tape \'%s\' must not be given as a column in the input since it's the sequence delimiter. Aborting.\n", ERRINFO, tape.getName().c_str());
exit(1);
}
}
getTape(ProgramParameters::sequenceDelimiterTape).addToRef(wordIndex == sentence.size()-1 ? ProgramParameters::sequenceDelimiter : "");
getTape(ProgramParameters::sequenceDelimiterTape).addToHyp("");
}
for (int word = sentenceStartIndex; hasGov && word < ids.refSize(); word++)
{
if (util::split(ids.getRef(word), '-').size() > 1)
continue;
if (util::split(ids.getRef(word), '.').size() > 1)
continue;
if (govs.getRef(word) == "0")
continue;
try
{
int id = std::stoi(ids.getRef(word));
std::string goalId = govs.getRef(word);
int relativeIndex = 0;
if (std::stoi(goalId) < id)
{
while (ids.getRef(word+relativeIndex) != goalId)
{
if (--relativeIndex+word < 0)
throw "";
}
}
else
{
while (ids.getRef(word+relativeIndex) != goalId)
if (++relativeIndex+word >= ids.refSize())
throw "";
}
govs.setRef(word, std::to_string(relativeIndex));
}
catch (std::exception &)
{
fprintf(stderr, "ERROR (%s) : invalid governor '%s' '%s'. Aborting.\n", ERRINFO, govs.getRef(word).c_str(), getTape("FORM").getRef(word).c_str());
exit(1);
}
}
}
// Making all tapes the same size
int maxTapeSize = 0;
for(auto & tape : tapes)
maxTapeSize = std::max<unsigned int>(maxTapeSize, tape.refSize());
for(auto & tape : tapes)
{
while (tape.refSize() < maxTapeSize)
tape.addToRef("");
while (tape.hypSize() < maxTapeSize)
tape.addToHyp("");
}
}
/// Prints a human-readable view of the configuration on `output`: entropy,
/// final/end-of-tapes flags, an excerpt of the raw input (when there is one),
/// a window of 5 cells around the head for every tape, and the stack.
///
/// @param output Stream receiving the whole report.
void Config::printForDebug(FILE * output)
{
  // Prints an 80-dash separator line.
  const auto printSeparator = [output]()
  {
    for(int i = 0; i < 80; i++)
      fprintf(output, "-%s", i == 80-1 ? "\n" : "");
  };

  int window = 5;

  // cols[0] holds the tape names; cols[k] holds the k-th cell of the window.
  std::vector< std::vector<std::string> > cols;
  cols.emplace_back();
  cols[0].emplace_back();
  cols[0].emplace_back();

  for(auto & tape : tapes)
  {
    cols[0].emplace_back(tape.getName());

    for(int i = std::max(0, head-window); i < std::min(tape.hypSize(), head+window); i++)
    {
      unsigned int colIndex = i - std::max(0, head-window)+1;
      while(cols.size() <= colIndex)
        cols.emplace_back();

      // The first tape also produces the two header rows (index and marker).
      if(&tape == &tapes[0])
      {
        cols[colIndex].emplace_back(i == head ? "head" : std::to_string(i));
        cols[colIndex].emplace_back(i == head ? " || " : "");
      }

      cols[colIndex].emplace_back(util::shrinkString(tape[i-head], 10, ".."));
    }
  }

  fprintf(output, "Configuration : %.2f entropy\n", totalEntropy);
  fprintf(output, "isFinal : %s endOfTapes : %s\n", isFinal() ? "true" : "false", endOfTapes() ? "true" : "false");

  printSeparator();

  if (!rawInput.empty())
  {
    int rawWindow = 30;
    int relativeHeadIndex = util::getEndIndexOfNthSymbolFrom(rawInput.begin()+rawInputHeadIndex, rawInput.end(), rawWindow);
    auto endIter = rawInput.begin() + rawInputHeadIndex + relativeHeadIndex + 1;
    // A negative index falls back to printing up to the end of the raw input.
    if (relativeHeadIndex < 0)
      endIter = rawInput.end();

    std::string toPrint(rawInput.begin()+rawInputHeadIndex, endIter);
    // Written to `output` like the rest of the report (it used to go to
    // stderr regardless of the requested stream).
    fprintf(output, "%s\n", toPrint.c_str());

    printSeparator();
  }

  util::printColumns(output, cols, 3);

  fprintf(output, "Stack : ");
  for(int s : stack)
    fprintf(output, "%d ", s);
  fprintf(output, "\n");

  printSeparator();
}
/// Unsupported operation: always reports an error on stderr and exits with
/// status 1. The stream argument is ignored.
void Config::printAsExample(FILE *)
{
  fprintf(stderr,
          "ERROR (%s) : not supported. Aborting.\n",
          ERRINFO);

  exit(1);
}
void Config::printAsOutput(FILE * output, int dataIndex, int realIndex, bool forceRef)
{
if (dataIndex == -1 || !output)
return;
lastIndexPrinted = dataIndex;
std::vector< std::pair<std::string, float> > toPrint;
for (unsigned int j = 0; j < tapes.size(); j++)
{
int outputTapeIndex = bd.getOutputIndexOfLine(j);
while ((int)toPrint.size() < outputTapeIndex+1)
toPrint.emplace_back("_", 0.0);
if(bd.mustPrintLine(j))
{
if (!forceRef)
toPrint[outputTapeIndex] = {tapes[j][dataIndex-head].empty() ? "_" : tapes[j][dataIndex-head].c_str(), tapes[j].getEntropy(dataIndex-head)};
else
toPrint[outputTapeIndex] = {tapes[j].getRef(dataIndex-head).empty() ? "_" : tapes[j].getRef(dataIndex-head).c_str(), tapes[j].getEntropy(dataIndex-head)};
}
}
bool allEmpty = true;
for (auto & it : toPrint)
if (it.first != "_" && !it.first.empty())
{
allEmpty = false;
break;
}
if (allEmpty)
return;
ProgramOutput::instance.addLine(output, toPrint, realIndex);
if (!ProgramParameters::delayedOutput)
{
auto eos = forceRef ? getTape(ProgramParameters::sequenceDelimiterTape).getRef(dataIndex-head) : getTape(ProgramParameters::sequenceDelimiterTape)[dataIndex-head];
if (eos == ProgramParameters::sequenceDelimiter)
fprintf(output, "\n");
}
}
/// Moves the head of the configuration, and of every tape, by `mvt` cells.
///
/// `currentWordIndex` is kept in sync: it is incremented for every cell left
/// behind when moving forward, and decremented for every cell re-entered when
/// moving backward, ignoring cells whose hypothesised ID contains a '-'.
/// A backward move of k cells therefore undoes a forward move of k cells.
///
/// Aborts the program if the move would go past the first tape's size while
/// endOfTapes() is false.
///
/// @param mvt Signed number of cells to move by.
void Config::moveHead(int mvt)
{
  if (head + mvt <= tapes[0].size())
  {
    head += mvt;

    // The tape heads have not moved yet, so the indexes below are relative to
    // the position the head is leaving.
    if (hasTape("ID"))
    {
      auto & ids = getTape("ID");

      for (int i = 0; i < mvt; i++)
        if (util::split(ids.getHyp(i), '-').size() <= 1)
          currentWordIndex += 1;

      // The bound used to be `i < mvt`, which is never true for a negative
      // `mvt`: the counter was never decremented when moving backward.
      for (int i = 1; i <= -mvt; i++)
        if (util::split(ids.getHyp(-i), '-').size() <= 1)
          currentWordIndex -= 1;
    }

    for (auto & tape : tapes)
      tape.moveHead(mvt);
  }
  else if (!endOfTapes())
  {
    fprintf(stderr, "ERROR (%s) : Input has not been read completely, yet the head is already at the end of tapes. Aborting.\n", ERRINFO);
    exit(1);
  }
}
/// Moves the raw-input head by `mvt` symbols.
///
/// The byte offset of the move is obtained from
/// util::getStartIndexOfNthSymbolFrom; the head is only updated when that
/// offset is strictly positive (forward move) or strictly negative (backward
/// move).
void Config::moveRawInputHead(int mvt)
{
  int relativeIndexMvt = 0;
  bool moved = false;

  if (mvt < 0)
  {
    relativeIndexMvt = util::getStartIndexOfNthSymbolFrom(rawInput.begin()+rawInputHeadIndex, rawInput.begin(), mvt);
    moved = relativeIndexMvt < 0;
  }
  else
  {
    relativeIndexMvt = util::getStartIndexOfNthSymbolFrom(rawInput.begin()+rawInputHeadIndex, rawInput.end(), mvt);
    moved = relativeIndexMvt > 0;
  }

  if (!moved)
    return;

  rawInputHead += mvt;
  rawInputHeadIndex += relativeIndexMvt;
}
/// Tells whether the configuration is final.
///
/// Once the raw-input head has advanced on a non-empty raw input, the
/// configuration is final when that head reached the end of the raw input.
/// Otherwise it is final when the tapes are exhausted and the stack is empty.
bool Config::isFinal()
{
  const bool rawInputStarted = !rawInput.empty() && rawInputHeadIndex > 0;

  if (rawInputStarted)
    return rawInputHeadIndex >= static_cast<int>(rawInput.size());

  return endOfTapes() && stack.empty();
}
void Config::reset()
{
for (auto & tape : tapes)
tape.clear();
actionHistory.clear();
pastActions.clear();
hashHistory.clear();
actionsHistory.clear();
stack.clear();
stackHistory = -1;
head = 0;
rawInputHead = 0;
rawInputHeadIndex = 0;
currentWordIndex = 1;
}
/// Returns the cell at `relativeIndex` (relative to the head): the reference
/// for a known tape, the hypothesis otherwise.
const std::string & Config::Tape::operator[](int relativeIndex)
{
  return isKnown ? getRef(relativeIndex) : getHyp(relativeIndex);
}
/// Returns the entropy stored with the hypothesis at `relativeIndex`
/// (relative to the head); always 0 for a known tape.
float Config::Tape::getEntropy(int relativeIndex)
{
  if (!isKnown)
    return hyp.get(head + relativeIndex).second;

  return 0.0;
}
/// Returns the reference value located `relativeIndex` cells from the head.
const std::string & Config::Tape::getRef(int relativeIndex)
{
  const int absoluteIndex = head + relativeIndex;
  return ref.get(absoluteIndex);
}
/// Returns the hypothesis value located `relativeIndex` cells from the head.
const std::string & Config::Tape::getHyp(int relativeIndex)
{
  const int absoluteIndex = head + relativeIndex;
  return hyp.get(absoluteIndex).first;
}
/// Overwrites the hypothesis located `relativeIndex` cells from the head with
/// `elem`, paired with the tape's current total entropy.
void Config::Tape::setHyp(int relativeIndex, const std::string & elem)
{
  const int absoluteIndex = head + relativeIndex;
  hyp.set(absoluteIndex, std::pair<std::string,float>(elem,totalEntropy));
}
/// Overwrites the reference located `relativeIndex` cells from the head with
/// `elem`.
void Config::Tape::setRef(int relativeIndex, const std::string & elem)
{
  const int absoluteIndex = head + relativeIndex;
  ref.set(absoluteIndex, elem);
}
/// Writes `elem` at `relativeIndex`: into the reference for a known tape,
/// into the hypothesis otherwise.
void Config::Tape::set(int relativeIndex, const std::string & elem)
{
  if (isKnown)
    setRef(relativeIndex, elem);
  else
    setHyp(relativeIndex, elem);
}
/// Returns the name of the current state.
/// Aborts the program when no state name has been set.
std::string & Config::getCurrentStateName()
{
  if (!currentStateName.empty())
    return currentStateName;

  fprintf(stderr, "ERROR (%s) : currentStateName is empty. Aborting.\n", ERRINFO);
  exit(1);
}
void Config::setCurrentStateName(const std::string & name)
{
this->currentStateName = name;
}
/// Returns the action history of the current state, creating an empty one of
/// capacity HISTORY_SIZE on first access.
LimitedStack<std::string> & Config::getCurrentStateHistory()
{
  const std::string & state = getCurrentStateName();

  const bool known = actionHistory.count(state) != 0;
  if (!known)
    actionHistory.emplace(state, HISTORY_SIZE);

  return actionHistory.find(state)->second;
}
/// Returns the action history of `state`, creating an empty one of capacity
/// HISTORY_SIZE on first access.
LimitedStack<std::string> & Config::getStateHistory(const std::string & state)
{
  const bool known = actionHistory.count(state) != 0;
  if (!known)
    actionHistory.emplace(state, HISTORY_SIZE);

  return actionHistory.find(state)->second;
}
/// Returns the entropy history of the current state, creating an empty one of
/// capacity HISTORY_SIZE on first access.
LimitedStack<float> & Config::getCurrentStateEntropyHistory()
{
  const std::string & state = getCurrentStateName();

  const bool known = entropyHistory.count(state) != 0;
  if (!known)
    entropyHistory.emplace(state, HISTORY_SIZE);

  return entropyHistory.find(state)->second;
}
/// Randomly reorders the sentences stored in `inputContent`.
///
/// Uses std::shuffle, since std::random_shuffle is deprecated in C++14 and
/// removed in C++17. The generator is created once and seeded from
/// std::rand(), so a program that seeds the C generator with std::srand()
/// before the first call still gets a reproducible order.
void Config::shuffle()
{
  static std::mt19937 generator(std::rand());
  std::shuffle(inputContent.begin(), inputContent.end(), generator);
}
/// Returns the stack element `index` positions below the top (0 is the top).
/// Index -1 designates the last popped element (`stackHistory`).
/// Aborts the program for any other out-of-range index.
int Config::stackGetElem(int index) const
{
  if (index == -1)
    return stackHistory;

  const int depth = static_cast<int>(stack.size());
  const bool inRange = index >= 0 && index < depth;

  if (!inRange)
  {
    fprintf(stderr, "ERROR (%s) : requested element index \'%d\' in the stack. Aborting.\n", ERRINFO, index);
    exit(1);
  }

  return stack[depth-1-index];
}
/// Tells whether stackGetElem(index) is valid: -1 always is, other indexes
/// must lie within the stack.
bool Config::stackHasIndex(int index) const
{
  if (index == -1)
    return true;

  return index >= 0 && index < static_cast<int>(stack.size());
}
/// Tells whether the stack has no top element.
bool Config::stackEmpty() const
{
  const bool hasTop = stackHasIndex(0);
  return !hasTop;
}
/// Removes the top of the stack, remembering it in `stackHistory`.
/// Aborts the program when the stack is empty.
void Config::stackPop()
{
  if (!stack.empty())
  {
    stackHistory = stack.back();
    stack.pop_back();
    return;
  }

  fprintf(stderr, "ERROR (%s) : Popping empty stack. Aborting.\n", ERRINFO);
  exit(1);
}
/// Pushes `elem` on top of the stack.
void Config::stackPush(int elem)
{
  stack.emplace_back(elem);
}
/// Returns the top of the stack without removing it.
/// Aborts the program when the stack is empty.
int Config::stackTop()
{
  if (!stack.empty())
    return stack.back();

  fprintf(stderr, "ERROR (%s) : Requesting back element of empty stack. Aborting.\n", ERRINFO);
  exit(1);
}
/// Returns the number of elements currently on the stack.
int Config::stackSize() const
{
  return static_cast<int>(stack.size());
}
/// Unsupported operation: always reports an error on stderr and exits with
/// status 1. The file argument is ignored.
void Config::loadFromFile(File &)
{
  fprintf(stderr,
          "ERROR (%s) : not supported. Aborting.\n",
          ERRINFO);

  exit(1);
}
/// Pushes `entropy` onto the entropy history of the current state, creating
/// that history (capacity HISTORY_SIZE) on first use.
void Config::addToEntropyHistory(float entropy)
{
  const std::string & state = getCurrentStateName();

  const bool known = entropyHistory.count(state) != 0;
  if (!known)
    entropyHistory.emplace(state, HISTORY_SIZE);

  auto & history = entropyHistory.find(state)->second;
  history.push(entropy);
}
/// Computes a hash of the configuration from the current state name and the
/// head position, combining both with the same mixing formula.
std::size_t Config::computeHash()
{
  static std::hash<std::string> strhasher;
  static std::hash<int> inthasher;

  // Folds one hashed value into the running seed.
  const auto mix = [](std::size_t seed, std::size_t hashed)
  {
    return seed ^ (hashed*0x9e3779b9+(seed << 6)+(seed >>2));
  };

  std::size_t result = mix(0, strhasher(currentStateName));
  result = mix(result, inthasher(getHead()));

  return result;
}
/// Pushes the current hash of the configuration onto `hashHistory`.
void Config::addHashToHistory()
{
  const std::size_t currentHash = computeHash();
  hashHistory.push(currentHash);
}
/// Returns the dictionary that `bd` associates with line number `num`.
Dict * Config::getDictOfLine(int num)
{
  Dict * dict = bd.getDictOfLine(num);
  return dict;
}
/// Returns the dictionary that `bd` associates with the line named `name`.
Dict * Config::getDictOfLine(const std::string & name)
{
  Dict * dict = bd.getDictOfLine(name);
  return dict;
}
int Config::getHead() const
{
return head;
}
const std::string & Config::Tape::getName()
{
return name;
}
/// Shifts the head of the tape by `mvt` cells (no bound checking).
void Config::Tape::moveHead(int mvt)
{
  head = head + mvt;
}
/// Tells whether the input is exhausted: it must have been entirely read,
/// and either the first tape's head is at its end or the raw-input head has
/// reached the end of a non-empty raw input.
bool Config::endOfTapes() const
{
  if (!inputAllRead)
    return false;

  if (tapes[0].headIsAtEnd())
    return true;

  return !rawInput.empty() && rawInputHeadIndex >= static_cast<int>(rawInput.size());
}
/// Tells whether the head has reached (or passed) the last index of the
/// reference.
bool Config::Tape::headIsAtEnd() const
{
  const auto lastIndex = ref.getLastIndex();
  return head >= lastIndex;
}
/// Returns the size of the tape, defined as the size of its reference.
int Config::Tape::size()
{
  const int referenceSize = refSize();
  return referenceSize;
}
/// Returns the data size reported by the reference buffer.
int Config::Tape::dataSize()
{
  const int referenceDataSize = ref.getDataSize();
  return referenceDataSize;
}
/// Returns the number of reference cells: last index of the reference plus one.
int Config::Tape::refSize()
{
  return 1+ref.getLastIndex();
}
/// Returns the number of hypothesis cells: last index of the hypothesis plus one.
int Config::Tape::hypSize()
{
  return 1+hyp.getLastIndex();
}
/// Appends `elem` to the hypothesis, paired with the tape's current total
/// entropy.
void Config::Tape::addToHyp(const std::string & elem)
{
  using Entry = std::pair<std::string, float>;
  hyp.push(Entry(elem,totalEntropy));
}
void Config::Tape::addToRef(const std::string & elem)
{
ref.push(elem);
}
/// Empties the reference and the hypothesis and rewinds the head to 0.
void Config::Tape::clear()
{
  ref.clear();
  hyp.clear();
  head = 0;
}
/// Copies the range [`from`, `to`] arguments from `other` into this tape, for
/// both the reference and the hypothesis (range semantics are those of the
/// underlying buffer's copy()).
void Config::Tape::copyPart(Tape & other, unsigned int from, unsigned int to)
{
  this->ref.copy(other.ref, from, to);
  this->hyp.copy(other.hyp, from, to);
}
void Config::Tape::clearDataForCopy()
{
ref.clearData();
hyp.clearData();
}
/// Sets the stream used by printTheRest(); a null stream disables it.
void Config::setOutputFile(FILE * outputFile)
{
  (*this).outputFile = outputFile;
}
/// Forwards to the reference buffer's getNextOverridenDataIndex().
int Config::Tape::getNextOverridenDataIndex()
{
  const int dataIndex = ref.getNextOverridenDataIndex();
  return dataIndex;
}
/// Forwards to the reference buffer's getNextOverridenRealIndex().
int Config::Tape::getNextOverridenRealIndex()
{
  const int realIndex = ref.getNextOverridenRealIndex();
  return realIndex;
}
/// Prints every line that has not been output yet on `outputFile`.
///
/// Ids and governors are first rewritten (updateIdsInSequence, setGovsAsUD).
/// Data indexes after the last printed one are printed first, then the data
/// indexes located before it.
///
/// @param forceRef Print reference values instead of hypotheses.
void Config::printTheRest(bool forceRef)
{
  if (outputFile == nullptr)
    return;

  updateIdsInSequence();
  setGovsAsUD(forceRef);

  const int tapeSize = tapes[0].size();
  // Snapshot: printAsOutput() overwrites lastIndexPrinted on each call.
  const int goalPrintIndex = lastIndexPrinted;

  // Number of data cells after the last printed one, plus those before it.
  const int cellsAfterGoal = tapes[0].dataSize()-(goalPrintIndex+1);
  int realIndex = tapeSize - (cellsAfterGoal+goalPrintIndex);

  int i = goalPrintIndex+1;
  while (i < tapes[0].dataSize())
  {
    printAsOutput(outputFile, i, realIndex, forceRef);
    realIndex++;
    i++;
  }

  i = 0;
  while (i < goalPrintIndex)
  {
    printAsOutput(outputFile, i, realIndex, forceRef);
    realIndex++;
    i++;
  }
}
void Config::setEntropy(float entropy)
{
totalEntropy = entropy;
for (auto & tape : tapes)
tape.setTotalEntropy(totalEntropy);
}
float Config::getEntropy() const
{
return totalEntropy;
}
void Config::addToEntropy(float entropy)
{
totalEntropy += entropy;
for (auto & tape : tapes)
tape.setTotalEntropy(totalEntropy);
}
void Config::Tape::setTotalEntropy(float entropy)
{
totalEntropy = entropy;
}
void Config::Tape::maskIndex(int index)
{
ref.maskIndex(index);
}
void Config::printColumnInfos(unsigned int index)
{
for (auto & tape : tapes)
fprintf(stderr, "%s\t: %s\n", tape.getName().c_str(), tape[index-getHead()].c_str());
fprintf(stderr, "\n");
}
void Config::addToActionsHistory(std::string & state, const std::string & action, int cost)
{
if (actionsHistory.size() > 2000)
for (auto it = actionsHistory.cbegin(); it != actionsHistory.cend();)
{
try
{
if (std::stoi(util::split(it->first, '_').back()) < head-20)
{
it = actionsHistory.erase(it);
continue;
}
} catch (std::exception &) {fprintf(stderr, "ERROR (%s) : calling std::stoi on \'%s\'.aborting.\n", ERRINFO, util::split(it->first, '_').back().c_str()); exit(1);}
it++;
}
actionsHistory[state+"_"+std::to_string(head)].emplace_back(action, cost);
}
/// Returns the actions recorded for `state` at the current head position
/// (key "<state>_<head>"); an empty entry is created when none exists.
std::vector< std::pair<std::string, int> > & Config::getActionsHistory(std::string & state)
{
  const std::string key = state+"_"+std::to_string(head);
  return actionsHistory[key];
}
/// Returns the percentage of absolute positions in [`from`, `to`] whose
/// hypothesis equals the reference.
float Config::Tape::getScore(int from, int to)
{
  float matches = 0.0;

  int position = from;
  while (position <= to)
  {
    const int relative = position-head;
    if (getRef(relative) == getHyp(relative))
      matches += 1;
    position++;
  }

  return 100.0*matches / (1+to-from);
}
int Config::Tape::getHead()
{
return head;
}
/// Replaces, in the hypothesis of every tape, each cell equal to `from` with
/// `to`. Positions 0 to tape.size()-1 are visited.
void Config::transformSymbol(const std::string & from, const std::string & to)
{
  for (auto & tape : tapes)
  {
    for (int position = 0; position < tape.size(); position++)
    {
      // Tape accessors are relative to the tape's head.
      const int relative = position-tape.getHead();

      if (tape.getHyp(relative) != from)
        continue;

      tape.setHyp(relative, to);
    }
  }
}
/// Overrides the index of the last line considered as printed.
void Config::setLastIndexPrinted(int lastIndexPrinted)
{
  (*this).lastIndexPrinted = lastIndexPrinted;
}
/// Rewrites the GOV tape so that each governor, stored as an offset relative
/// to its dependent, becomes the ID found at that offset.
///
/// Cells whose governor is not an integer, or is 0, are left untouched.
/// Does nothing when there is no "GOV" tape.
///
/// @param ref Work on the reference (true) or on the hypothesis (false).
void Config::setGovsAsUD(bool ref)
{
  if (!hasTape("GOV"))
    return;

  auto & ids = getTape("ID");
  auto & govs = getTape("GOV");

  for (int i = 0; i < (ref ? ids.refSize() : ids.hypSize()); i++)
  {
    // Tape accessors are relative to the head.
    const int relative = i-head;

    try
    {
      const int offset = std::stoi(ref ? govs.getRef(relative) : govs.getHyp(relative));
      if (offset == 0)
        continue;

      // Copy the id before writing it back into the GOV tape.
      const std::string idOfGov = ref ? ids.getRef(relative+offset) : ids.getHyp(relative+offset);

      if (ref)
        govs.setRef(relative, idOfGov);
      else
        govs.setHyp(relative, idOfGov);
    }
    catch (std::exception &)
    {
      continue;
    }
  }
}
/// Renumbers the hypothesised IDs of one sequence, using the shape of the
/// reference IDs ("n", "n.m" or "a-b") to decide the shape of each new ID.
///
/// The sequence is delimited by cells of the sequence-delimiter tape equal to
/// ProgramParameters::sequenceDelimiter, searched backward from the head.
/// When no delimiter is found, the whole tape (0 .. hypSize()-1) is used.
///
/// NOTE(review): std::stoi on the two parts of an "a-b" id is not guarded;
/// a malformed reference id would throw out of this function.
void Config::updateIdsInSequence()
{
int sentenceEnd = getHead();
auto & eos = getTape(ProgramParameters::sequenceDelimiterTape);
auto & ids = getTape("ID");
// Walk backward from the head to the closest delimiter: the end of the sequence.
while (sentenceEnd >= 0 && eos[sentenceEnd-getHead()] != ProgramParameters::sequenceDelimiter)
sentenceEnd--;
// Keep walking backward to the previous delimiter; the sequence starts right after it.
int sentenceStart = std::max(0,sentenceEnd-1);
while (sentenceStart >= 0 && eos[sentenceStart-getHead()] != ProgramParameters::sequenceDelimiter)
sentenceStart--;
sentenceStart++;
// No delimiter at or before the head: renumber the whole tape.
if (sentenceEnd < 0)
{
sentenceStart = 0;
sentenceEnd = eos.hypSize()-1;
}
// curId: next plain id to assign; digitIndex: next suffix for "n.m" ids.
int curId = 1;
int digitIndex = 1;
for (int i = sentenceStart; i <= sentenceEnd; i++)
{
auto splited = util::split(ids.getRef(i-getHead()), '-');
if (splited.size() == 1)
{
auto splited2 = util::split(ids.getRef(i-getHead()), '.');
if (splited2.size() == 1)
{
// Plain id: assign the next number and restart the "n.m" suffixes.
ids.setHyp(i-getHead(), std::to_string(curId++));
digitIndex = 1;
continue;
}
// "n.m" id: built from the current value of curId and the running suffix.
ids.setHyp(i-getHead(), std::to_string(curId)+"."+std::to_string(digitIndex));
digitIndex++;
continue;
}
// "a-b" id: keep the same span (b-a), starting at the current id; curId is not advanced.
int multiWordSize = std::stoi(splited[1]) - std::stoi(splited[0]);
ids.setHyp(i-getHead(), std::to_string(curId)+"-"+std::to_string(curId+multiWordSize));
digitIndex = 1;
}
}