diff --git a/maca_common/include/LimitedArray.hpp b/maca_common/include/LimitedArray.hpp index c830423ee0aab50bd05ff2eaed5cca7d0f4ff86b..f7176f6ba6b25613c83d5cfe695a0f15555629d4 100644 --- a/maca_common/include/LimitedArray.hpp +++ b/maca_common/include/LimitedArray.hpp @@ -30,16 +30,17 @@ class LimitedArray nbElements = 0; lastElementDataIndex = -1; lastElementRealIndex = -1; + data.clear(); } void push(const T & elem) { nbElements++; - if (nbElements > data.size()) + if (nbElements > (int)data.size()) nbElements = data.size(); lastElementDataIndex++; - if (lastElementDataIndex >= data.size()) + if (lastElementDataIndex >= (int)data.size()) lastElementDataIndex = 0; lastElementRealIndex++; @@ -47,15 +48,25 @@ class LimitedArray data[lastElementDataIndex] = elem; } - const T & get(unsigned int index) + const T & get(unsigned int index) const { return data[index % data.size()]; } - int getLastIndex() + void set(unsigned int index, const T & value) + { + data[index % data.size()] = value; + } + + int getLastIndex() const { return lastElementRealIndex; } + + void copy(LimitedArray<T> & other, unsigned int from, unsigned int to) + { + std::copy(other.data.begin()+from, other.data.begin()+to, std::back_inserter(data)); + } }; #endif diff --git a/maca_common/include/LimitedStack.hpp b/maca_common/include/LimitedStack.hpp index 9994451a6e81b1c1b63e608986a20320dd78d41f..32422abd60fdf1f344fb2c0d9588e8c763cc289d 100644 --- a/maca_common/include/LimitedStack.hpp +++ b/maca_common/include/LimitedStack.hpp @@ -33,10 +33,10 @@ class LimitedStack void push(T elem) { nbElements++; - if (nbElements > data.size()) + if (nbElements > (int)data.size()) nbElements = data.size(); lastElementIndex++; - if (lastElementIndex >= data.size()) + if (lastElementIndex >= (int)data.size()) lastElementIndex = 0; data[lastElementIndex] = elem; @@ -80,7 +80,7 @@ class LimitedStack bool contains(const T & element) { int currentIndex = lastElementIndex; - for (unsigned int i = 0; i < nbElements; i++) + for (int i = 0; i < nbElements; i++) { if (data[currentIndex] == element) return true; diff --git a/neural_network/src/MLPBase.cpp b/neural_network/src/MLPBase.cpp index 8fd98e450db028d18a69d785469409af1690d8ac..ed4149603b77154616a15f047be768f5a26a1768 100644 --- a/neural_network/src/MLPBase.cpp +++ b/neural_network/src/MLPBase.cpp @@ -312,9 +312,9 @@ void MLPBase::loadStruct(dynet::ParameterCollection & model, const std::string & exit(1); } - do - { - } while (fscanf(fd, "#TOPOLOGY# # {1,1} 0 Layer : %d %d %s %f\n", &input, &output, activation, &dropout) == 4); + do + { + } while (fscanf(fd, "#TOPOLOGY# # {1,1} 0 Layer : %d %d %s %f\n", &input, &output, activation, &dropout) == 4); } while (fscanf(fd, "#TOPOLOGY# # {1,1} 0 Layer : %d %d %s %f\n", &input, &output, activation, &dropout) != 4) @@ -346,7 +346,7 @@ void MLPBase::loadParameters(dynet::ParameterCollection & model, const std::stri { parameters[i][0] = loader.load_param(model, prefix + std::to_string(i) + "_W"); parameters[i][1] = loader.load_param(model, prefix + std::to_string(i) + "_b"); - } catch(const std::runtime_error e) + } catch(const std::runtime_error & e) { fprintf(stderr, "WARNING (%s) : Could not find parameter with key \'%s\' in the model. Ignore this if this model was trained with an older version of Macaon.\n", ERRINFO, (prefix+std::to_string(i) + "_W").c_str()); prefix = "Layer_"; diff --git a/trainer/src/Trainer.cpp b/trainer/src/Trainer.cpp index 6784c3ccc369f90e81898d2e55447395eb85d435..63c07cb43838cf962c24c23e8b4e2a8ef1f2df9d 100644 --- a/trainer/src/Trainer.cpp +++ b/trainer/src/Trainer.cpp @@ -56,7 +56,7 @@ void Trainer::computeScoreOnDev() // Print current iter advancement in percentage if (ProgramParameters::interactive) { - int totalSize = devConfig->tapes[0].hyp.size(); + int totalSize = ProgramParameters::tapeSize; int steps = devConfig->getHead(); if (steps && (steps % 200 == 0 || totalSize-steps < 200)) { @@ -208,7 +208,7 @@ void Trainer::train() // Print current iter advancement in percentage if (ProgramParameters::interactive) { - int totalSize = ProgramParameters::iterationSize == -1 ? trainConfig.tapes[0].hyp.size() : ProgramParameters::iterationSize; + int totalSize = ProgramParameters::iterationSize == -1 ? ProgramParameters::tapeSize : ProgramParameters::iterationSize; int steps = ProgramParameters::iterationSize == -1 ? trainConfig.getHead() : nbSteps; if (steps % 200 == 0 || totalSize-steps < 200) { @@ -226,7 +226,7 @@ void Trainer::train() if (zeroCostActions.empty()) { - if (trainConfig.getHead() >= (int)trainConfig.tapes[0].ref.size()-1) + if (trainConfig.endOfTapes()) { while (!trainConfig.stackEmpty()) trainConfig.stackPop(); diff --git a/trainer/src/macaon_train.cpp b/trainer/src/macaon_train.cpp index 02f251d6da8ac7c17d622d157ab17ba1f2c1934a..4f8e10d931a33fdea44c1e6421b01e5dda541d34 100644 --- a/trainer/src/macaon_train.cpp +++ b/trainer/src/macaon_train.cpp @@ -34,7 +34,9 @@ po::options_description getOptionsDescription() ("mcd", po::value<std::string>()->required(), "MCD file that describes the input") ("train,T", po::value<std::string>()->required(), - "Training corpus formated according to the MCD"); + "Training corpus formated according to the MCD") + ("tapeSize", po::value<int>()->required(), + "Number of lines in the input file."); po::options_description opt("Optional"); opt.add_options() @@ -67,8 +69,6 @@ po::options_description getOptionsDescription() "Remove identical training examples") ("showFeatureRepresentation", po::value<int>()->default_value(0), "For each state of the Config, show its feature representation") - ("tapeSize", po::value<int>()->default_value(100000), - "Number of lines in the input file.") ("interactive", po::value<bool>()->default_value(true), "Is the shell interactive ? Display advancement informations") ("randomEmbeddings", po::value<bool>()->default_value(false), diff --git a/transition_machine/include/Config.hpp b/transition_machine/include/Config.hpp index b0c0814b67c85a9672ecc5742075c26e30d1659e..922f642354749a3d1960d1a3eecdef1461e606b0 100644 --- a/transition_machine/include/Config.hpp +++ b/transition_machine/include/Config.hpp @@ -60,11 +60,16 @@ class Config /// @param relativeIndex The index of the cell relatively to the head. /// /// @return The content of the cell. - const std::string & gethyp(int relativeIndex); + const std::string & getHyp(int relativeIndex); + /// @brief Set the value of a cell of the hyp. + /// + /// @param relativeIndex The index of the cell relatively to the head. + /// @param elem The new content of the cell. + void setHyp(int relativeIndex, const std::string & elem); /// @brief Return true if the head of this tape is on the last cell. /// /// @return True if the head of this tape is on the last cell. - bool headIsAtEnd(); + bool headIsAtEnd() const; public : @@ -74,18 +79,38 @@ class Config /// /// @return The name of this Tape. const std::string & getName(); - /// @brief Set the name of this Tape. - /// - /// @param name The desired name. - void setName(const std::string & name); - /// @brief Set if the content of this tape is known or predicted. - /// - /// @param known The value to set. void setKnown(bool known); /// @brief Move the head of this tape. /// /// @param mvt The relative movement to apply to the head. void moveHead(int mvt); + /// @brief Return the current size of the Tape, in number of cells. + /// + /// @return The current number of cells in this Tape. + int size(); + /// @brief Return the current size of the ref Tape, in number of cells. + /// + /// @return The current number of cells in this Tape. + int refSize(); + /// @brief Return the current size of the hyp Tape, in number of cells. + /// + /// @return The current number of cells in this Tape. + int hypSize(); + /// @brief Add cell to ref. + /// + /// @param elem The content of the new cell. + void addToRef(const std::string & elem); + /// @brief Add cell to hyp. + /// + /// @param elem The content of the new cell. + void addToHyp(const std::string & elem); + /// @brief Empty the Tape. + void clear(); + /// @brief Copy a chunk of an other Tape inside this one. + /// + /// @param from first cell index of the chunk to copy. + /// @param to last cell index of the chunk to copy. + void copyPart(Tape & other, unsigned int from, unsigned int to); }; private : diff --git a/transition_machine/src/ActionBank.cpp b/transition_machine/src/ActionBank.cpp index 2de43a8c4e744c3695452a39657eff12d06926df..84b063d5505782693ac81e74387aada204ed97a7 100644 --- a/transition_machine/src/ActionBank.cpp +++ b/transition_machine/src/ActionBank.cpp @@ -394,7 +394,7 @@ std::vector<Action::BasicAction> ActionBank::str2sequence(const std::string & na for (int i = c.stackSize()-1; i >= 0; i--) { auto s = c.stackGetElem(i); - if (govs.getHyp[s-b0] == "0") + if (govs.getHyp(s-b0) == "0") { simpleBufferWrite(c, "GOV", "", s-b0); break; @@ -578,9 +578,7 @@ void ActionBank::simpleBufferWrite(Config & config, const std::string & tapeName { auto & tape = config.getTape(tapeName); - int index = config.getHead() + relativeIndex; - - tape.setHyp(index, value); + tape.setHyp(relativeIndex, value); } bool ActionBank::simpleBufferWriteAppliable(Config & config, @@ -590,10 +588,10 @@ bool ActionBank::simpleBufferWriteAppliable(Config & config, int index = config.getHead() + relativeIndex; - if (c.endOfTapes()) + if (config.endOfTapes()) return true; - return !(index < 0); + return !(index < 0) && index < tape.size(); } bool ActionBank::isRuleAppliable(Config & config, @@ -611,7 +609,7 @@ void ActionBank::writeRuleResult(Config & config, const std::string & fromTapeNa auto & from = fromTape.getRef(relativeIndex); - toTape.setHyp(config.getHead() + relativeIndex, applyRule(from, rule)); + toTape.setHyp(relativeIndex, applyRule(from, rule)); } int ActionBank::getLinkLength(const Config & c, const std::string & action) diff --git a/transition_machine/src/Config.cpp b/transition_machine/src/Config.cpp index 2a9b04707c706f0deef4018b592397ea3c066a0d..1be439008ef66969edc73033cd2ddf37d7b282fc 100644 --- a/transition_machine/src/Config.cpp +++ b/transition_machine/src/Config.cpp @@ -36,7 +36,7 @@ void Config::readInput() { if (!file.get()) file.reset(new File(inputFilename, "r")); - FILE * fd = file.getDescriptor(); + FILE * fd = file->getDescriptor(); char buffer[10000]; std::vector<std::string> cols; @@ -45,8 +45,6 @@ void Config::readInput() int toRead = 100; int haveRead = 0; - bool finishedFile = false; - while(fscanf(fd, "%[^\n]\n", buffer) == 1 && haveRead < toRead) { cols = split(buffer, '\t'); @@ -55,7 +53,7 @@ void Config::readInput() if (cols.size() != usualColsSize) { - fprintf(stderr, "ERROR (%s) : input (%s) line %lu has %lu columns instead of %u. Aborting.\n", ERRINFO, filename.c_str(), tapes[0].ref.size(), cols.size(), usualColsSize); + fprintf(stderr, "ERROR (%s) : input (%s) line %d has %lu columns instead of %u. Aborting.\n", ERRINFO, inputFilename.c_str(), tapes[0].size(), cols.size(), usualColsSize); exit(1); } @@ -72,7 +70,7 @@ void Config::readInput() } // Making all tapes the same size - unsigned int maxTapeSize = 0; + int maxTapeSize = 0; for(auto & tape : tapes) maxTapeSize = std::max<unsigned int>(maxTapeSize, tape.refSize()); @@ -128,7 +126,7 @@ void Config::printForDebug(FILE * output) { cols[0].emplace_back(tape.getName()); - for(int i = std::max(0, head-window); i < std::min((int)tape.hyp.size(), head+window); i++) + for(int i = std::max(0, head-window); i < std::min(tape.hypSize(), head+window); i++) { unsigned int colIndex = i - std::max(0, head-window)+1; while(cols.size() <= colIndex) @@ -142,7 +140,6 @@ void Config::printForDebug(FILE * output) cols[colIndex].emplace_back(shrink(tape[i])); } - } fprintf(output, "Configuration :\n"); @@ -167,8 +164,11 @@ void Config::printAsExample(FILE *) exit(1); } -void Config::printAsOutput(FILE * output) +void Config::printAsOutput(FILE *) { + //TODO : Output should be done when reading, and at the end of the program. + + /* unsigned int lastToPrint = 0; for (unsigned int j = 0; j < tapes.size(); j++) if(bd.mustPrintLine(j)) @@ -180,11 +180,12 @@ void Config::printAsOutput(FILE * output) if(bd.mustPrintLine(j)) fprintf(output, "%s%s", tapes[j][i].empty() ? "0" : tapes[j][i].c_str(), j == lastToPrint ? "\n" : "\t"); } + */ } void Config::moveHead(int mvt) { - if (head + mvt < (int)tapes[0].ref.size()) + if (head + mvt < tapes[0].size()) { head += mvt; @@ -195,16 +196,13 @@ void Config::moveHead(int mvt) bool Config::isFinal() { - return head >= (int)getTapeByInputCol(0).hyp.size()-1 && stack.empty(); + return endOfTapes() && stack.empty(); } void Config::reset() { for (auto & tape : tapes) - { - tape.ref.clear(); - tape.hyp.clear(); - } + tape.clear(); actionHistory.clear(); pastActions.clear(); @@ -215,25 +213,31 @@ void Config::reset() head = 0; - readInput(inputFilename); + file.reset(); + readInput(); } const std::string & Config::Tape::operator[](int relativeIndex) { if(isKnown) - return getRef[relativeIndex]; + return getRef(relativeIndex); - return getHyp[relativeIndex]; + return getHyp(relativeIndex); } const std::string & Config::Tape::getRef(int relativeIndex) { - return ref[head + relativeIndex]; + return ref.get(head + relativeIndex); } -const std::string & Config::Tape::gethyp(int relativeIndex) +const std::string & Config::Tape::getHyp(int relativeIndex) { - return hyp[head + relativeIndex]; + return hyp.get(head + relativeIndex); +} + +void Config::Tape::setHyp(int relativeIndex, const std::string & elem) +{ + hyp.set(head + relativeIndex, elem); } std::string & Config::getCurrentStateName() @@ -282,8 +286,8 @@ void Config::shuffle(const std::string & delimiterTape, const std::string & deli std::vector< std::pair<unsigned int, unsigned int> > delimiters; unsigned int previousIndex = 0; - for (unsigned int i = 0; i < tape.ref.size(); i++) - if (tape.ref[i] == delimiter) + for (int i = 0; i < tape.refSize(); i++) + if (tape.getRef(i-head) == delimiter) { delimiters.emplace_back(previousIndex, i); previousIndex = i+1; @@ -295,28 +299,21 @@ void Config::shuffle(const std::string & delimiterTape, const std::string & deli exit(1); } - std::pair<unsigned int, unsigned int> suffix = {delimiters.back().second+1, tape.ref.size()-1}; + std::pair<unsigned int, unsigned int> suffix = {delimiters.back().second+1, tape.refSize()-1}; std::random_shuffle(delimiters.begin(), delimiters.end()); - std::vector<Tape> newTapes = tapes; + auto newTapes = tapes; for (unsigned int tape = 0; tape < tapes.size(); tape++) { - newTapes[tape].ref.clear(); - newTapes[tape].hyp.clear(); + newTapes[tape].clear(); for (auto & delimiter : delimiters) - { - std::copy(tapes[tape].ref.begin()+delimiter.first, tapes[tape].ref.begin()+delimiter.second+1, std::back_inserter(newTapes[tape].ref)); - std::copy(tapes[tape].hyp.begin()+delimiter.first, tapes[tape].hyp.begin()+delimiter.second+1, std::back_inserter(newTapes[tape].hyp)); - } + newTapes[tape].copyPart(tapes[tape], delimiter.first, delimiter.second+1); if (suffix.first <= suffix.second) - { - std::copy(tapes[tape].ref.begin()+suffix.first, tapes[tape].ref.begin()+suffix.second+1, std::back_inserter(newTapes[tape].ref)); - std::copy(tapes[tape].hyp.begin()+suffix.first, tapes[tape].hyp.begin()+suffix.second+1, std::back_inserter(newTapes[tape].hyp)); - } + newTapes[tape].copyPart(tapes[tape], suffix.first, suffix.second+1); } tapes = newTapes; @@ -397,15 +394,15 @@ std::size_t Config::computeHash() { static int window = 3; - unsigned int start = std::max(0, head-window); - unsigned int end = std::min((unsigned int)tapes[0].ref.size()-1, (unsigned int)head+window); + int start = std::max(0, head-window); + int end = std::min(tapes[0].refSize()-1, head+window); std::hash<std::string> hasher; std::size_t result = 0; - for (unsigned int i = start; i < end; i++) + for (int i = start; i < end; i++) for (auto & tape : tapes) - result ^= (hasher(tape[i])*0x9e3779b9+(result << 6)+(result >>2)); + result ^= (hasher(tape[i-head])*0x9e3779b9+(result << 6)+(result >>2)); return result; } @@ -435,28 +432,56 @@ const std::string & Config::Tape::getName() return name; } -void Config::Tape::setName(const std::string & name) +void Config::Tape::moveHead(int mvt) { - this->name = name; + head += mvt; } -void Config::Tape::setKnown(bool known) +bool Config::endOfTapes() const { - this->isKnown = known; + return inputAllRead && tapes[0].headIsAtEnd(); } -void Config::Tape::moveHead(int mvt) +bool Config::Tape::headIsAtEnd() const { - head += mvt; + return head == ref.getLastIndex(); } -bool Config::endOfTapes() const +int Config::Tape::size() { - return inputAllRead && tapes[0].headIsAtEnd(); + return refSize(); } -bool Config::Tape::headIsAtEnd() +int Config::Tape::refSize() { - return head == ref.getLastIndex(); + return ref.getLastIndex(); +} + +int Config::Tape::hypSize() +{ + return hyp.getLastIndex(); +} + +void Config::Tape::addToHyp(const std::string & elem) +{ + hyp.push(elem); +} + +void Config::Tape::addToRef(const std::string & elem) +{ + ref.push(elem); +} +void Config::Tape::clear() +{ + head = 0; + ref.clear(); + hyp.clear(); +} + +void Config::Tape::copyPart(Tape & other, unsigned int from, unsigned int to) +{ + ref.copy(other.ref, from, to); + hyp.copy(other.hyp, from, to); + } diff --git a/transition_machine/src/FeatureBank.cpp b/transition_machine/src/FeatureBank.cpp index 4d81ff699e18fd5930c25a0f5298eca37fbd1827..bf8f67f9629567c5404731ada458d60b41b2a968 100644 --- a/transition_machine/src/FeatureBank.cpp +++ b/transition_machine/src/FeatureBank.cpp @@ -190,6 +190,7 @@ FeatureModel::FeatureValue FeatureBank::ldep(Config & config, int index, const s auto & eos = config.getTape(ProgramParameters::sequenceDelimiterTape); Dict * dict = config.getDictOfLine(tapeName); auto policy = dictPolicy2FeaturePolicy(dict->policy); + int b0 = config.getHead(); if(object == "s") { @@ -203,26 +204,26 @@ FeatureModel::FeatureValue FeatureBank::ldep(Config & config, int index, const s index += config.getHead(); } - if(index < 0 || index >= (int)tape.hyp.size()) + if(index < 0 || index >= tape.hypSize()) return {dict, featName, Dict::nullValueStr, policy}; int candidate = -1; unsigned int maxDist = 10; - for (int i = std::max<int>(index-1, 0); index - i <= (int)maxDist && i >= 0 && eos[i] != ProgramParameters::sequenceDelimiter; i--) + for (int i = std::max<int>(index-1, 0); index - i <= (int)maxDist && i >= 0 && eos[i-b0] != ProgramParameters::sequenceDelimiter; i--) { int dist = index - i; - if(govs[i] == std::to_string(dist)) + if(govs[i-b0] == std::to_string(dist)) candidate = i; } if(candidate == -1) return {dict, featName, Dict::nullValueStr, policy}; - if(tape[candidate].empty()) + if(tape[candidate-b0].empty()) return {dict, featName, Dict::nullValueStr, policy}; - return {dict, featName, tape[candidate], policy}; + return {dict, featName, tape[candidate-b0], policy}; } FeatureModel::FeatureValue FeatureBank::dist(Config & config, const std::string & object1, int index1, const std::string & object2, int index2, const std::string & featName) @@ -234,7 +235,7 @@ FeatureModel::FeatureValue FeatureBank::dist(Config & config, const std::string if (object1 == "b") { - if(index1 < 0 || index1 >= (int)config.tapes[0].hyp.size()) + if(index1 < 0 || index1+config.getHead() >= config.tapes[0].hypSize()) return {dict, featName, Dict::nullValueStr, policy}; elem1 = config.getHead() + index1; @@ -249,7 +250,7 @@ FeatureModel::FeatureValue FeatureBank::dist(Config & config, const std::string if (object2 == "b") { - if(index2 < 0 || index2 >= (int)config.tapes[0].hyp.size()) + if(index2 < 0 || index2+config.getHead() >= config.tapes[0].hypSize()) return {dict, featName, Dict::nullValueStr, policy}; elem2 = config.getHead() + index2; @@ -285,26 +286,26 @@ FeatureModel::FeatureValue FeatureBank::rdep(Config & config, int index, const s index += config.getHead(); } - if(index < 0 || index >= (int)tape.hyp.size()) + if(index < 0 || index >= tape.hypSize()) return {dict, featName, Dict::nullValueStr, policy}; int candidate = -1; - unsigned int maxDist = 10; + int maxDist = 10; - for (unsigned int i = index+1; i - index <= maxDist && i < eos.hyp.size() && eos[i-1] != ProgramParameters::sequenceDelimiter; i++) + for (int i = index+1; i - index <= maxDist && i < eos.hypSize() && eos[i-1-config.getHead()] != ProgramParameters::sequenceDelimiter; i++) { int dist = index - i; - if(govs[i] == std::to_string(dist)) + if(govs[i-config.getHead()] == std::to_string(dist)) candidate = i; } if(candidate == -1) return {dict, featName, Dict::nullValueStr, policy}; - if(tape[candidate].empty()) + if(tape[candidate-config.getHead()].empty()) return {dict, featName, Dict::nullValueStr, policy}; - return {dict, featName, tape[candidate], policy}; + return {dict, featName, tape[candidate-config.getHead()], policy}; } FeatureModel::FeatureValue FeatureBank::simpleBufferAccess(Config & config, int relativeIndex, const std::string & tapeName, const std::string & featName) @@ -315,13 +316,13 @@ FeatureModel::FeatureValue FeatureBank::simpleBufferAccess(Config & config, int int index = config.getHead() + relativeIndex; - if(index < 0 || index >= (int)tape.hyp.size()) + if(index < 0 || index >= tape.hypSize()) return {dict, featName, Dict::nullValueStr, policy}; - if(tape[index].empty()) + if(tape[relativeIndex].empty()) return {dict, featName, Dict::nullValueStr, policy}; - return {dict, featName, tape[index], policy}; + return {dict, featName, tape[relativeIndex], policy}; } FeatureModel::FeatureValue FeatureBank::simpleStackAccess(Config & config, int relativeIndex, const std::string & tapeName, const std::string & featName) @@ -335,13 +336,13 @@ FeatureModel::FeatureValue FeatureBank::simpleStackAccess(Config & config, int r int index = config.stackGetElem(relativeIndex); - if(index < 0 || index >= (int)tape.hyp.size()) + if(index < 0 || index >= tape.hypSize()) return {dict, featName, Dict::nullValueStr, policy}; - if(tape[index].empty()) + if(tape[index-config.getHead()].empty()) return {dict, featName, Dict::nullValueStr, policy}; - return {dict, featName, tape[index], policy}; + return {dict, featName, tape[index-config.getHead()], policy}; } FeatureModel::FeatureValue FeatureBank::getUppercase(Config &, const FeatureModel::FeatureValue & fv) @@ -435,7 +436,7 @@ FeatureModel::FeatureValue FeatureBank::aggregateBuffer(Config & c, int from, in { int index = c.getHead() + i; std::string featName = "b."+std::to_string(i)+"."+tape.getName(); - if(index < 0 || index >= (int)tape.hyp.size()) + if(index < 0 || index >= tape.hypSize()) { result.dicts.emplace_back(dict); result.names.emplace_back(featName); @@ -443,7 +444,7 @@ FeatureModel::FeatureValue FeatureBank::aggregateBuffer(Config & c, int from, in result.policies.emplace_back(policy); continue; } - if(tape[index].empty()) + if(tape[i].empty()) { result.dicts.emplace_back(dict); result.names.emplace_back(featName); @@ -454,7 +455,7 @@ FeatureModel::FeatureValue FeatureBank::aggregateBuffer(Config & c, int from, in result.dicts.emplace_back(dict); result.names.emplace_back(featName); - result.values.emplace_back(tape[index]); + result.values.emplace_back(tape[i]); result.policies.emplace_back(policy); } } @@ -492,7 +493,7 @@ FeatureModel::FeatureValue FeatureBank::aggregateStack(Config & c, int from, con continue; } int index = c.stackGetElem(i); - if((index < 0 || index >= (int)tape.hyp.size())||tape[index].empty()) + if( (index < 0 || index >= tape.hypSize()) || tape[index-c.getHead()].empty() ) { result.dicts.emplace_back(dict); result.names.emplace_back(featName); @@ -503,7 +504,7 @@ FeatureModel::FeatureValue FeatureBank::aggregateStack(Config & c, int from, con result.dicts.emplace_back(dict); result.names.emplace_back(featName); - result.values.emplace_back(tape[index]); + result.values.emplace_back(tape[index-c.getHead()]); result.policies.emplace_back(policy); } } diff --git a/transition_machine/src/Oracle.cpp b/transition_machine/src/Oracle.cpp index aab0be882f7dfcb2dd983099adce5957d6ee6380..2ca022dbca69fce22ce2b78a2be24d608ef6665a 100644 --- a/transition_machine/src/Oracle.cpp +++ b/transition_machine/src/Oracle.cpp @@ -204,7 +204,7 @@ void Oracle::createDatabase() }, [](Config & c, Oracle *, const std::string & action) { - return action == "WRITE b.0 POS " + c.getTape("POS").getRef[0] || c.endOfTapes() ? 0 : 1; + return (action == "WRITE b.0 POS " + c.getTape("POS").getRef(0) || c.endOfTapes()) ? 0 : 1; }))); str2oracle.emplace("tokenizer", std::unique_ptr<Oracle>(new Oracle( @@ -220,7 +220,7 @@ void Oracle::createDatabase() }, [](Config & c, Oracle *, const std::string & action) { - return action == "WRITE b.0 BIO " + c.getTape("BIO").ref[c.getHead()] || c.getHead() >= (int)c.tapes[0].ref.size()-1 ? 0 : 1; + return (action == "WRITE b.0 BIO " + c.getTape("BIO").getRef(0) || c.endOfTapes()) ? 0 : 1; }))); str2oracle.emplace("eos", std::unique_ptr<Oracle>(new Oracle( @@ -236,7 +236,7 @@ void Oracle::createDatabase() }, [](Config & c, Oracle *, const std::string & action) { - return action == "WRITE b.0 " + ProgramParameters::sequenceDelimiterTape + " " + (c.getTape(ProgramParameters::sequenceDelimiterTape).ref[c.getHead()] == std::string(ProgramParameters::sequenceDelimiter) ? std::string(ProgramParameters::sequenceDelimiter) : std::string("0")) || c.getHead() >= (int)c.tapes[0].ref.size()-1 ? 0 : 1; + return (action == "WRITE b.0 " + ProgramParameters::sequenceDelimiterTape + " " + (c.getTape(ProgramParameters::sequenceDelimiterTape).getRef(0) == std::string(ProgramParameters::sequenceDelimiter) ? std::string(ProgramParameters::sequenceDelimiter) : std::string("0")) || c.endOfTapes()) ? 0 : 1; }))); str2oracle.emplace("morpho", std::unique_ptr<Oracle>(new Oracle( @@ -252,7 +252,7 @@ void Oracle::createDatabase() }, [](Config & c, Oracle *, const std::string & action) { - return action == "WRITE b.0 MORPHO " + c.getTape("MORPHO").ref[c.getHead()] || c.getHead() >= (int)c.tapes[0].ref.size()-1 ? 0 : 1; + return (action == "WRITE b.0 MORPHO " + c.getTape("MORPHO").getRef(0) || c.endOfTapes()) ? 0 : 1; }))); str2oracle.emplace("signature", std::unique_ptr<Oracle>(new Oracle( @@ -270,20 +270,20 @@ void Oracle::createDatabase() [](Config & c, Oracle * oracle) { int window = 3; - int start = std::max<int>(c.getHead()-window, 0); - int end = std::min<int>(c.getHead()+window, c.getTape("SGN").hyp.size()-1); + int start = std::max<int>(c.getHead()-window, 0) - c.getHead(); + int end = std::min<int>(c.getHead()+window, c.getTape("SGN").size()-1) - c.getHead(); - while (start < (int)c.getTape("SGN").hyp.size() && !c.getTape("SGN").hyp[start].empty()) + while (start+c.getHead() < c.getTape("SGN").size() && !c.getTape("SGN").getHyp(start).empty()) start++; if (start > end) return std::string("NOTHING"); - std::string action("MULTIWRITE " + std::to_string(start-c.getHead()) + " " + std::to_string(end-c.getHead()) + " " + std::string("SGN")); + std::string action("MULTIWRITE " + std::to_string(start) + " " + std::to_string(end) + " " + std::string("SGN")); for(int i = start; i <= end; i++) { - const std::string & form = c.getTape("FORM").ref[i]; + const std::string & form = c.getTape("FORM").getRef(i); std::string & signature = oracle->data[form]; if(signature.empty()) @@ -357,11 +357,11 @@ void Oracle::createDatabase() }, [](Config & c, Oracle *, const std::string & action) { - const std::string & form = c.getTape("FORM").ref[c.getHead()]; - const std::string & lemma = c.getTape("LEMMA").ref[c.getHead()]; + const std::string & form = c.getTape("FORM").getRef(0); + const std::string & lemma = c.getTape("LEMMA").getRef(0); std::string rule = getRule(form, lemma); - return action == std::string("RULE LEMMA ON FORM ") + rule || c.getHead() >= (int)c.tapes[0].ref.size()-1 ? 0 : 1; + return (action == std::string("RULE LEMMA ON FORM ") + rule || c.endOfTapes()) ? 0 : 1; }))); str2oracle.emplace("parser", std::unique_ptr<Oracle>(new Oracle( @@ -383,20 +383,20 @@ void Oracle::createDatabase() int head = c.getHead(); int stackHead = c.stackEmpty() ? 0 : c.stackTop(); - int stackGov = stackHead + std::stoi(govs.ref[stackHead]); - int headGov = head + std::stoi(govs.ref[head]); + int stackGov = stackHead + std::stoi(govs.getRef(stackHead-head)); + int headGov = head + std::stoi(govs.getRef(0)); int sentenceStart = c.getHead()-1 < 0 ? 0 : c.getHead()-1; int sentenceEnd = c.getHead(); int cost = 0; - while(sentenceStart >= 0 && eos.ref[sentenceStart] != ProgramParameters::sequenceDelimiter) + while(sentenceStart >= 0 && eos.getRef(sentenceStart-head) != ProgramParameters::sequenceDelimiter) sentenceStart--; if (sentenceStart != 0) sentenceStart++; - while(sentenceEnd < (int)eos.ref.size() && eos.ref[sentenceEnd] != ProgramParameters::sequenceDelimiter) + while(sentenceEnd < eos.refSize() && eos.getRef(sentenceEnd-head) != ProgramParameters::sequenceDelimiter) sentenceEnd++; - if (sentenceEnd == (int)eos.ref.size()) + if (sentenceEnd == eos.refSize()) sentenceEnd--; auto parts = split(action); @@ -405,13 +405,13 @@ void Oracle::createDatabase() { for (int i = sentenceStart; i <= sentenceEnd; i++) { - if (!isNum(govs.ref[i])) + if (!isNum(govs.getRef(i-head))) { - fprintf(stderr, "ERROR (%s) : govs.ref[%d] = <%s>. Aborting.\n", ERRINFO, i, govs.ref[i].c_str()); + fprintf(stderr, "ERROR (%s) : govs.ref[%d] = <%s>. Aborting.\n", ERRINFO, i, govs.getRef(i-head).c_str()); exit(1); } - int otherGov = i + std::stoi(govs.ref[i]); + int otherGov = i + std::stoi(govs.getRef(i-head)); for (int j = 0; j < c.stackSize(); j++) { @@ -422,7 +422,7 @@ void Oracle::createDatabase() } } - return eos.ref[stackHead] != ProgramParameters::sequenceDelimiter ? cost : cost+1; + return eos.getRef(stackHead-head) != ProgramParameters::sequenceDelimiter ? cost : cost+1; } else if (parts[0] == "WRITE" && parts.size() == 4) { @@ -430,17 +430,17 @@ void Oracle::createDatabase() if (object[0] == "b") { if (parts[2] == "LABEL") - return action == "WRITE b.0 LABEL " + c.getTape("LABEL").ref[c.getHead()] || c.getHead() >= (int)c.tapes[0].ref.size()-1 || c.getTape("LABEL").ref[c.getHead()] == "root" ? 0 : 1; + return (action == "WRITE b.0 LABEL " + c.getTape("LABEL").getRef(0) || c.endOfTapes() || c.getTape("LABEL").getRef(0) == "root") ? 0 : 1; else if (parts[2] == "GOV") - return action == "WRITE b.0 GOV " + c.getTape("GOV").ref[c.getHead()] || c.getHead() >= (int)c.tapes[0].ref.size()-1 ? 0 : 1; + return (action == "WRITE b.0 GOV " + c.getTape("GOV").getRef(0) || c.endOfTapes()) ? 0 : 1; } else if (object[0] == "s") { int index = c.stackGetElem(-1); if (parts[2] == "LABEL") - return action == "WRITE s.-1 LABEL " + c.getTape("LABEL").ref[index] || c.getTape("LABEL").ref[index] == "root" ? 0 : 1; + return (action == "WRITE s.-1 LABEL " + c.getTape("LABEL").getRef(index-head) || c.getTape("LABEL").getRef(index-head) == "root") ? 0 : 1; else if (parts[2] == "GOV") - return action == "WRITE s.-1 GOV " + c.getTape("GOV").ref[index] ? 0 : 1; + return (action == "WRITE s.-1 GOV " + c.getTape("GOV").getRef(index-head)) ? 0 : 1; } return 1; @@ -449,21 +449,21 @@ void Oracle::createDatabase() { for (int i = head; i <= sentenceEnd; i++) { - int otherGov = i + std::stoi(govs.ref[i]); + int otherGov = i + std::stoi(govs.getRef(i-head)); if (otherGov == stackHead) cost++; } - return eos.ref[stackHead] != ProgramParameters::sequenceDelimiter ? cost : cost+1; + return eos.getRef(stackHead-head) != ProgramParameters::sequenceDelimiter ? cost : cost+1; } else if (parts[0] == "LEFT") { - if (eos.ref[stackHead] == ProgramParameters::sequenceDelimiter) + if (eos.getRef(stackHead-head) == ProgramParameters::sequenceDelimiter) cost++; for (int i = head+1; i <= sentenceEnd; i++) { - int otherGov = i + std::stoi(govs.ref[i]); + int otherGov = i + std::stoi(govs.getRef(i-head)); if (otherGov == stackHead || stackGov == i) cost++; } @@ -471,7 +471,7 @@ void Oracle::createDatabase() if (stackGov != head) cost++; - return parts.size() == 1 || labels.ref[stackHead] == parts[1] ? cost : cost+1; + return parts.size() == 1 || labels.getRef(stackHead-head) == parts[1] ? cost : cost+1; } else if (parts[0] == "RIGHT") { @@ -482,7 +482,7 @@ void Oracle::createDatabase() if (s == c.stackTop()) continue; - int otherGov = s + std::stoi(govs.ref[s]); + int otherGov = s + std::stoi(govs.getRef(s-head)); if (otherGov == head || headGov == s) cost++; } @@ -491,11 +491,11 @@ void Oracle::createDatabase() if (headGov == i) cost++; - return parts.size() == 1 || labels.ref[head] == parts[1] ? cost : cost+1; + return parts.size() == 1 || labels.getRef(0) == parts[1] ? cost : cost+1; } else if (parts[0] == ProgramParameters::sequenceDelimiterTape) { - return eos.ref[stackHead] == ProgramParameters::sequenceDelimiter ? cost : cost+1; + return eos.getRef(stackHead-head) == ProgramParameters::sequenceDelimiter ? cost : cost+1; } return cost; @@ -558,13 +558,13 @@ void Oracle::explainCostOfAction(FILE * output, Config & c, const std::string & if (object[0] == "b") { int index = c.getHead() + std::stoi(object[1]); - expected = c.getTape(tape).ref[index]; + expected = c.getTape(tape).getRef(index-c.getHead()); } else if (object[0] == "s") { int stackIndex = std::stoi(object[1]); int bufferIndex = c.stackGetElem(stackIndex) + c.getHead(); - expected = c.getTape(tape).ref[bufferIndex]; + expected = c.getTape(tape).getRef(bufferIndex-c.getHead()); } else { @@ -581,25 +581,25 @@ void Oracle::explainCostOfAction(FILE * output, Config & c, const std::string & int head = c.getHead(); int stackHead = c.stackEmpty() ? 0 : c.stackTop(); - int stackGov = stackHead + std::stoi(govs.ref[stackHead]); - int headGov = head + std::stoi(govs.ref[head]); + int stackGov = stackHead + std::stoi(govs.getRef(stackHead-head)); + int headGov = head + std::stoi(govs.getRef(0)); int sentenceStart = c.getHead()-1 < 0 ? 0 : c.getHead()-1; int sentenceEnd = c.getHead(); - while(sentenceStart >= 0 && eos.ref[sentenceStart] != ProgramParameters::sequenceDelimiter) + while(sentenceStart >= 0 && eos.getRef(sentenceStart-head) != ProgramParameters::sequenceDelimiter) sentenceStart--; if (sentenceStart != 0) sentenceStart++; - while(sentenceEnd < (int)eos.ref.size() && eos.ref[sentenceEnd] != ProgramParameters::sequenceDelimiter) + while(sentenceEnd < eos.refSize() && eos.getRef(sentenceEnd-head) != ProgramParameters::sequenceDelimiter) sentenceEnd++; - if (sentenceEnd == (int)eos.ref.size()) + if (sentenceEnd == eos.refSize()) sentenceEnd--; if (parts[0] == "SHIFT") { for (int i = sentenceStart; i <= sentenceEnd; i++) { - int otherGov = i + std::stoi(govs.ref[i]); + int otherGov = i + std::stoi(govs.getRef(i-head)); for (int j = 0; j < c.stackSize(); j++) { @@ -608,19 +608,19 @@ void Oracle::explainCostOfAction(FILE * output, Config & c, const std::string & { if (otherGov == head) { - fprintf(output, "Word on stack %d(%s)\'s governor is the current getHead()\n", s, c.getTape("FORM").ref[s].c_str()); + fprintf(output, "Word on stack %d(%s)\'s governor is the current getHead()\n", s, c.getTape("FORM").getRef(s-head).c_str()); return; } else if (headGov == s) { - fprintf(output, "The current getHead()\'s governor is on the stack %d(%s)\n", s, c.getTape("FORM").ref[s].c_str()); + fprintf(output, "The current getHead()\'s governor is on the stack %d(%s)\n", s, c.getTape("FORM").getRef(s-head).c_str()); return; } } } } - if (eos.ref[stackHead] != ProgramParameters::sequenceDelimiter) + if (eos.getRef(0) != ProgramParameters::sequenceDelimiter) { fprintf(output, "Zero cost\n"); return; @@ -634,15 +634,15 @@ void Oracle::explainCostOfAction(FILE * output, Config & c, const std::string & { for (int i = head; i <= sentenceEnd; i++) { - int otherGov = i + std::stoi(govs.ref[i]); + int otherGov = i + std::stoi(govs.getRef(i-head)); if (otherGov == stackHead) { - fprintf(output, "Stack getHead() is the governor of %d(%s)\n", i, c.getTape("FORM").ref[i].c_str()); + fprintf(output, "Stack getHead() is the governor of %d(%s)\n", i, c.getTape("FORM").getRef(i-head).c_str()); return; } } - if (eos.ref[stackHead] != ProgramParameters::sequenceDelimiter) + if (eos.getRef(stackHead-head) != ProgramParameters::sequenceDelimiter) { fprintf(output, "Zero cost\n"); return; @@ -654,7 +654,7 @@ void Oracle::explainCostOfAction(FILE * output, Config & c, const std::string & } else if (parts[0] == "LEFT") { - if (parts.size() == 2 && stackGov == head && labels.ref[stackHead] == parts[1]) + if (parts.size() == 2 && stackGov == head && labels.getRef(stackHead-head) == parts[1]) { fprintf(output, "Zero cost\n"); return; @@ -665,23 +665,23 @@ void Oracle::explainCostOfAction(FILE * output, Config & c, const std::string & return; } - if (labels.ref[stackHead] != parts[1]) + if (labels.getRef(stackHead-head) != parts[1]) { - fprintf(output, "Stack getHead() label %s mismatch with action label %s\n", labels.ref[stackHead].c_str(), parts[1].c_str()); + fprintf(output, "Stack getHead() label %s mismatch with action label %s\n", labels.getRef(stackHead-head).c_str(), parts[1].c_str()); return; } for (int i = head; i <= sentenceEnd; i++) { - int otherGov = i + std::stoi(govs.ref[i]); + int otherGov = i + std::stoi(govs.getRef(i-head)); if (otherGov == stackHead) { - fprintf(output, "Word %d(%s)\'s governor is the stack getHead()\n", i, c.getTape("FORM").ref[i].c_str()); + fprintf(output, "Word %d(%s)\'s governor is the stack getHead()\n", i, c.getTape("FORM").getRef(i-head).c_str()); return; } else if (stackGov == i) { - fprintf(output, "Stack getHead()\'s governor is the word %d(%s)\n", i, c.getTape("FORM").ref[i].c_str()); + fprintf(output, "Stack getHead()\'s governor is the word %d(%s)\n", i, c.getTape("FORM").getRef(i-head).c_str()); return; } } @@ -703,15 +703,15 @@ void Oracle::explainCostOfAction(FILE * output, Config & c, const std::string & if (s == c.stackTop()) continue; - int otherGov = s + std::stoi(govs.ref[s]); + int otherGov = s + std::stoi(govs.getRef(s-head)); if (otherGov == head) { - fprintf(output, "The governor of %d(%s) in the stack, is the current head\n", s, c.getTape("FORM").ref[s].c_str()); + fprintf(output, "The governor of %d(%s) in the stack, is the current head\n", s, c.getTape("FORM").getRef(s-head).c_str()); return; } else if (headGov == s) { - fprintf(output, "The current head's governor is the stack element %d(%s)\n", s, c.getTape("FORM").ref[s].c_str()); + fprintf(output, "The current head's governor is the stack element %d(%s)\n", s, c.getTape("FORM").getRef(s-head).c_str()); return; } } @@ -719,7 +719,7 @@ void Oracle::explainCostOfAction(FILE * output, Config & c, const std::string & for (int i = head; i <= sentenceEnd; i++) if (headGov == i) { - fprintf(output, "The current head's governor is the future word %d(%s)\n", i, c.getTape("FORM").ref[i].c_str()); + fprintf(output, "The current head's governor is the future word %d(%s)\n", i, c.getTape("FORM").getRef(i-head).c_str()); return; } @@ -729,20 +729,20 @@ void Oracle::explainCostOfAction(FILE * output, Config & c, const std::string & return; } - if (labels.ref[head] == parts[1]) + if (labels.getRef(0) == parts[1]) { fprintf(output, "Zero cost\n"); return; } else { - fprintf(output, "Current head's label %s mismatch action label %s\n", labels.ref[head].c_str(), parts[1].c_str()); + fprintf(output, "Current head's label %s mismatch action label %s\n", labels.getRef(0).c_str(), parts[1].c_str()); return; } } else if (parts[0] == ProgramParameters::sequenceDelimiterTape) { - if (eos.ref[stackHead] == ProgramParameters::sequenceDelimiter) + if (eos.getRef(stackHead-head) == ProgramParameters::sequenceDelimiter) { fprintf(output, "Zero cost\n"); return;