From 7b8c6b5a308705a8832b5613bece5f82bed0c2fa Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Wed, 16 Oct 2019 12:08:35 +0200 Subject: [PATCH] Added namespace to util and corrected conllu reading when metadata are missing --- decoder/src/Decoder.cpp | 2 +- decoder/src/macaon_decode.cpp | 6 +- error_correction/src/Error.cpp | 30 ++++---- maca_common/include/util.hpp | 5 +- maca_common/src/Dict.cpp | 4 +- maca_common/src/ProgramOutput.cpp | 2 +- maca_common/src/macaon_compute_l_rules.cpp | 4 +- maca_common/src/macaon_convert_embeddings.cpp | 2 +- maca_common/src/programOptionsTemplates.cpp | 8 +-- maca_common/src/util.cpp | 6 +- neural_network/src/GeneticAlgorithm.cpp | 14 ++-- neural_network/src/MLP.cpp | 2 +- neural_network/src/MLPBase.cpp | 4 +- neural_network/src/MultiMLP.cpp | 8 +-- neural_network/src/NeuralNetwork.cpp | 2 +- neural_network/src/ReversedMLP.cpp | 2 +- trainer/src/TrainInfos.cpp | 36 +++++----- trainer/src/Trainer.cpp | 22 +++--- transition_machine/src/Action.cpp | 2 +- transition_machine/src/ActionBank.cpp | 54 +++++++------- transition_machine/src/ActionSet.cpp | 2 +- transition_machine/src/BD.cpp | 4 +- transition_machine/src/Classifier.cpp | 6 +- transition_machine/src/Config.cpp | 68 ++++++++++++------ transition_machine/src/FeatureBank.cpp | 28 ++++---- transition_machine/src/FeatureModel.cpp | 18 ++--- transition_machine/src/Oracle.cpp | 72 +++++++++---------- 27 files changed, 221 insertions(+), 192 deletions(-) diff --git a/decoder/src/Decoder.cpp b/decoder/src/Decoder.cpp index 3e01479..b9aa33d 100644 --- a/decoder/src/Decoder.cpp +++ b/decoder/src/Decoder.cpp @@ -77,7 +77,7 @@ void printAdvancement(Config & config, float currentSpeed, int nbActionsCutoff) steps = config.rawInputHeadIndex; } if (steps && (steps % nbActionsCutoff == 0 || totalSize-steps < nbActionsCutoff)) - fprintf(stderr, "Decode : %.2f%% speed : %s actions/s\r", 100.0*steps/totalSize, int2humanStr((int)currentSpeed).c_str()); + fprintf(stderr, "Decode : %.2f%% speed : %s actions/s\r", 100.0*steps/totalSize, util::int2humanStr((int)currentSpeed).c_str()); } } diff --git a/decoder/src/macaon_decode.cpp b/decoder/src/macaon_decode.cpp index b488671..9f02475 100644 --- a/decoder/src/macaon_decode.cpp +++ b/decoder/src/macaon_decode.cpp @@ -168,7 +168,7 @@ int main(int argc, char * argv[]) ProgramParameters::sequenceDelimiterTape = vm["sequenceDelimiterTape"].as<std::string>(); ProgramParameters::sequenceDelimiter = vm["sequenceDelimiter"].as<std::string>(); ProgramParameters::showFeatureRepresentation = vm["showFeatureRepresentation"].as<int>(); - ProgramParameters::tapeSize = ProgramParameters::rawInput ? 200000 : getNbLines(ProgramParameters::input); + ProgramParameters::tapeSize = ProgramParameters::rawInput ? 200000 : util::getNbLines(ProgramParameters::input); ProgramParameters::readSize = vm["readSize"].as<int>(); if (ProgramParameters::readSize == 0) ProgramParameters::readSize = ProgramParameters::tapeSize; @@ -182,10 +182,10 @@ int main(int argc, char * argv[]) std::string featureModels = vm["featureModels"].as<std::string>(); if (!featureModels.empty()) { - auto byClassifiers = split(featureModels, ','); + auto byClassifiers = util::split(featureModels, ','); for (auto & classifier : byClassifiers) { - auto parts = split(classifier, '='); + auto parts = util::split(classifier, '='); if (parts.size() != 2) { fprintf(stderr, "ERROR (%s) : wrong format for argument of option featureModels. Aborting.\n", ERRINFO); diff --git a/error_correction/src/Error.cpp b/error_correction/src/Error.cpp index 5a9985d..b46066d 100644 --- a/error_correction/src/Error.cpp +++ b/error_correction/src/Error.cpp @@ -12,7 +12,7 @@ prediction(prediction), gold(gold), weightedActions(weightedActions), cost(cost) { type = prediction + "->" + gold; if (ProgramParameters::onlyPrefixes) - type = split(prediction,' ')[0] + "->" + split(gold,' ')[0]; + type = util::split(prediction,' ')[0] + "->" + util::split(gold,' ')[0]; indexOfPrediction = -1; indexOfGold = -1; distanceWithGold = 0; @@ -215,20 +215,20 @@ void Errors::printStats() for (auto & it : typesOccurences) { columns[0].emplace_back(it.first); - columns[1].emplace_back("= " + float2str(it.second*100.0/nbErrorsTotal,"%.2f%%")); + columns[1].emplace_back("= " + util::float2str(it.second*100.0/nbErrorsTotal,"%.2f%%")); columns[2].emplace_back("of errors"); columns[3].emplace_back("("+std::to_string(it.second) + " / " + std::to_string(nbErrorsTotal) + ")"); if (ProgramParameters::meanEntropy) - columns[4].emplace_back("mean entropy : " + float2str(meanEntropyByType[it.first], "%.2f")); + columns[4].emplace_back("mean entropy : " + util::float2str(meanEntropyByType[it.first], "%.2f")); for (unsigned int dist = minDistanceToCheck; dist <= maxDistanceToCheck; dist++) { columns[0].emplace_back(" Gold at distance"); columns[1].emplace_back(std::to_string(dist)); - columns[2].emplace_back(float2str(distanceOfGoldByType[it.first][dist]*100.0/nbErrorOccurencesByType[it.first],"%.2f%%")); + columns[2].emplace_back(util::float2str(distanceOfGoldByType[it.first][dist]*100.0/nbErrorOccurencesByType[it.first],"%.2f%%")); columns[3].emplace_back("of the time"); if (ProgramParameters::meanEntropy) - columns[4].emplace_back("mean entropy : " + float2str(meanEntropyByDistanceByType[it.first][dist], "%.2f")); + columns[4].emplace_back("mean entropy : " + util::float2str(meanEntropyByDistanceByType[it.first][dist], "%.2f")); } for (auto & col : columns) @@ -236,7 +236,7 @@ void Errors::printStats() } printLine(); - printColumns(stderr, columns, 1); + util::printColumns(stderr, columns, 1); printLine(); std::vector< std::pair<std::string,int> > typesFirstOccurences; @@ -256,29 +256,29 @@ void Errors::printStats() for (auto & it : typesFirstOccurences) { columns[0].emplace_back(it.first); - columns[1].emplace_back("= " + float2str(it.second*100.0/nbFirstErrorsTotal,"%.2f%%")); + columns[1].emplace_back("= " + util::float2str(it.second*100.0/nbFirstErrorsTotal,"%.2f%%")); columns[2].emplace_back("of first errors"); columns[3].emplace_back("("+std::to_string(it.second) + " / " + std::to_string(nbFirstErrorsTotal) + ")"); - columns[4].emplace_back("introduces " + float2str(nbFirstErrorIntroduced[it.first],"%.2f errors")); + columns[4].emplace_back("introduces " + util::float2str(nbFirstErrorIntroduced[it.first],"%.2f errors")); if (ProgramParameters::meanEntropy) - columns[5].emplace_back("mean entropy : " + float2str(meanEntropyByType[it.first], "%.2f")); + columns[5].emplace_back("mean entropy : " + util::float2str(meanEntropyByType[it.first], "%.2f")); for (unsigned int dist = minDistanceToCheck; dist <= maxDistanceToCheck; dist++) { columns[0].emplace_back(" Gold at distance"); columns[1].emplace_back(std::to_string(dist)); - columns[2].emplace_back(float2str(distanceOfGoldByFirstType[it.first][dist]*100.0/nbFirstErrorOccurencesByType[it.first],"%.2f%%")); + columns[2].emplace_back(util::float2str(distanceOfGoldByFirstType[it.first][dist]*100.0/nbFirstErrorOccurencesByType[it.first],"%.2f%%")); columns[3].emplace_back("of the time"); columns[4].emplace_back(""); if (ProgramParameters::meanEntropy) - columns[5].emplace_back("mean entropy : " + float2str(meanEntropyByDistanceByFirstType[it.first][dist], "%.2f")); + columns[5].emplace_back("mean entropy : " + util::float2str(meanEntropyByDistanceByFirstType[it.first][dist], "%.2f")); } for (auto & col : columns) col.emplace_back(""); } - printColumns(stderr, columns, 1); + util::printColumns(stderr, columns, 1); printLine(); columns.clear(); columns.resize(4); @@ -296,15 +296,15 @@ void Errors::printStats() float percGold = 100.0*errorsGold / totalGold; columns[0].emplace_back("Errors when link is of length " + std::to_string(i)); columns[1].emplace_back("in the hypothesis"); - columns[2].emplace_back(": "+float2str(percHypo, "%.2f%%")); + columns[2].emplace_back(": "+util::float2str(percHypo, "%.2f%%")); columns[3].emplace_back("("+std::to_string(errorsHypo)+"/"+std::to_string(totalHypo)+")"); columns[0].emplace_back("Errors when link is of length " + std::to_string(i)); columns[1].emplace_back("in the reference"); - columns[2].emplace_back(": "+float2str(percGold, "%.2f%%")); + columns[2].emplace_back(": "+util::float2str(percGold, "%.2f%%")); columns[3].emplace_back("("+std::to_string(errorsGold)+"/"+std::to_string(totalGold)+")"); } - printColumns(stderr, columns, 1); + util::printColumns(stderr, columns, 1); } int Error::getLinkLengthPrediction() const diff --git a/maca_common/include/util.hpp b/maca_common/include/util.hpp index d4339eb..4ff0b21 100644 --- a/maca_common/include/util.hpp +++ b/maca_common/include/util.hpp @@ -17,6 +17,8 @@ #include <string> #include <vector> +namespace util +{ /// @brief Whether or not this symbol can separate words (e.g. a space). /// /// @param c The symbol to check. @@ -229,8 +231,9 @@ int getEndIndexOfNthSymbolFrom(const std::string::iterator & s, const std::strin unsigned int getNbSymbols(const std::string & s); std::string shrinkString(const std::string & base, int maxSize, const std::string token); std::string strip(const std::string & s); +}; /// @brief Macro giving informations about an error. -#define ERRINFO (getFilenameFromPath(std::string(__FILE__))+ ":l." + std::to_string(__LINE__)).c_str() +#define ERRINFO (util::getFilenameFromPath(std::string(__FILE__))+ ":l." + std::to_string(__LINE__)).c_str() #endif diff --git a/maca_common/src/Dict.cpp b/maca_common/src/Dict.cpp index 724175f..8def5df 100644 --- a/maca_common/src/Dict.cpp +++ b/maca_common/src/Dict.cpp @@ -342,7 +342,7 @@ void Dict::initEmbeddingRandom(unsigned int index) int range = 1; for (auto & val : vec) - val = getRandomValueInRange(range); + val = util::getRandomValueInRange(range); lookupParameter.initialize(index, vec); } @@ -448,7 +448,7 @@ Dict * Dict::getDict(Policy policy, const std::string & filename) if(it != str2dict.end()) return it->second.get(); - Dict * dict = new Dict(removeSuffix(getFilenameFromPath(filename), ".dict"),policy, filename); + Dict * dict = new Dict(util::removeSuffix(util::getFilenameFromPath(filename), ".dict"),policy, filename); str2dict.insert(std::make_pair(dict->name, std::unique_ptr<Dict>(dict))); diff --git a/maca_common/src/ProgramOutput.cpp b/maca_common/src/ProgramOutput.cpp index bb2868a..8eceac1 100644 --- a/maca_common/src/ProgramOutput.cpp +++ b/maca_common/src/ProgramOutput.cpp @@ -17,7 +17,7 @@ void ProgramOutput::print(FILE * output) return; for (auto & line : matrix) for (unsigned int i = 0; i < line.size(); i++) - fprintf(output, "%s%s%s", ProgramParameters::printOutputEntropy ? ("<"+float2str(line[i].second,"%f")+">").c_str() : "", line[i].first.c_str(), i == line.size()-1 ? "\n" : "\t"); + fprintf(output, "%s%s%s", ProgramParameters::printOutputEntropy ? ("<"+util::float2str(line[i].second,"%f")+">").c_str() : "", line[i].first.c_str(), i == line.size()-1 ? "\n" : "\t"); } void ProgramOutput::addLine(FILE * output, const std::vector< std::pair<std::string, float> > & line, unsigned int index) diff --git a/maca_common/src/macaon_compute_l_rules.cpp b/maca_common/src/macaon_compute_l_rules.cpp index 1cd539f..475f20d 100644 --- a/maca_common/src/macaon_compute_l_rules.cpp +++ b/maca_common/src/macaon_compute_l_rules.cpp @@ -108,7 +108,7 @@ int main(int argc, char * argv[]) std::map<std::string, std::vector<std::string> > rules; while (fscanf(fplm.getDescriptor(), "%[^\n]\n", buffer) == 1) { - auto splited = split(buffer, '\t'); + auto splited = util::split(buffer, '\t'); if (splited.size() != 4) { @@ -118,7 +118,7 @@ int main(int argc, char * argv[]) auto form = splited[0]; auto lemma = splited[2]; - auto rule = getRule(form, lemma); + auto rule = util::getRule(form, lemma); rules[rule].emplace_back(buffer); } diff --git a/maca_common/src/macaon_convert_embeddings.cpp b/maca_common/src/macaon_convert_embeddings.cpp index 16f868a..1cbe55c 100644 --- a/maca_common/src/macaon_convert_embeddings.cpp +++ b/maca_common/src/macaon_convert_embeddings.cpp @@ -119,7 +119,7 @@ int main(int argc, char * argv[]) while (fscanf(input.getDescriptor(), "%[^\n]\n", buffer) == 1) { embedding.clear(); - auto splited = split(buffer, ' '); + auto splited = util::split(buffer, ' '); if ((int)splited.size() != embeddingsSize+1) { fprintf(stderr, "ERROR (%s) : line \'%s\' wrong format. Aborting.\n", ERRINFO, buffer); diff --git a/maca_common/src/programOptionsTemplates.cpp b/maca_common/src/programOptionsTemplates.cpp index 375fb00..7ffdeb6 100644 --- a/maca_common/src/programOptionsTemplates.cpp +++ b/maca_common/src/programOptionsTemplates.cpp @@ -232,8 +232,8 @@ macaon_decode --lang " + ProgramParameters::lang + " --tm machine.tm --bd test. if (system(("chmod +x " + ProgramParameters::expPath + "decode.sh").c_str())){} if (system(("ln -f -s " + ProgramParameters::expPath + "decode.sh " + ProgramParameters::langPath + "bin/maca_tm_" + ProgramParameters::expName).c_str())){} - ProgramParameters::tapeSize = getNbLines(ProgramParameters::trainFilename); - ProgramParameters::devTapeSize = ProgramParameters::devFilename.empty() ? 0 : getNbLines(ProgramParameters::devFilename); + ProgramParameters::tapeSize = util::getNbLines(ProgramParameters::trainFilename); + ProgramParameters::devTapeSize = ProgramParameters::devFilename.empty() ? 0 : util::getNbLines(ProgramParameters::devFilename); ProgramParameters::readSize = ProgramParameters::tapeSize; } @@ -331,10 +331,10 @@ void loadTrainProgramParameters(int argc, char * argv[]) std::string featureModels = vm["featureModels"].as<std::string>(); if (!featureModels.empty()) { - auto byClassifiers = split(featureModels, ','); + auto byClassifiers = util::split(featureModels, ','); for (auto & classifier : byClassifiers) { - auto parts = split(classifier, '='); + auto parts = util::split(classifier, '='); if (parts.size() != 2) { fprintf(stderr, "ERROR (%s) : wrong format for argument of option featureModels. Aborting.\n", ERRINFO); diff --git a/maca_common/src/util.cpp b/maca_common/src/util.cpp index e8c5cd3..1266a49 100644 --- a/maca_common/src/util.cpp +++ b/maca_common/src/util.cpp @@ -5,6 +5,7 @@ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.*/ + #include "util.hpp" #include "File.hpp" #include "utf8.hpp" @@ -12,6 +13,8 @@ #include <cstring> #include <ctime> +namespace util +{ bool isAlpha(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); @@ -242,7 +245,7 @@ std::vector<std::string> split(const std::string & s, char sep) res.back().push_back(s[i]); } - if (res.back().empty()) + if (!res.empty() && res.back().empty()) res.pop_back(); return res; @@ -587,3 +590,4 @@ std::string strip(const std::string & s) return res; } +}; diff --git a/neural_network/src/GeneticAlgorithm.cpp b/neural_network/src/GeneticAlgorithm.cpp index 92027d5..bb8f814 100644 --- a/neural_network/src/GeneticAlgorithm.cpp +++ b/neural_network/src/GeneticAlgorithm.cpp @@ -31,8 +31,8 @@ void GeneticAlgorithm::init(int nbInputs, const std::string & topology, int nbOu this->nbOutputs = nbOutputs; this->topology = topology; - auto splited = split(topology, ' '); - if (splited.size() != 2 || !isNum(splited[0])) + auto splited = util::split(topology, ' '); + if (splited.size() != 2 || !util::isNum(splited[0])) { fprintf(stderr, "ERROR (%s) : wrong topology \'%s\'. Aborting.\n", ERRINFO, topology.c_str()); exit(1); @@ -130,7 +130,7 @@ float GeneticAlgorithm::update(FeatureModel::FeatureDescription & fd, int gold) int parts = candidates.size() * ((candidates.size()+1) / 2.0); float probability = (candidates.size()-i) / (1.0*parts); - if (choiceWithProbability(probability)) + if (util::choiceWithProbability(probability)) { reproductors.push_back(candidates[i]); candidates.erase(candidates.begin() + i); @@ -266,7 +266,7 @@ void GeneticAlgorithm::Individual::becomeChildOf(Individual * other) unsigned int nbValues = thisParameter.values()->d.size(); for (unsigned int k = 0; k < nbValues; k++) - if (choiceWithProbability(0.5)) + if (util::choiceWithProbability(0.5)) thisValues[k] = otherValues[k]; } } @@ -295,7 +295,7 @@ void GeneticAlgorithm::Individual::becomeChildOf(Individual * mom, Individual * unsigned int nbValues = thisParameter.values()->d.size(); for (unsigned int k = 0; k < nbValues; k++) - if (choiceWithProbability(0.5)) + if (util::choiceWithProbability(0.5)) thisValues[k] = momValues[k]; else thisValues[k] = dadValues[k]; @@ -314,8 +314,8 @@ void GeneticAlgorithm::Individual::mutate(float probability) unsigned int nbValues = thisParameter.values()->d.size(); for (unsigned int k = 0; k < nbValues; k++) - if (choiceWithProbability(probability)) - thisValues[k] = getRandomValueInRange(3); + if (util::choiceWithProbability(probability)) + thisValues[k] = util::getRandomValueInRange(3); } } diff --git a/neural_network/src/MLP.cpp b/neural_network/src/MLP.cpp index e2ec568..99ec2bc 100644 --- a/neural_network/src/MLP.cpp +++ b/neural_network/src/MLP.cpp @@ -32,7 +32,7 @@ void MLP::init(int nbInputs, const std::string & topology, int nbOutputs) dynet::Trainer * MLP::createTrainer() { - auto optimizer = noAccentLower(ProgramParameters::optimizer); + auto optimizer = util::noAccentLower(ProgramParameters::optimizer); dynet::Trainer * trainer = nullptr; diff --git a/neural_network/src/MLPBase.cpp b/neural_network/src/MLPBase.cpp index f6dc7b0..ee65999 100644 --- a/neural_network/src/MLPBase.cpp +++ b/neural_network/src/MLPBase.cpp @@ -20,14 +20,14 @@ void MLPBase::init(dynet::ParameterCollection & model, int nbInputs, const std:: std::replace(topo.begin(), topo.end(), '(', ' '); std::replace(topo.begin(), topo.end(), ')', ' '); - auto groups = split(topo); + auto groups = util::split(topo); for (auto group : groups) { if(group.empty()) continue; std::replace(group.begin(), group.end(), ',', ' '); - auto layer = split(group); + auto layer = util::split(group); if (layer.size() != 3) { diff --git a/neural_network/src/MultiMLP.cpp b/neural_network/src/MultiMLP.cpp index 17cd835..6c06509 100644 --- a/neural_network/src/MultiMLP.cpp +++ b/neural_network/src/MultiMLP.cpp @@ -57,7 +57,7 @@ void MultiMLP::init(int nbInputs, const std::string & topology, int nbOutputs) dynet::Trainer * MultiMLP::createTrainer() { - auto optimizer = noAccentLower(ProgramParameters::optimizer); + auto optimizer = util::noAccentLower(ProgramParameters::optimizer); dynet::Trainer * trainer = nullptr; @@ -89,7 +89,7 @@ std::vector<float> MultiMLP::predict(FeatureModel::FeatureDescription & fd) std::vector<float> prediction(mlps.size()); for (unsigned int i = 0; i < mlps.size(); i++) { - int id = std::stoi(split(mlps[i].name, '_')[1]); + int id = std::stoi(util::split(mlps[i].name, '_')[1]); auto value = mlps[i].predict(fd); prediction[id] = exp(value[1]); totalSum += prediction[id]; @@ -107,7 +107,7 @@ float MultiMLP::update(FeatureModel::FeatureDescription & fd, int gold) for (auto & mlp : mlps) try { - int id = std::stoi(split(mlp.name, '_')[1]); + int id = std::stoi(util::split(mlp.name, '_')[1]); mlp.setBatchSize(getBatchSize()); if (gold >= 0) loss += mlp.update(fd, id == gold ? 1 : 0); @@ -135,7 +135,7 @@ float MultiMLP::getLoss(FeatureModel::FeatureDescription & fd, int gold) for (auto & mlp : mlps) try { - int id = std::stoi(split(mlp.name, '_')[1]); + int id = std::stoi(util::split(mlp.name, '_')[1]); mlp.setBatchSize(getBatchSize()); if (gold >= 0) loss += mlp.update(fd, id == gold ? 1 : 0); diff --git a/neural_network/src/NeuralNetwork.cpp b/neural_network/src/NeuralNetwork.cpp index 1c4911b..ee12fb5 100644 --- a/neural_network/src/NeuralNetwork.cpp +++ b/neural_network/src/NeuralNetwork.cpp @@ -17,7 +17,7 @@ std::string NeuralNetwork::expression2str(dynet::Expression & expr) auto elem = dynet::as_vector(expr.value()); for (auto & f : elem) - result += float2str(f, "%f") + " "; + result += util::float2str(f, "%f") + " "; if (!result.empty()) result.pop_back(); diff --git a/neural_network/src/ReversedMLP.cpp b/neural_network/src/ReversedMLP.cpp index b336b82..c0ca9ce 100644 --- a/neural_network/src/ReversedMLP.cpp +++ b/neural_network/src/ReversedMLP.cpp @@ -40,7 +40,7 @@ void ReversedMLP::init(int nbInputs, const std::string & topology, int nbOutputs dynet::Trainer * ReversedMLP::createTrainer() { - auto optimizer = noAccentLower(ProgramParameters::optimizer); + auto optimizer = util::noAccentLower(ProgramParameters::optimizer); dynet::Trainer * trainer = nullptr; diff --git a/trainer/src/TrainInfos.cpp b/trainer/src/TrainInfos.cpp index 1a97f90..b7a1c1b 100644 --- a/trainer/src/TrainInfos.cpp +++ b/trainer/src/TrainInfos.cpp @@ -15,7 +15,7 @@ TrainInfos::TrainInfos() lastEpoch = 0; lastSaved = 0; - if (fileExists(filename)) + if (util::fileExists(filename)) readFromFilename(); } @@ -37,7 +37,7 @@ void TrainInfos::readFromFilename() lastSaved = lastEpoch; while (fscanf(filePtr, "%[^\n]\n", buffer) == 1) { - auto splitted = split(buffer, '\t'); + auto splitted = util::split(buffer, '\t'); if (splitted.empty() || splitted[0] == "---") break; @@ -46,7 +46,7 @@ void TrainInfos::readFromFilename() } while (fscanf(filePtr, "%[^\n]\n", buffer) == 1) { - auto splitted = split(buffer, '\t'); + auto splitted = util::split(buffer, '\t'); if (splitted.empty() || splitted[0] == "---") break; @@ -55,7 +55,7 @@ void TrainInfos::readFromFilename() } while (fscanf(filePtr, "%[^\n]\n", buffer) == 1) { - auto splitted = split(buffer, '\t'); + auto splitted = util::split(buffer, '\t'); if (splitted.empty() || splitted[0] == "---") break; @@ -64,7 +64,7 @@ void TrainInfos::readFromFilename() } while (fscanf(filePtr, "%[^\n]\n", buffer) == 1) { - auto splitted = split(buffer, '\t'); + auto splitted = util::split(buffer, '\t'); if (splitted.empty() || splitted[0] == "---") break; @@ -73,7 +73,7 @@ void TrainInfos::readFromFilename() } while (fscanf(filePtr, "%[^\n]\n", buffer) == 1) { - auto splitted = split(buffer, '\t'); + auto splitted = util::split(buffer, '\t'); if (splitted.empty() || splitted[0] == "---") break; @@ -198,9 +198,9 @@ void TrainInfos::computeTrainScores(Config & c) char buffer[10000]; while (fscanf(evalFromUD, "%[^\n]\n", buffer) == 1) { - auto splited = split(buffer, '|'); + auto splited = util::split(buffer, '|'); if (splited.size() > 2) - scoresStr[strip(splited[0])] = strip(splited[3]); + scoresStr[util::strip(splited[0])] = util::strip(splited[3]); } pclose(evalFromUD); } @@ -220,7 +220,7 @@ void TrainInfos::computeTrainScores(Config & c) addTrainScore(it.first, scoresFloat["UFeats"]); else if (it.first == "Lemmatizer_Rules") addTrainScore(it.first, scoresFloat["Lemmas"]); - else if (split(it.first, '_')[0] == "Error") + else if (util::split(it.first, '_')[0] == "Error") addTrainScore(it.first, 100.0); else { @@ -254,9 +254,9 @@ void TrainInfos::computeDevScores(Config & c) char buffer[10000]; while (fscanf(evalFromUD, "%[^\n]\n", buffer) == 1) { - auto splited = split(buffer, '|'); + auto splited = util::split(buffer, '|'); if (splited.size() > 2) - scoresStr[strip(splited[0])] = strip(splited[3]); + scoresStr[util::strip(splited[0])] = util::strip(splited[3]); } pclose(evalFromUD); } @@ -276,7 +276,7 @@ void TrainInfos::computeDevScores(Config & c) addDevScore(it.first, scoresFloat["UFeats"]); else if (it.first == "Lemmatizer_Rules") addDevScore(it.first, scoresFloat["Lemmas"]); - else if (split(it.first, '_')[0] == "Error") + else if (util::split(it.first, '_')[0] == "Error") addDevScore(it.first, 100.0); else { @@ -358,20 +358,20 @@ void TrainInfos::printScores(FILE * output) { names.emplace_back(it.first); acc.emplace_back("accuracy"); - train.emplace_back(": train(" + float2str(it.second.back(), "%.2f") + "%)"); - lossTrain.emplace_back(trainLossesPerClassifierPerEpoch.empty() ? "loss(?)" : "loss(" +float2str(trainLossesPerClassifierPerEpoch[it.first].back(), "%.2f") + ")"); - dev.emplace_back(devScoresPerClassifierPerEpoch.empty() ? "" : "dev(" +float2str(devScoresPerClassifierPerEpoch[it.first].back(), "%.2f") + "%)"); - lossDev.emplace_back(devLossesPerClassifierPerEpoch.empty() ? "loss(?)" : "loss(" +float2str(devLossesPerClassifierPerEpoch[it.first].back(), "%.2f") + ")"); + train.emplace_back(": train(" + util::float2str(it.second.back(), "%.2f") + "%)"); + lossTrain.emplace_back(trainLossesPerClassifierPerEpoch.empty() ? "loss(?)" : "loss(" +util::float2str(trainLossesPerClassifierPerEpoch[it.first].back(), "%.2f") + ")"); + dev.emplace_back(devScoresPerClassifierPerEpoch.empty() ? "" : "dev(" +util::float2str(devScoresPerClassifierPerEpoch[it.first].back(), "%.2f") + "%)"); + lossDev.emplace_back(devLossesPerClassifierPerEpoch.empty() ? "loss(?)" : "loss(" +util::float2str(devLossesPerClassifierPerEpoch[it.first].back(), "%.2f") + ")"); savedStr.emplace_back(mustSavePerClassifierPerEpoch[it.first].back() ? "SAVED" : ""); } if (ProgramParameters::interactive) fprintf(output, " \r"); if (ProgramParameters::printTime) - fprintf(output, "[%s] ", getTime().c_str()); + fprintf(output, "[%s] ", util::getTime().c_str()); fprintf(output, "Iteration %d/%d : \n", getEpoch(), ProgramParameters::nbIter); - printColumns(output, {names, acc, train, lossTrain, dev, lossDev, savedStr}); + util::printColumns(output, {names, acc, train, lossTrain, dev, lossDev, savedStr}); } bool TrainInfos::mustSave(const std::string & classifier) diff --git a/trainer/src/Trainer.cpp b/trainer/src/Trainer.cpp index 8e60bae..30c5352 100644 --- a/trainer/src/Trainer.cpp +++ b/trainer/src/Trainer.cpp @@ -35,7 +35,7 @@ void Trainer::setDebugValue() if (!ProgramParameters::randomDebug) return; - ProgramParameters::debug = choiceWithProbability(ProgramParameters::randomDebugProbability); + ProgramParameters::debug = util::choiceWithProbability(ProgramParameters::randomDebugProbability); if (!ProgramParameters::debug) return; @@ -46,7 +46,7 @@ void Trainer::setDebugValue() fprintf(stderr, "\n"); if (ProgramParameters::printTime) - fprintf(stderr, "[%s] :\n", getTime().c_str()); + fprintf(stderr, "[%s] :\n", util::getTime().c_str()); } void Trainer::computeScoreOnDev() @@ -106,7 +106,7 @@ void Trainer::computeScoreOnDev() if (steps && (steps % nbActionsCutoff == 0 || totalSize-steps < nbActionsCutoff)) { fprintf(stderr, " \r"); - fprintf(stderr, "Eval on dev : %.2f%% speed : %s actions/s\r", 100.0*steps/totalSize, int2humanStr((int)currentSpeed).c_str()); + fprintf(stderr, "Eval on dev : %.2f%% speed : %s actions/s\r", 100.0*steps/totalSize, util::int2humanStr((int)currentSpeed).c_str()); } } @@ -151,7 +151,7 @@ void Trainer::computeScoreOnDev() if (ProgramParameters::debug) { - fprintf(stderr, "Speed : %s actions/s\n", int2humanStr((int)currentSpeed).c_str()); + fprintf(stderr, "Speed : %s actions/s\n", util::int2humanStr((int)currentSpeed).c_str()); devConfig->printForDebug(stderr); fprintf(stderr, "pAction=<%s> action=<%s>\n", pAction.c_str(), actionName.c_str()); } @@ -228,7 +228,7 @@ void Trainer::doStepNoTrain() if (ProgramParameters::debug) { - fprintf(stderr, "Speed : %s actions/s\n", int2humanStr((int)currentSpeed).c_str()); + fprintf(stderr, "Speed : %s actions/s\n", util::int2humanStr((int)currentSpeed).c_str()); trainConfig.printForDebug(stderr); fprintf(stderr, "action=<%s>\n", neededActionName.c_str()); } @@ -266,7 +266,7 @@ void Trainer::doStepTrain() if (steps % nbActionsCutoff == 0 || totalSize-steps < nbActionsCutoff) { fprintf(stderr, " \r"); - fprintf(stderr, "Current Iteration : %.2f%% speed : %s actions/s\r", 100.0*steps/totalSize, int2humanStr((int)currentSpeed).c_str()); + fprintf(stderr, "Current Iteration : %.2f%% speed : %s actions/s\r", 100.0*steps/totalSize, util::int2humanStr((int)currentSpeed).c_str()); } } @@ -339,7 +339,7 @@ void Trainer::doStepTrain() fprintf(stdout, "%s\t%s\t%s\n", tm.getCurrentClassifier()->getFeatureModel()->filename.c_str(), oAction.c_str(), features.c_str()); } - if (tm.getCurrentClassifier()->isDynamic() && TI.getEpoch() >= k && choiceWithProbability(ProgramParameters::dynamicProbability)) + if (tm.getCurrentClassifier()->isDynamic() && TI.getEpoch() >= k && util::choiceWithProbability(ProgramParameters::dynamicProbability)) { actionName = pAction; TI.lastActionWasPredicted[trainConfig.getCurrentStateName()] = true; @@ -352,7 +352,7 @@ void Trainer::doStepTrain() if (ProgramParameters::debug) { - fprintf(stderr, "Speed : %s actions/s\n", int2humanStr((int)currentSpeed).c_str()); + fprintf(stderr, "Speed : %s actions/s\n", util::int2humanStr((int)currentSpeed).c_str()); trainConfig.printForDebug(stderr); tm.getCurrentClassifier()->printWeightedActions(stderr, weightedActions, 10); fprintf(stderr, "pAction=<%s> oAction=<%s> action=<%s>\n", pAction.c_str(), oAction.c_str(), actionName.c_str()); @@ -410,7 +410,7 @@ void Trainer::doStepTrain() auto & normalHistory = trainConfig.getActionsHistory(normalStateName); // If a BACK just happened - if (normalHistory.size() > 1 && trainConfig.getCurrentStateHistory().size() > 0 && split(trainConfig.getCurrentStateHistory().top(), ' ')[0] == "BACK" && TI.getEpoch() >= ProgramParameters::dynamicEpoch) + if (normalHistory.size() > 1 && trainConfig.getCurrentStateHistory().size() > 0 && util::split(trainConfig.getCurrentStateHistory().top(), ' ')[0] == "BACK" && TI.getEpoch() >= ProgramParameters::dynamicEpoch) { auto & lastAction = trainConfig.lastUndoneAction; auto & newAction = normalHistory[normalHistory.size()-1]; @@ -476,7 +476,7 @@ void Trainer::doStepTrain() if (ProgramParameters::debug) { - fprintf(stderr, "Speed : %s actions/s\n", int2humanStr((int)currentSpeed).c_str()); + fprintf(stderr, "Speed : %s actions/s\n", util::int2humanStr((int)currentSpeed).c_str()); trainConfig.printForDebug(stderr); tm.getCurrentClassifier()->printWeightedActions(stderr, weightedActions, 10); fprintf(stderr, "pAction=<%s> oAction=<%s> action=<%s>\n", pAction.c_str(), oAction.c_str(), actionName.c_str()); @@ -542,7 +542,7 @@ void Trainer::train() Dict::createFiles(ProgramParameters::expPath, ""); fprintf(stderr, "%sTraining of \'%s\' :\n", - ProgramParameters::printTime ? ("["+getTime()+"] ").c_str() : "", + ProgramParameters::printTime ? ("["+util::getTime()+"] ").c_str() : "", tm.name.c_str()); while (TI.getEpoch() <= ProgramParameters::nbIter) diff --git a/transition_machine/src/Action.cpp b/transition_machine/src/Action.cpp index b997a4b..2a083bc 100644 --- a/transition_machine/src/Action.cpp +++ b/transition_machine/src/Action.cpp @@ -99,7 +99,7 @@ void Action::undoOnlyStack(Config & config) Action::Action(const std::string & name) { this->name = name; - for(unsigned int i = 0; i < name.size() && !isSeparator(name[i]); i++) + for(unsigned int i = 0; i < name.size() && !util::isSeparator(name[i]); i++) this->namePrefix.push_back(name[i]); this->sequence = ActionBank::str2sequence(name); diff --git a/transition_machine/src/ActionBank.cpp b/transition_machine/src/ActionBank.cpp index 22b1406..047466c 100644 --- a/transition_machine/src/ActionBank.cpp +++ b/transition_machine/src/ActionBank.cpp @@ -67,7 +67,7 @@ Action::BasicAction ActionBank::checkRawInputHeadIsSpace() {}; auto appliable = [](Config & c, Action::BasicAction &) { - return isUtf8Space(c.rawInput.begin()+c.rawInputHeadIndex); + return util::isUtf8Space(c.rawInput.begin()+c.rawInputHeadIndex); }; Action::BasicAction basicAction = {Action::BasicAction::Type::Write, "", apply, undo, appliable}; @@ -83,7 +83,7 @@ Action::BasicAction ActionBank::checkRawInputHeadIsSeparator() {}; auto appliable = [](Config & c, Action::BasicAction &) { - return isUtf8Separator(c.rawInput.begin()+c.rawInputHeadIndex); + return util::isUtf8Separator(c.rawInput.begin()+c.rawInputHeadIndex); }; Action::BasicAction basicAction = {Action::BasicAction::Type::Write, "", apply, undo, appliable}; @@ -191,7 +191,7 @@ Action::BasicAction ActionBank::stackPop(bool checkGov) if (!checkGov) return true; - return split(c.getTape("ID").getRef(c.stackTop()-c.getHead()), '.').size() > 1 || split(c.getTape("ID").getRef(c.stackTop()-c.getHead()), '-').size() > 1 || (!c.getTape("GOV").getHyp(c.stackTop()-c.getHead()).empty() && c.stackTop() != c.getHead()); + return util::split(c.getTape("ID").getRef(c.stackTop()-c.getHead()), '.').size() > 1 || util::split(c.getTape("ID").getRef(c.stackTop()-c.getHead()), '-').size() > 1 || (!c.getTape("GOV").getHyp(c.stackTop()-c.getHead()).empty() && c.stackTop() != c.getHead()); }; Action::BasicAction basicAction = {Action::BasicAction::Type::Pop, "", apply, undo, appliable}; @@ -241,7 +241,7 @@ std::vector<Action::BasicAction> ActionBank::str2sequence(const std::string & na std::string tapeName(b2); std::string value(b3); - auto object = split(b4, '.'); + auto object = util::split(b4, '.'); if (object.size() != 2) invalidNameAndAbort(ERRINFO); @@ -263,7 +263,7 @@ std::vector<Action::BasicAction> ActionBank::str2sequence(const std::string & na std::string tapeName(b2); - auto splits = split(name); + auto splits = util::split(name); for(int i = startRelIndex; i <= endRelIndex; i++) sequence.emplace_back(bufferWrite(tapeName, splits[4+i-startRelIndex], i)); @@ -347,8 +347,8 @@ std::vector<Action::BasicAction> ActionBank::str2sequence(const std::string & na if (sscanf(name.c_str(), "SPLITWORD %s", b2) != 1) invalidNameAndAbort(ERRINFO); - auto splited = split(b2, '@'); - int nbSymbols = getNbSymbols(splited[0]); + auto splited = util::split(b2, '@'); + int nbSymbols = util::getNbSymbols(splited[0]); sequence.emplace_back(rawInputBeginsWith(splited[0])); @@ -436,13 +436,13 @@ std::vector<Action::BasicAction> ActionBank::str2sequence(const std::string & na int b0 = c.getHead(); int s0 = c.stackTop(); - if (split(c.getTape("ID").getRef(0), '-').size() > 1) + if (util::split(c.getTape("ID").getRef(0), '-').size() > 1) return false; - if (split(c.getTape("ID").getRef(c.stackTop()-c.getHead()), '-').size() > 1) + if (util::split(c.getTape("ID").getRef(c.stackTop()-c.getHead()), '-').size() > 1) return false; - if (split(c.getTape("ID").getRef(0), '.').size() > 1) + if (util::split(c.getTape("ID").getRef(0), '.').size() > 1) return false; - if (split(c.getTape("ID").getRef(c.stackTop()-c.getHead()), '.').size() > 1) + if (util::split(c.getTape("ID").getRef(c.stackTop()-c.getHead()), '.').size() > 1) return false; return simpleBufferWriteAppliable(c, "GOV", s0-b0); @@ -498,13 +498,13 @@ std::vector<Action::BasicAction> ActionBank::str2sequence(const std::string & na { if (c.stackEmpty()) return false; - if (split(c.getTape("ID").getRef(0), '-').size() > 1) + if (util::split(c.getTape("ID").getRef(0), '-').size() > 1) return false; - if (split(c.getTape("ID").getRef(c.stackTop()-c.getHead()), '-').size() > 1) + if (util::split(c.getTape("ID").getRef(c.stackTop()-c.getHead()), '-').size() > 1) return false; - if (split(c.getTape("ID").getRef(0), '.').size() > 1) + if (util::split(c.getTape("ID").getRef(0), '.').size() > 1) return false; - if (split(c.getTape("ID").getRef(c.stackTop()-c.getHead()), '.').size() > 1) + if (util::split(c.getTape("ID").getRef(c.stackTop()-c.getHead()), '.').size() > 1) return false; return simpleBufferWriteAppliable(c, "GOV", 0); }; @@ -570,9 +570,9 @@ std::vector<Action::BasicAction> ActionBank::str2sequence(const std::string & na for (int i = c.stackSize()-1; i >= 0; i--) { auto s = c.stackGetElem(i); - if (split(ids.getRef(s-b0), '-').size() > 1) + if (util::split(ids.getRef(s-b0), '-').size() > 1) continue; - if (split(ids.getRef(s-b0), '.').size() > 1) + if (util::split(ids.getRef(s-b0), '.').size() > 1) continue; if (govs.getHyp(s-b0).empty() || govs.getHyp(s-b0) == "0") { @@ -620,9 +620,9 @@ std::vector<Action::BasicAction> ActionBank::str2sequence(const std::string & na for (int i = sentenceStart; i <= sentenceEnd; i++) { - if (split(ids.getRef(i-b0), '-').size() > 1) + if (util::split(ids.getRef(i-b0), '-').size() > 1) continue; - if (split(ids.getRef(i-b0), '.').size() > 1) + if (util::split(ids.getRef(i-b0), '.').size() > 1) continue; if (govs.getHyp(i-b0).empty()) { @@ -660,7 +660,7 @@ std::vector<Action::BasicAction> ActionBank::str2sequence(const std::string & na break; } } - auto deps = split(ba.data, '+'); + auto deps = util::split(ba.data, '+'); for (auto s : deps) if (!s.empty()) { @@ -693,7 +693,7 @@ std::vector<Action::BasicAction> ActionBank::str2sequence(const std::string & na }; auto undo4 = [](Config & c, Action::BasicAction & ba) { - auto elems = split(ba.data); + auto elems = util::split(ba.data); for (auto elem : elems) if (!elem.empty()) c.stackPush(std::stoi(elem)); @@ -730,7 +730,7 @@ std::vector<Action::BasicAction> ActionBank::str2sequence(const std::string & na if (sscanf(name.c_str(), "%s %s", b1, b2) != 2) invalidNameAndAbort(ERRINFO); - if (isNum(b2)) + if (util::isNum(b2)) { int dist = std::stoi(b2); @@ -869,7 +869,7 @@ bool ActionBank::isRuleAppliable(Config & config, { if (!simpleBufferWriteAppliable(config, tapeName, relativeIndex)) return false; - return ruleIsAppliable(config.getTape(tapeName)[relativeIndex], rule); + return util::ruleIsAppliable(config.getTape(tapeName)[relativeIndex], rule); } void ActionBank::writeRuleResult(Config & config, const std::string & fromTapeName, const std::string & targetTapeName, const std::string & rule, int relativeIndex) @@ -879,7 +879,7 @@ void ActionBank::writeRuleResult(Config & config, const std::string & fromTapeNa auto & from = fromTape.getRef(relativeIndex); - toTape.setHyp(relativeIndex, applyRule(from, rule)); + toTape.setHyp(relativeIndex, util::applyRule(from, rule)); } void ActionBank::addCharToBuffer(Config & config, const std::string & tapeName, int relativeIndex) @@ -887,7 +887,7 @@ void ActionBank::addCharToBuffer(Config & config, const std::string & tapeName, auto & tape = config.getTape(tapeName); auto & from = tape.getHyp(relativeIndex); - int nbChar = getEndIndexOfNthSymbolFrom(config.rawInput.begin()+config.rawInputHeadIndex,config.rawInput.end(), 0)+1; + int nbChar = util::getEndIndexOfNthSymbolFrom(config.rawInput.begin()+config.rawInputHeadIndex,config.rawInput.end(), 0)+1; std::string suffix = std::string(config.rawInput.begin()+config.rawInputHeadIndex, config.rawInput.begin()+config.rawInputHeadIndex+nbChar); @@ -899,7 +899,7 @@ void ActionBank::removeCharFromBuffer(Config & config, const std::string & tapeN auto & tape = config.getTape(tapeName); auto from = tape.getRef(relativeIndex); - std::string suffix = std::string(config.rawInput.begin()+config.rawInputHeadIndex, config.rawInput.begin()+config.rawInputHeadIndex+getEndIndexOfNthSymbolFrom(config.rawInput.begin()+config.rawInputHeadIndex,config.rawInput.end(), 0)); + std::string suffix = std::string(config.rawInput.begin()+config.rawInputHeadIndex, config.rawInput.begin()+config.rawInputHeadIndex+util::getEndIndexOfNthSymbolFrom(config.rawInput.begin()+config.rawInputHeadIndex,config.rawInput.end(), 0)); for (char c : suffix) from.pop_back(); @@ -909,7 +909,7 @@ void ActionBank::removeCharFromBuffer(Config & config, const std::string & tapeN int ActionBank::getLinkLength(const Config & c, const std::string & action) { - auto splitted = split(action, ' '); + auto splitted = util::split(action, ' '); auto & name = splitted[0]; if (name == "LEFT" || name == "RIGHT" || name == "EOS") { diff --git a/transition_machine/src/ActionSet.cpp b/transition_machine/src/ActionSet.cpp index 1b0afa3..999051a 100644 --- a/transition_machine/src/ActionSet.cpp +++ b/transition_machine/src/ActionSet.cpp @@ -48,7 +48,7 @@ ActionSet::ActionSet(const std::string & filename, bool isDynamic) break; } - this->name = getFilenameFromPath(filename); + this->name = util::getFilenameFromPath(filename); } else { diff --git a/transition_machine/src/BD.cpp b/transition_machine/src/BD.cpp index 8afbc2a..722f55b 100644 --- a/transition_machine/src/BD.cpp +++ b/transition_machine/src/BD.cpp @@ -68,13 +68,13 @@ BD::BD(const std::string & BDfilename, const std::string & MCDfilename) exit(1); } - if(noAccentLower(refHyp) != std::string("ref") && noAccentLower(refHyp) != std::string("hyp")) + if(util::noAccentLower(refHyp) != std::string("ref") && util::noAccentLower(refHyp) != std::string("hyp")) { fprintf(stderr, "ERROR (%s) : \'%s\' is not a valid BD line argument. Aborting.\n", ERRINFO, refHyp); exit(1); } - bool known = noAccentLower(refHyp) == std::string("ref"); + bool known = util::noAccentLower(refHyp) == std::string("ref"); int inputColumn = mcdStr2Col.find(name) == mcdStr2Col.end() ? -1 : mcdStr2Col[name]; diff --git a/transition_machine/src/Classifier.cpp b/transition_machine/src/Classifier.cpp index 3b2d872..67957a9 100644 --- a/transition_machine/src/Classifier.cpp +++ b/transition_machine/src/Classifier.cpp @@ -159,7 +159,7 @@ void Classifier::initClassifier(Config & config) return; std::string modelFilename = ProgramParameters::expPath + name + ".model"; - if (fileExists(modelFilename)) + if (util::fileExists(modelFilename)) { nn.reset(createNeuralNetwork(modelFilename)); Dict::initDicts(nn->getModel(), name); @@ -398,7 +398,7 @@ float Classifier::computeEntropy(WeightedActions & wa) NeuralNetwork * Classifier::createNeuralNetwork() { - auto splited = split(topology, ' '); + auto splited = util::split(topology, ' '); if (splited.size() == 2) return new GeneticAlgorithm(); @@ -414,7 +414,7 @@ NeuralNetwork * Classifier::createNeuralNetwork() NeuralNetwork * Classifier::createNeuralNetwork(const std::string & modelFilename) { - auto splited = split(topology, ' '); + auto splited = util::split(topology, ' '); if (splited.size() == 2) return new GeneticAlgorithm(modelFilename); diff --git a/transition_machine/src/Config.cpp b/transition_machine/src/Config.cpp index d6848d6..b5babe2 100644 --- a/transition_machine/src/Config.cpp +++ b/transition_machine/src/Config.cpp @@ -102,11 +102,17 @@ void Config::readInput() FILE * fd = file->getDescriptor(); char buffer[100000]; + buffer[0] = '\0'; int lineIndex = 0; - while (fscanf(fd, "%[^\n]\n", buffer) == 1) + while (!std::feof(fd)) { + if (buffer != std::fgets(buffer, 100000, fd)) + break; + if (buffer[std::strlen(buffer)-1] == '\n') + buffer[std::strlen(buffer)-1] = '\0'; + lineIndex++; if (!utf8::is_valid(buffer, buffer+std::strlen(buffer))) @@ -116,12 +122,28 @@ void Config::readInput() } if (std::strlen(buffer) <= 3) + { + if (inputContent.empty() || !inputContent.back().empty()) + inputContent.emplace_back(); continue; + } + + auto splited = util::split(buffer, '='); + + if (splited[0] == "# sent_id ") + { + if (inputContent.empty() || !inputContent.back().empty()) + inputContent.emplace_back(); + } + else + { + std::string prefix = splited[0]; + if (buffer[0] == '#' && prefix != "# text ") + continue; + } - if (split(buffer, '=')[0] == "# sent_id ") + if (inputContent.empty()) inputContent.emplace_back(); - else if (buffer[0] == '#' && split(buffer, '=')[0] != "# text ") - continue; inputContent.back().emplace_back(buffer); } @@ -145,12 +167,12 @@ void Config::fillTapesWithInput() for (unsigned int wordIndex = 0; wordIndex < sentence.size(); wordIndex++) { auto & word = sentence[wordIndex]; - if (split(word, '=')[0] == "# text ") + if (util::split(word, '=')[0] == "# text ") { std::string prefix = rawInput.empty() ? "" : " "; - if (choiceWithProbability(0.3)) + if (util::choiceWithProbability(0.3)) prefix = "\n"; - else if (choiceWithProbability(0.3)) + else if (util::choiceWithProbability(0.3)) prefix = ""; if (rawInput.empty()) prefix = ""; @@ -160,7 +182,7 @@ void Config::fillTapesWithInput() else if (word[0] == '#') continue; - cols = split(word, '\t'); + cols = util::split(word, '\t'); if (!usualColsSize) usualColsSize = cols.size(); @@ -179,7 +201,7 @@ void Config::fillTapesWithInput() tape.addToHyp(""); if (tape.getName() == ProgramParameters::tapeToMask) - if (choiceWithProbability(ProgramParameters::maskRate)) + if (util::choiceWithProbability(ProgramParameters::maskRate)) tape.maskIndex(tape.refSize()-1); if (tape.getName() == ProgramParameters::sequenceDelimiterTape) { @@ -193,9 +215,9 @@ void Config::fillTapesWithInput() for (int word = sentenceStartIndex; hasGov && word < ids.refSize(); word++) { - if (split(ids.getRef(word), '-').size() > 1) + if (util::split(ids.getRef(word), '-').size() > 1) continue; - if (split(ids.getRef(word), '.').size() > 1) + if (util::split(ids.getRef(word), '.').size() > 1) continue; if (govs.getRef(word) == "0") continue; @@ -269,7 +291,7 @@ void Config::printForDebug(FILE * output) cols[colIndex].emplace_back(i == head ? " || " : ""); } - cols[colIndex].emplace_back(shrinkString(tape[i-head], 10, "..")); + cols[colIndex].emplace_back(util::shrinkString(tape[i-head], 10, "..")); } } @@ -282,7 +304,7 @@ void Config::printForDebug(FILE * output) if (!rawInput.empty()) { int rawWindow = 30; - int relativeHeadIndex = getEndIndexOfNthSymbolFrom(rawInput.begin()+rawInputHeadIndex, rawInput.end(), rawWindow); + int relativeHeadIndex = util::getEndIndexOfNthSymbolFrom(rawInput.begin()+rawInputHeadIndex, rawInput.end(), rawWindow); auto endIter = rawInput.begin() + rawInputHeadIndex + relativeHeadIndex + 1; if (relativeHeadIndex < 0) endIter = rawInput.end(); @@ -294,7 +316,7 @@ void Config::printForDebug(FILE * output) fprintf(output, "-%s", i == 80-1 ? "\n" : ""); } - printColumns(output, cols, 3); + util::printColumns(output, cols, 3); fprintf(output, "Stack : "); for(int s : stack) @@ -366,11 +388,11 @@ void Config::moveHead(int mvt) if (mvt > 0) for (int i = 0; i < mvt; i++) - if (hasTape("ID") && split(getTape("ID").getHyp(i), '-').size() <= 1) + if (hasTape("ID") && util::split(getTape("ID").getHyp(i), '-').size() <= 1) currentWordIndex += 1; if (mvt < 0) for (int i = 0; i < mvt; i++) - if (hasTape("ID") && split(getTape("ID").getHyp(-i), '-').size() <= 1) + if (hasTape("ID") && util::split(getTape("ID").getHyp(-i), '-').size() <= 1) currentWordIndex -= 1; for (auto & tape : tapes) @@ -387,7 +409,7 @@ void Config::moveRawInputHead(int mvt) { if (mvt >= 0) { - int relativeIndexMvt = getStartIndexOfNthSymbolFrom(rawInput.begin()+rawInputHeadIndex, rawInput.end(), mvt); + int relativeIndexMvt = util::getStartIndexOfNthSymbolFrom(rawInput.begin()+rawInputHeadIndex, rawInput.end(), mvt); if (relativeIndexMvt > 0) { rawInputHead += mvt; @@ -396,7 +418,7 @@ void Config::moveRawInputHead(int mvt) } else { - int relativeIndexMvt = getStartIndexOfNthSymbolFrom(rawInput.begin()+rawInputHeadIndex, rawInput.begin(), mvt); + int relativeIndexMvt = util::getStartIndexOfNthSymbolFrom(rawInput.begin()+rawInputHeadIndex, rawInput.begin(), mvt); if (relativeIndexMvt < 0) { rawInputHeadIndex += relativeIndexMvt; @@ -637,7 +659,7 @@ void Config::Tape::moveHead(int mvt) bool Config::endOfTapes() const { - return inputAllRead && (tapes[0].headIsAtEnd() || rawInputHeadIndex >= (int)rawInput.size()); + return inputAllRead && (tapes[0].headIsAtEnd() || (rawInput.size() > 0 && rawInputHeadIndex >= (int)rawInput.size())); } bool Config::Tape::headIsAtEnd() const @@ -778,12 +800,12 @@ void Config::addToActionsHistory(std::string & state, const std::string & action { try { - if (std::stoi(split(it->first, '_').back()) < head-20) + if (std::stoi(util::split(it->first, '_').back()) < head-20) { it = actionsHistory.erase(it); continue; } - } catch (std::exception &) {fprintf(stderr, "ERROR (%s) : calling std::stoi on \'%s\'.aborting.\n", ERRINFO, split(it->first, '_').back().c_str()); exit(1);} + } catch (std::exception &) {fprintf(stderr, "ERROR (%s) : calling std::stoi on \'%s\'.aborting.\n", ERRINFO, util::split(it->first, '_').back().c_str()); exit(1);} it++; } @@ -883,10 +905,10 @@ void Config::updateIdsInSequence() int digitIndex = 1; for (int i = sentenceStart; i <= sentenceEnd; i++) { - auto splited = split(ids.getRef(i-getHead()), '-'); + auto splited = util::split(ids.getRef(i-getHead()), '-'); if (splited.size() == 1) { - auto splited2 = split(ids.getRef(i-getHead()), '.'); + auto splited2 = util::split(ids.getRef(i-getHead()), '.'); if (splited2.size() == 1) { ids.setHyp(i-getHead(), std::to_string(curId++)); diff --git a/transition_machine/src/FeatureBank.cpp b/transition_machine/src/FeatureBank.cpp index b687705..6c31cca 100644 --- a/transition_machine/src/FeatureBank.cpp +++ b/transition_machine/src/FeatureBank.cpp @@ -219,10 +219,10 @@ FeatureModel::FeatureValue getDistance(int index1, int index2, const std::string std::function<FeatureModel::FeatureValue(Config &)> FeatureBank::str2func(const std::string & s) { - if (split(s,'.')[0] == "raw") + if (util::split(s,'.')[0] == "raw") { int relativeIndex; - try {relativeIndex = std::stoi(split(s, '.')[1]);} + try {relativeIndex = std::stoi(util::split(s, '.')[1]);} catch (std::exception &) { fprintf(stderr, "ERROR (%s) : invalid feature format \'%s\'. Relative index must be an integer. Aborting.\n", ERRINFO, s.c_str()); @@ -230,7 +230,7 @@ std::function<FeatureModel::FeatureValue(Config &)> FeatureBank::str2func(const } return [relativeIndex, s](Config & c) { - int relativeCharIndex = getStartIndexOfNthSymbolFrom(c.rawInput.begin()+c.rawInputHeadIndex, relativeIndex >= 0 ? c.rawInput.end() : c.rawInput.begin(), relativeIndex); + int relativeCharIndex = util::getStartIndexOfNthSymbolFrom(c.rawInput.begin()+c.rawInputHeadIndex, relativeIndex >= 0 ? c.rawInput.end() : c.rawInput.begin(), relativeIndex); Dict * dict = Dict::getDict("letters"); auto policy = dictPolicy2FeaturePolicy(dict->policy); @@ -240,7 +240,7 @@ std::function<FeatureModel::FeatureValue(Config &)> FeatureBank::str2func(const if (relativeCharIndex < 0 && relativeIndex >= 0) return FeatureModel::FeatureValue({dict, s, Dict::nullValueStr, policy}); - int endIndex = getEndIndexOfNthSymbolFrom(c.rawInput.begin()+c.rawInputHeadIndex+relativeCharIndex, c.rawInput.end(), 0); + int endIndex = util::getEndIndexOfNthSymbolFrom(c.rawInput.begin()+c.rawInputHeadIndex+relativeCharIndex, c.rawInput.end(), 0); auto a = c.rawInput.begin()+c.rawInputHeadIndex+relativeCharIndex; auto b = a + endIndex + 1; @@ -259,11 +259,11 @@ std::function<FeatureModel::FeatureValue(Config &)> FeatureBank::str2func(const }; } - auto splited = split(s, '#'); + auto splited = util::split(s, '#'); if (splited.size() == 1) { - splited = split(splited[0], '.'); + splited = util::split(splited[0], '.'); int index = -1; try {index = std::stoi(splited[1]);} catch(std::exception &) @@ -301,7 +301,7 @@ std::function<FeatureModel::FeatureValue(Config &)> FeatureBank::str2func(const exit(1); } - splited = split(target, '.'); + splited = util::split(target, '.'); if (splited.size() < 2) { fprintf(stderr, "ERROR (%s) : invalid feature format \'%s\'. Aborting.\n", ERRINFO, s.c_str()); @@ -357,7 +357,7 @@ std::function<FeatureModel::FeatureValue(Config &)> FeatureBank::str2func(const return context; }; - splited = split(feature, '.'); + splited = util::split(feature, '.'); feature = splited[0]; @@ -462,7 +462,7 @@ FeatureModel::FeatureValue FeatureBank::getUppercase(Config &, const FeatureMode { Dict * dict = Dict::getDict("bool"); auto policy = dictPolicy2FeaturePolicy(dict->policy); - bool firstLetterUppercase = isUpper(fv.values[0][0]); + bool firstLetterUppercase = util::isUpper(fv.values[0][0]); if(fv.values[0] == Dict::nullValueStr) return {dict, fv.names[0], Dict::nullValueStr, policy}; @@ -476,7 +476,7 @@ FeatureModel::FeatureValue FeatureBank::getLength(Config &, const FeatureModel:: { Dict * dict = Dict::getDict("int"); auto policy = dictPolicy2FeaturePolicy(dict->policy); - int len = lengthPrinted(fv.values[0]); + int len = util::lengthPrinted(fv.values[0]); if(fv.values[0] == Dict::nullValueStr) return {dict, fv.names[0], Dict::nullValueStr, policy}; @@ -498,7 +498,7 @@ FeatureModel::FeatureValue FeatureBank::getLetters(Config &, const FeatureModel: if(fv.values[0] == Dict::nullValueStr) return {dict, fv.names[0], Dict::nullValueStr, policy}; - int nbSymbols = getNbSymbols(fv.values[0]); + int nbSymbols = util::getNbSymbols(fv.values[0]); if(from < 0) from = nbSymbols + from; @@ -514,8 +514,8 @@ FeatureModel::FeatureValue FeatureBank::getLetters(Config &, const FeatureModel: exit(1); } - int start = getStartIndexOfNthSymbol(fv.values[0], from); - int end = getEndIndexOfNthSymbol(fv.values[0], to); + int start = util::getStartIndexOfNthSymbol(fv.values[0], from); + int end = util::getEndIndexOfNthSymbol(fv.values[0], to); std::string letters; for(int i = start; i <= end; i++) @@ -637,7 +637,7 @@ FeatureModel::FeatureValue FeatureBank::fasttext(Config & c, const FeatureModel: if(word.values[0] == Dict::nullValueStr) return {lettersDict, word.names[0], Dict::nullValueStr, policy}; - unsigned int wordLength = getNbSymbols(word.values[0]); + unsigned int wordLength = util::getNbSymbols(word.values[0]); unsigned int gramLength = 4; bool slidingMode = false; diff --git a/transition_machine/src/FeatureModel.cpp b/transition_machine/src/FeatureModel.cpp index 40f8db0..a1aabce 100644 --- a/transition_machine/src/FeatureModel.cpp +++ b/transition_machine/src/FeatureModel.cpp @@ -28,7 +28,7 @@ FeatureModel::FeatureDescription & FeatureModel::getFeatureDescription(Config & FeatureModel::FeatureModel(const std::string & filename) { - this->filename = getFilenameFromPath(filename); + this->filename = util::getFilenameFromPath(filename); File file(filename, "r"); FILE * fd = file.getDescriptor(); @@ -67,7 +67,7 @@ std::string FeatureModel::FeatureValue::toString(unsigned int i) unsigned int dim = dict->getDimension(); for (unsigned int j = 0; j < dim; j++) - result += " " + float2str(realVector[j], "%5.2f"); + result += " " + util::float2str(realVector[j], "%5.2f"); return result; } @@ -81,9 +81,9 @@ std::string FeatureModel::FeatureDescription::toString() for(auto featValue : values) for (unsigned int i = 0; i < featValue.dicts.size(); i++) { - int size1 = lengthPrinted(featValue.names[i])+15; - int size2 = lengthPrinted(" " + std::string(featValue.values[i])); - int size3 = lengthPrinted(" " + featValue.toString(i)); + int size1 = util::lengthPrinted(featValue.names[i])+15; + int size2 = util::lengthPrinted(" " + std::string(featValue.values[i])); + int size3 = util::lengthPrinted(" " + featValue.toString(i)); columnSizes[0] = std::max(columnSizes[0], size1); columnSizes[1] = std::max(columnSizes[1], size2); @@ -95,7 +95,7 @@ std::string FeatureModel::FeatureDescription::toString() totalLength += n; std::string column; - while ((int)lengthPrinted(column) < totalLength){column.push_back('-');} + while ((int)util::lengthPrinted(column) < totalLength){column.push_back('-');} column += "\n"; res += column; @@ -103,10 +103,10 @@ std::string FeatureModel::FeatureDescription::toString() for (unsigned int i = 0; i < featValue.dicts.size(); i++) { column = "FeatureValue : " + featValue.names[i]; - while ((int)lengthPrinted(column) < columnSizes[0]){column.push_back(' ');} + while ((int)util::lengthPrinted(column) < columnSizes[0]){column.push_back(' ');} res += column; column = " " + std::string(featValue.values[i]); - while ((int)lengthPrinted(column) < columnSizes[1]){column.push_back(' ');} + while ((int)util::lengthPrinted(column) < columnSizes[1]){column.push_back(' ');} res += column; column = " " + featValue.toString(i); res += column; @@ -114,7 +114,7 @@ std::string FeatureModel::FeatureDescription::toString() } column.clear(); - while ((int)lengthPrinted(column) < totalLength){column.push_back('-');} + while ((int)util::lengthPrinted(column) < totalLength){column.push_back('-');} column += "\n"; res += column; diff --git a/transition_machine/src/Oracle.cpp b/transition_machine/src/Oracle.cpp index 80be5ab..baf0789 100644 --- a/transition_machine/src/Oracle.cpp +++ b/transition_machine/src/Oracle.cpp @@ -123,7 +123,7 @@ void Oracle::createDatabase() while (fscanf(fd, "%[^\n]\n", b1) == 1) { - auto line = split(b1); + auto line = util::split(b1); if (line.size() == 2) oracle->data[line[0]] = line[1]; else @@ -148,7 +148,7 @@ void Oracle::createDatabase() while (fscanf(fd, "%[^\n]\n", b1) == 1) { - auto line = split(b1); + auto line = util::split(b1); if (line.size() == 2) oracle->data[line[0]] = line[1]; else @@ -173,7 +173,7 @@ void Oracle::createDatabase() while (fscanf(fd, "%[^\n]\n", b1) == 1) { - auto line = split(b1); + auto line = util::split(b1); if (line.size() == 2) oracle->data[line[0]] = line[1]; else @@ -221,7 +221,7 @@ void Oracle::createDatabase() auto & currentWordRef = c.getTape("FORM").getRef(0); auto & currentWordHyp = c.getTape("FORM").getHyp(0); - auto splited = split(split(action, ' ').back(),'@'); + auto splited = util::split(util::split(action, ' ').back(),'@'); if (splited.size() > 2) { @@ -245,7 +245,7 @@ void Oracle::createDatabase() if (action == "ADDCHARTOWORD" && currentWordRef.size() > currentWordHyp.size()) { - if (c.hasTape("ID") && split(c.getTape("ID").getRef(0), '-').size() > 1) + if (c.hasTape("ID") && util::split(c.getTape("ID").getRef(0), '-').size() > 1) return 1; for (unsigned int i = 0; i < (currentWordRef.size()-currentWordHyp.size()); i++) @@ -311,8 +311,8 @@ void Oracle::createDatabase() if (c.pastActions.size() == 0) return std::string("MOVE signature 0"); - std::string previousState = noAccentLower(c.pastActions.getElem(0).first); - std::string previousAction = noAccentLower(c.pastActions.getElem(0).second.name); + std::string previousState = util::noAccentLower(c.pastActions.getElem(0).first); + std::string previousAction = util::noAccentLower(c.pastActions.getElem(0).second.name); std::string newState; int movement = 0; @@ -340,8 +340,8 @@ void Oracle::createDatabase() if (c.pastActions.size() == 0) return std::string("MOVE tokenizer 0"); - std::string previousState = noAccentLower(c.pastActions.getElem(0).first); - std::string previousAction = noAccentLower(c.pastActions.getElem(0).second.name); + std::string previousState = util::noAccentLower(c.pastActions.getElem(0).first); + std::string previousAction = util::noAccentLower(c.pastActions.getElem(0).second.name); std::string newState; int movement = 0; @@ -349,12 +349,12 @@ void Oracle::createDatabase() newState = "tagger"; else if (previousState == "tokenizer") { - if (split(previousAction, ' ')[0] == "splitword" || split(previousAction, ' ')[0] == "endword") + if (util::split(previousAction, ' ')[0] == "splitword" || util::split(previousAction, ' ')[0] == "endword") newState = "signature"; else newState = "tokenizer"; - if (split(previousAction, ' ')[0] == "splitword") + if (util::split(previousAction, ' ')[0] == "splitword") movement = 1; } else if (previousState == "tagger" || previousState == "error_tagger") @@ -379,8 +379,8 @@ void Oracle::createDatabase() if (c.pastActions.size() == 0) return std::string("MOVE signature 0"); - std::string previousState = noAccentLower(c.pastActions.getElem(0).first); - std::string previousAction = noAccentLower(c.pastActions.getElem(0).second.name); + std::string previousState = util::noAccentLower(c.pastActions.getElem(0).first); + std::string previousAction = util::noAccentLower(c.pastActions.getElem(0).second.name); std::string newState; int movement = 0; @@ -389,7 +389,7 @@ void Oracle::createDatabase() else if (previousState == "parser") { newState = "parser"; - if (split(previousAction, ' ')[0] == "shift" || split(previousAction, ' ')[0] == "right") + if (util::split(previousAction, ' ')[0] == "shift" || util::split(previousAction, ' ')[0] == "right") { movement = 1; newState = "signature"; @@ -398,14 +398,14 @@ void Oracle::createDatabase() if (movement > 0 && c.endOfTapes()) movement = 0; - if (split(previousAction, ' ')[0] == "eos" && c.endOfTapes()) + if (util::split(previousAction, ' ')[0] == "eos" && c.endOfTapes()) return std::string(""); } else if (previousState == "error_parser") { newState = "parser"; - std::string previousParserAction = noAccentLower(c.pastActions.getElem(1).second.name); - if (split(previousParserAction, ' ')[0] == "shift" || split(previousParserAction, ' ')[0] == "right") + std::string previousParserAction = util::noAccentLower(c.pastActions.getElem(1).second.name); + if (util::split(previousParserAction, ' ')[0] == "shift" || util::split(previousParserAction, ' ')[0] == "right") { newState = "signature"; movement = 1; @@ -428,8 +428,8 @@ void Oracle::createDatabase() if (c.pastActions.size() == 0) return std::string("MOVE signature 0"); - std::string previousState = noAccentLower(c.pastActions.getElem(0).first); - std::string previousAction = noAccentLower(c.pastActions.getElem(0).second.name); + std::string previousState = util::noAccentLower(c.pastActions.getElem(0).first); + std::string previousAction = util::noAccentLower(c.pastActions.getElem(0).second.name); std::string newState; int movement = 0; @@ -450,7 +450,7 @@ void Oracle::createDatabase() newState = "parser"; else if (previousState == "parser") { - if (split(previousAction, ' ')[0] == "shift" || split(previousAction, ' ')[0] == "right") + if (util::split(previousAction, ' ')[0] == "shift" || util::split(previousAction, ' ')[0] == "right") { newState = "signature"; movement = 1; @@ -503,8 +503,8 @@ void Oracle::createDatabase() if (oracle->data.count(form)) signature = oracle->data[form]; - else if (oracle->data.count(noAccentLower(form))) - signature = oracle->data[noAccentLower(form)]; + else if (oracle->data.count(util::noAccentLower(form))) + signature = oracle->data[util::noAccentLower(form)]; else signature = "UNKNOWN"; @@ -552,8 +552,8 @@ void Oracle::createDatabase() if (oracle->data.count(form + "_" + pos)) lemma = oracle->data[form + "_" + pos]; - else if (oracle->data.count(noAccentLower(form)+"_"+pos)) - lemma = oracle->data[noAccentLower(form) + "_" + pos]; + else if (oracle->data.count(util::noAccentLower(form)+"_"+pos)) + lemma = oracle->data[util::noAccentLower(form) + "_" + pos]; else return std::string("NOTFOUND"); @@ -579,7 +579,7 @@ void Oracle::createDatabase() { const std::string & form = c.getTape("FORM").getRef(0); const std::string & lemma = c.getTape("LEMMA").getRef(0); - std::string rule = getRule(toLowerCase(form), toLowerCase(lemma)); + std::string rule = util::getRule(util::toLowerCase(form), util::toLowerCase(lemma)); return action == std::string("RULE LEMMA ON FORM ") + rule ? 0 : 1; }))); @@ -603,15 +603,15 @@ void Oracle::createDatabase() auto & eos = c.getTape(ProgramParameters::sequenceDelimiterTape); int head = c.getHead(); - bool headIsMultiword = split(ids.getRef(0), '-').size() > 1; - bool headIsEmptyNode = split(ids.getRef(0), '.').size() > 1; + bool headIsMultiword = util::split(ids.getRef(0), '-').size() > 1; + bool headIsEmptyNode = util::split(ids.getRef(0), '.').size() > 1; int headGov = -1; try {headGov = head + std::stoi(govs.getRef(0));} catch (std::exception &) {headGov = -1;} int stackHead = c.stackEmpty() ? 0 : c.stackTop(); - bool stackHeadIsMultiword = split(ids.getRef(stackHead-head), '-').size() > 1; - bool stackHeadIsEmptyNode = split(ids.getRef(stackHead-head), '.').size() > 1; + bool stackHeadIsMultiword = util::split(ids.getRef(stackHead-head), '-').size() > 1; + bool stackHeadIsEmptyNode = util::split(ids.getRef(stackHead-head), '.').size() > 1; int stackGov = -1; try {stackGov = stackHead + std::stoi(govs.getRef(stackHead-head));} catch (std::exception &) {stackGov = -1;} @@ -630,7 +630,7 @@ void Oracle::createDatabase() if (sentenceEnd == eos.refSize()) sentenceEnd--; - auto parts = split(action); + auto parts = util::split(action); if (parts[0] == "SHIFT") { @@ -656,7 +656,7 @@ void Oracle::createDatabase() } else if (parts[0] == "WRITE" && parts.size() == 4) { - auto object = split(parts[1], '.'); + auto object = util::split(parts[1], '.'); if (object[0] == "b") { if (parts[2] == "LABEL") @@ -721,7 +721,7 @@ void Oracle::createDatabase() if (parts.size() == 1) return cost; - if (split(labels.getRef(stackHead-head), ':')[0] == split(parts[1], ':')[0]) + if (util::split(labels.getRef(stackHead-head), ':')[0] == util::split(parts[1], ':')[0]) return cost; return cost+1; @@ -756,7 +756,7 @@ void Oracle::createDatabase() if (parts.size() == 1) return cost; - if (split(labels.getRef(0), ':')[0] == split(parts[1], ':')[0]) + if (util::split(labels.getRef(0), ':')[0] == util::split(parts[1], ':')[0]) return cost; return cost+1; @@ -820,7 +820,7 @@ void Oracle::createDatabase() void Oracle::explainCostOfAction(FILE * output, Config & c, const std::string & action) { - auto parts = split(action); + auto parts = util::split(action); if (parts[0] == "WRITE") { if (parts.size() != 4) @@ -828,7 +828,7 @@ void Oracle::explainCostOfAction(FILE * output, Config & c, const std::string & fprintf(stderr, "Wrong number of action arguments\n"); return; } - auto object = split(parts[1], '.'); + auto object = util::split(parts[1], '.'); auto tape = parts[2]; auto label = parts[3]; std::string expected; @@ -858,7 +858,7 @@ void Oracle::explainCostOfAction(FILE * output, Config & c, const std::string & } else if (parts[0] == "IGNORECHAR") { - if (!isUtf8Separator(c.rawInput.begin()+c.rawInputHeadIndex)) + if (!util::isUtf8Separator(c.rawInput.begin()+c.rawInputHeadIndex)) { fprintf(stderr, "rawInputHead is pointing to non separator character <%c>(%d)\n", c.rawInput[c.rawInputHeadIndex], c.rawInput[c.rawInputHeadIndex]); return; -- GitLab