diff --git a/error_correction/src/macaon_train_error_detector.cpp b/error_correction/src/macaon_train_error_detector.cpp index 2270820758926adb961a5d1606d5e4e88e44cda8..553edbb23720bd848889008362a6180ea9601a20 100644 --- a/error_correction/src/macaon_train_error_detector.cpp +++ b/error_correction/src/macaon_train_error_detector.cpp @@ -178,8 +178,10 @@ macaon_decode --lang " + ProgramParameters::lang + " --tm machine.tm --bd test. if (system(("ln -f -s " + ProgramParameters::expPath + "decode.sh " + ProgramParameters::langPath + "bin/maca_tm_" + ProgramParameters::expName).c_str())){} } -std::map<std::string, std::pair<float, std::pair<float, float> > > getScoreOnDev(TransitionMachine & tm, std::vector<Config> devConfigs, std::vector<int> & devIsErrors, std::vector<int> &) +std::map<std::string, std::pair<float, std::pair<float, float> > > getScoreOnDev(TransitionMachine & tm, std::vector<int> & devIsErrors, std::vector<int> &, File & dev, Config & devConfig) { + dev.rewind(); + FILE * devPtr = dev.getDescriptor(); tm.reset(); std::map< std::string, std::pair<int, int> > counts; @@ -190,9 +192,18 @@ std::map<std::string, std::pair<float, std::pair<float, float> > > getScoreOnDev std::vector<int> predictions; std::string classifierName; - for (unsigned int i = 0; i < devConfigs.size(); i++) + int isError, errorIndex; + + for (unsigned int i = 0; i < devIsErrors.size(); i++) { - auto & devConfig = devConfigs[i]; + if (fscanf(devPtr, "%d\t%d\n", &isError, &errorIndex) != 2) + { + fprintf(stderr, "ERROR (%s) : corpus bad format. 
Aborting.\n", ERRINFO); + exit(1); + } + + devConfig.loadFromFile(dev); + TransitionMachine::State * currentState = tm.getCurrentState(); Classifier * classifier = currentState->classifier; devConfig.setCurrentStateName(&currentState->name); @@ -271,7 +282,7 @@ std::map<std::string, std::pair<float, std::pair<float, float> > > getScoreOnDev return scores; } -void printScoresAndSave(FILE * output, std::map< std::string, std::pair<int, int> > & trainCounter, std::map< std::string, float > & scores, TransitionMachine & tm, int curIter, std::map< std::string, float > & bestScores, std::vector<Config> & devConfigs, std::vector<int> & devIsErrors, std::vector<int> & devErrorIndexes) +void printScoresAndSave(FILE * output, std::map< std::string, std::pair<int, int> > & trainCounter, std::map< std::string, float > & scores, TransitionMachine & tm, int curIter, std::map< std::string, float > & bestScores, std::vector<int> & devIsErrors, std::vector<int> & devErrorIndexes, File & devFile, Config & config) { for (auto & it : trainCounter) scores[it.first] = 100.0 * it.second.second / it.second.first; @@ -284,7 +295,7 @@ void printScoresAndSave(FILE * output, std::map< std::string, std::pair<int, int std::map<std::string, bool> saved; - auto devScores = getScoreOnDev(tm, devConfigs, devIsErrors, devErrorIndexes); + auto devScores = getScoreOnDev(tm, devIsErrors, devErrorIndexes, devFile, config); for (auto & it : devScores) { @@ -354,56 +365,55 @@ void launchTraining() std::map< std::string, bool > topologyPrinted; std::map< std::string, std::pair<int, int> > trainCounter; int curIter = 0; - std::vector<Config> configs; std::vector<int> isErrors; std::vector<int> errorIndexes; - std::vector<Config> devConfigs; std::vector<int> devIsErrors; std::vector<int> devErrorIndexes; int isError; int errorIndex; + Config config(trainBD); fprintf(stderr, "Reading train corpus..."); while (fscanf(trainPtr, "%d\t%d\n", &isError, &errorIndex) == 2) { - configs.emplace_back(trainBD); 
isErrors.emplace_back(isError); errorIndexes.emplace_back(errorIndex); - configs.back().loadFromFile(train); + config.loadFromFile(train); } fprintf(stderr, " done !\n"); fprintf(stderr, "Reading dev corpus..."); while (fscanf(devPtr, "%d\t%d\n", &isError, &errorIndex) == 2) { - devConfigs.emplace_back(trainBD); devIsErrors.emplace_back(isError); devErrorIndexes.emplace_back(errorIndex); - devConfigs.back().loadFromFile(dev); + config.loadFromFile(dev); } fprintf(stderr, " done !\n"); - auto resetAndShuffle = [&configs,&trainCounter]() + auto resetAndShuffle = [&trainCounter,&train,&dev,&trainPtr]() { - //TODO shuffle - /* - if(ProgramParameters::shuffleExamples) - std::random_shuffle(configs.begin(), configs.end()); - */ - + train.rewind(); + dev.rewind(); + trainPtr = train.getDescriptor(); for (auto & it : trainCounter) it.second.first = it.second.second = 0; }; + Config trainConfig(trainBD); while (curIter < ProgramParameters::nbIter) { resetAndShuffle(); - for (unsigned int i = 0; i < configs.size(); i++) + for (unsigned int i = 0; i < isErrors.size(); i++) { - auto & trainConfig = configs[i]; - isError = isErrors[i]; - errorIndex = errorIndexes[i]; + if (fscanf(trainPtr, "%d\t%d\n", &isError, &errorIndex) != 2) + { + fprintf(stderr, "ERROR (%s) : corpus bad format. Aborting.\n", ERRINFO); + exit(1); + } + + trainConfig.loadFromFile(train); TransitionMachine::State * currentState = tm.getCurrentState(); Classifier * classifier = currentState->classifier; @@ -420,7 +430,7 @@ void launchTraining() // Print current iter advancement in percentage if (ProgramParameters::interactive) { - int totalSize = configs.size(); + int totalSize = isErrors.size(); int steps = i; if (steps % 200 == 0 || totalSize-steps < 200) fprintf(stderr, "Current Iteration : %.2f%%\r", 100.0*steps/totalSize); @@ -445,7 +455,7 @@ void launchTraining() trainCounter[classifier->name].second += pAction == oAction ? 
1 : 0; } - printScoresAndSave(stderr, trainCounter, scores, tm, curIter, bestScores, devConfigs, devIsErrors, devErrorIndexes); + printScoresAndSave(stderr, trainCounter, scores, tm, curIter, bestScores, devIsErrors, devErrorIndexes, dev, config); curIter++; } }