diff --git a/CMakeLists.txt b/CMakeLists.txt index 01d908e27b1424e584413a8426cb5a3492955939..9f9c4ab5e1b1b6b86f608cc95277a2fc2bac2ad4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,12 +28,14 @@ include_directories(transition_machine/include) include_directories(trainer/include) include_directories(decoder/include) include_directories(MLP/include) +include_directories(error_correction/include) add_subdirectory(maca_common) add_subdirectory(transition_machine) add_subdirectory(trainer) add_subdirectory(decoder) add_subdirectory(MLP) +add_subdirectory(error_correction) set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/docs/config) diff --git a/error_correction/CMakeLists.txt b/error_correction/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..fe0b241d275cf50a628a2a59f89fb1eb8486062e --- /dev/null +++ b/error_correction/CMakeLists.txt @@ -0,0 +1,7 @@ +FILE(GLOB SOURCES src/*.cpp) + +add_executable(macaon_error_correction src/macaon_error_correction.cpp) +target_link_libraries(macaon_error_correction transition_machine) +target_link_libraries(macaon_error_correction ${Boost_PROGRAM_OPTIONS_LIBRARY}) +install(TARGETS macaon_error_correction DESTINATION bin) + diff --git a/error_correction/src/macaon_error_correction.cpp b/error_correction/src/macaon_error_correction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..341f3ca7add3032b689a15f6a55e7f1a13e0b965 --- /dev/null +++ b/error_correction/src/macaon_error_correction.cpp @@ -0,0 +1,249 @@ +/// @file macaon_error_correction.cpp +/// @author Franck Dary +/// @version 1.0 +/// @date 2018-11-27 + +#include <cstdio> +#include <cstdlib> +#include <boost/program_options.hpp> +#include "BD.hpp" +#include "Config.hpp" +#include "TransitionMachine.hpp" +#include "util.hpp" + +namespace po = boost::program_options; + +/// @brief Get the list of mandatory and optional program arguments. +/// +/// @return The lists. 
+po::options_description getOptionsDescription()
+{
+  // Small factories for the two kinds of string-valued options declared below.
+  auto mandatoryString = []()
+  {
+    return po::value<std::string>()->required();
+  };
+  auto defaultedString = [](const char * fallback)
+  {
+    return po::value<std::string>()->default_value(fallback);
+  };
+
+  // Options flagged as required() : the user has to supply every one of them.
+  po::options_description mandatory("Required");
+  mandatory.add_options()
+    ("expName", mandatoryString(),
+      "Name of this experiment")
+    ("classifier", mandatoryString(),
+      "Name of the monitored classifier")
+    ("tm", mandatoryString(),
+      "File describing the Tape Machine to use")
+    ("bd", mandatoryString(),
+      "BD file that describes the multi-tapes buffer")
+    ("mcd", mandatoryString(),
+      "MCD file that describes the input")
+    ("input,I", mandatoryString(),
+      "Input file formated according to the mcd");
+
+  // Switches (no value) and options that fall back to a default value.
+  po::options_description facultative("Optional");
+  facultative.add_options()
+    ("help,h", "Produce this help message")
+    ("debug,d", "Print infos on stderr")
+    ("printEntropy", "Print entropy for each sequence")
+    ("sequenceDelimiterTape", defaultedString("EOS"),
+      "The name of the buffer's tape that contains the delimiter token for a sequence")
+    ("sequenceDelimiter", defaultedString("1"),
+      "The value of the token that act as a delimiter for sequences")
+    ("lang", defaultedString("fr"),
+      "Language you are working with");
+
+  // Gather both groups, required ones first, under a single description.
+  po::options_description everything("Command-Line Arguments ");
+  everything.add(mandatory);
+  everything.add(facultative);
+
+  return everything;
+}
+
+/// @brief Store the program arguments inside a variables_map
+///
+/// @param od The description of all the possible options.
+/// @param argc The number of arguments given to this program.
+/// @param argv The values of arguments given to this program.
+///
+/// @return The variables map
+po::variables_map checkOptions(po::options_description & od, int argc, char ** argv)
+{
+  po::variables_map vm;
+
+  // Parsing errors (unknown option, malformed value...) are reported together
+  // with the usage text, then the program stops.
+  try {po::store(po::parse_command_line(argc, argv, od), vm);}
+  catch(const std::exception & e)
+  {
+    std::cerr << "Error: " << e.what() << "\n";
+    od.print(std::cerr);
+    exit(1);
+  }
+
+  // The help request is handled before notify(), so that asking for help
+  // does not fail because the required options are missing.
+  if (vm.count("help"))
+  {
+    std::cout << od << "\n";
+    exit(0);
+  }
+
+  // notify() is the step that reports missing required options.
+  try {po::notify(vm);}
+  catch(const std::exception & e)
+  {
+    std::cerr << "Error: " << e.what() << "\n";
+    od.print(std::cerr);
+    exit(1);
+  }
+
+  return vm;
+}
+
+/// @brief Run a TransitionMachine on the input and dump, for the monitored classifier, one labelled example per decision.
+///
+/// Each time the current classifier is the one named by --classifier, a line
+/// "<isError>\t<errorIndex - actionIndex>" followed by the current Config
+/// (printAsExample) is written on stderr. That output can be used as a corpus
+/// for error detection.
+///
+/// The program exits with status 1 if MACAON_DIR is not set, if the classifier
+/// proposes no action, or if the chosen action is not appliable.
+///
+/// @param argc The number of arguments given to this program.
+/// @param argv[] Array of arguments given to this program.
+///
+/// @return 0 if there was no crash.
+int main(int argc, char * argv[])
+{
+  auto od = getOptionsDescription();
+
+  po::variables_map vm = checkOptions(od, argc, argv);
+
+  ProgramParameters::expName = vm["expName"].as<std::string>();
+  ProgramParameters::tmName = vm["tm"].as<std::string>();
+  ProgramParameters::bdName = vm["bd"].as<std::string>();
+  ProgramParameters::input = vm["input"].as<std::string>();
+  ProgramParameters::mcdName = vm["mcd"].as<std::string>();
+  ProgramParameters::debug = vm.count("debug") != 0;
+  ProgramParameters::printEntropy = vm.count("printEntropy") != 0;
+  ProgramParameters::lang = vm["lang"].as<std::string>();
+  ProgramParameters::sequenceDelimiterTape = vm["sequenceDelimiterTape"].as<std::string>();
+  ProgramParameters::sequenceDelimiter = vm["sequenceDelimiter"].as<std::string>();
+  ProgramParameters::classifierName = vm["classifier"].as<std::string>();
+
+  // getenv returns a null pointer when the variable is unset; building a
+  // std::string from it would be undefined behaviour, so fail explicitly.
+  const char * MACAON_DIR = std::getenv("MACAON_DIR");
+  if (MACAON_DIR == nullptr)
+  {
+    fprintf(stderr, "ERROR (%s) : environment variable \'MACAON_DIR\' is not set. Aborting\n", ERRINFO);
+    exit(1);
+  }
+
+  // The experiment directory is $MACAON_DIR/<lang>/bin/<expName>/
+  std::string slash = "/";
+  ProgramParameters::expPath = MACAON_DIR + slash + ProgramParameters::lang + slash + "bin/" + ProgramParameters::expName + slash;
+
+  ProgramParameters::tmFilename = ProgramParameters::expPath + ProgramParameters::tmName;
+  ProgramParameters::bdFilename = ProgramParameters::expPath + ProgramParameters::bdName;
+  ProgramParameters::mcdFilename = ProgramParameters::mcdName;
+
+  TransitionMachine tm(false);
+
+  BD bd(ProgramParameters::bdFilename, ProgramParameters::mcdFilename);
+  Config config(bd);
+  config.readInput(ProgramParameters::input);
+
+  // Number of monitored decisions after which an error label expires.
+  const int windowSize = 5;
+
+  float entropyAccumulator = 0.0;
+  int nbActionsInSequence = 0;
+  bool justFlipped = false;
+  bool configIsError = false;
+  int actionIndex = 0;
+  int errorIndex = 0;
+  while (!config.isFinal())
+  {
+    TransitionMachine::State * currentState = tm.getCurrentState();
+    Classifier * classifier = currentState->classifier;
+    config.setCurrentStateName(&currentState->name);
+    Dict::currentClassifierName = classifier->name;
+
+    if (ProgramParameters::debug)
+    {
+      config.printForDebug(stderr);
+      fprintf(stderr, "State : \'%s\'\n", currentState->name.c_str());
+    }
+
+    auto weightedActions = classifier->weightActions(config);
+
+    if (ProgramParameters::debug)
+    {
+      Classifier::printWeightedActions(stderr, weightedActions);
+      fprintf(stderr, "\n");
+    }
+
+    if (weightedActions.size() == 0)
+    {
+      fprintf(stderr, "ERROR (%s) : no action proposed in state \'%s\'. Aborting\n", ERRINFO, currentState->name.c_str());
+      exit(1);
+    }
+
+    // Pick the first action whose flag (.first) is set; if none is, the last
+    // one of the list is kept. The name is copied : binding a reference to
+    // weightedActions[0] would overwrite that entry while iterating.
+    std::string predictedAction;
+    Action * action = nullptr;
+
+    for(unsigned int i = 0; i < weightedActions.size(); i++)
+    {
+      predictedAction = weightedActions[i].second.second;
+      action = classifier->getAction(predictedAction);
+
+      if(weightedActions[i].first)
+        break;
+    }
+
+    if(!action->appliable(config))
+    {
+      // First case the analysis is finished but without an empty stack
+      // NOTE(review): if the stack is already empty here, nothing changes
+      // before the next iteration - confirm that isFinal() then holds.
+      if (config.head == (int)config.tapes[0].ref.size()-1)
+      {
+        while (!config.stackEmpty())
+          config.stackPop();
+        continue;
+      }
+      else
+      {
+        fprintf(stderr, "ERROR (%s) : action \'%s\' is not appliable. Aborting\n", ERRINFO, predictedAction.c_str());
+        exit(1);
+      }
+    }
+
+    if (classifier->name == ProgramParameters::classifierName)
+    {
+      // Label of the current configuration, then the configuration itself.
+      fprintf(stderr, "%d\t%d\n", configIsError ? 1 : 0, errorIndex - actionIndex);
+      config.printAsExample(stderr);
+      actionIndex++;
+
+      // Is the predicted action one of the zero cost actions ?
+      auto zeroCostActions = classifier->getZeroCostActions(config);
+      bool pActionIsZeroCost = false;
+      for (auto & s : zeroCostActions)
+        if (s == action->name)
+        {
+          pActionIsZeroCost = true;
+          break;
+        }
+
+      if (!pActionIsZeroCost)
+      {
+        // A new error starts when none is active or the previous one expired.
+        if (!configIsError || (actionIndex - errorIndex > windowSize))
+        {
+          configIsError = true;
+          errorIndex = actionIndex-1;
+        }
+      }
+      else if (configIsError && (actionIndex - errorIndex > windowSize))
+      {
+        // The last error is older than the window : back to the normal label.
+        configIsError = false;
+        errorIndex = 0;
+      }
+    }
+
+    action->apply(config);
+
+    TransitionMachine::Transition * transition = tm.getTransition(predictedAction);
+    tm.takeTransition(transition);
+
+    config.moveHead(transition->headMvt);
+
+    if (ProgramParameters::printEntropy)
+    {
+      nbActionsInSequence++;
+
+      entropyAccumulator += Classifier::computeEntropy(weightedActions);
+
+      // justFlipped makes sure the mean entropy is printed only once per
+      // delimiter token, even if the head stays just after it for several steps.
+      if (config.head >= 1 && config.getTape(ProgramParameters::sequenceDelimiterTape)[config.head-1] != ProgramParameters::sequenceDelimiter)
+        justFlipped = false;
+
+      if ((config.head >= 1 && config.getTape(ProgramParameters::sequenceDelimiterTape)[config.head-1] == ProgramParameters::sequenceDelimiter && !justFlipped))
+      {
+        justFlipped = true;
+        entropyAccumulator /= nbActionsInSequence;
+        nbActionsInSequence = 0;
+        fprintf(stderr, "Entropy : %.2f\n", entropyAccumulator);
+        entropyAccumulator = 0.0;
+      }
+    }
+
+  }
+
+  return 0;
+}
+
diff --git a/maca_common/include/ProgramParameters.hpp b/maca_common/include/ProgramParameters.hpp
index 2cd815ae7360bebaff2ec6a5938fac2ab22179de..35dadb28698df9bdaee43ff8a19399e874dec35c 100644
--- a/maca_common/include/ProgramParameters.hpp
+++ b/maca_common/include/ProgramParameters.hpp
@@ -49,6 +49,7 @@ struct ProgramParameters
   static bool printTime;
   static std::string sequenceDelimiterTape;
   static std::string sequenceDelimiter;
+  static std::string classifierName;
 
   private :
 
diff --git a/maca_common/src/ProgramParameters.cpp b/maca_common/src/ProgramParameters.cpp
index 43604b5bbc7763887f83a4143872a9c9ff9845d5..a9c1ec42f20a8bc7b93ba3c7579bf5443c60eabf 100644
--- a/maca_common/src/ProgramParameters.cpp
+++ b/maca_common/src/ProgramParameters.cpp
@@ -44,3 +44,4 @@ int ProgramParameters::iterationSize;
 int ProgramParameters::nbTrain;
 std::string ProgramParameters::sequenceDelimiterTape;
 std::string ProgramParameters::sequenceDelimiter;
+std::string ProgramParameters::classifierName;