Skip to content
Snippets Groups Projects
Select Git revision
  • 3caec36ac4c33bdc8a46a202b0d3d467a92e3742
  • master default protected
  • fullUD
  • movementInAction
4 results

macaon_decode.cpp

  • macaon_decode.cpp 6.55 KiB
    /// @file macaon_decode.cpp
    /// @author Franck Dary
    /// @version 1.0
    /// @date 2018-08-07
    
    #include <cstdio>
    #include <cstdlib>
    #include <boost/program_options.hpp>
    #include "BD.hpp"
    #include "Config.hpp"
    #include "TransitionMachine.hpp"
    #include "Decoder.hpp"
    
    namespace po = boost::program_options;
    
    /// @brief Build the description of every command-line option this program accepts.
    ///
    /// The options are organised in three groups that are merged into a single
    /// description: required options (each one flagged with required()),
    /// optional options (flags and values with defaults), and options related
    /// to error analysis.
    ///
    /// @return The merged description, in the order required / optional / analysis.
    po::options_description getOptionsDescription()
    {
      po::options_description desc("Command-Line Arguments ");
    
      // Options that must be present on the command line.
      po::options_description req("Required");
      req.add_options()
        ("expName", po::value<std::string>()->required(),
          "Name of this experiment")
        ("tm", po::value<std::string>()->required(),
          "File describing the Tape Machine to use")
        ("bd", po::value<std::string>()->required(),
          "BD file that describes the multi-tapes buffer")
        ("mcd", po::value<std::string>()->required(),
          "MCD file that describes the input")
        ("input,I", po::value<std::string>()->required(),
          "Input file formatted according to the mcd");
    
      // Options that may be omitted; value options fall back to their default.
      po::options_description opt("Optional");
      opt.add_options()
        ("help,h", "Produce this help message")
        ("debug,d", "Print infos on stderr")
        ("dicts", po::value<std::string>()->default_value(""),
          "The .dict file describing all the dictionaries to be used in the experiment. By default the filename specified in the .tm file will be used")
        ("featureModels", po::value<std::string>()->default_value(""),
          "For each classifier, specify what .fm (feature model) file to use. By default the filename specified in the .cla file will be used. Example : --featureModels Parser=parser.fm,Tagger=tagger.fm")
    
        ("printEntropy", "Print entropy for each sequence")
        ("sequenceDelimiterTape", po::value<std::string>()->default_value("EOS"),
          "The name of the buffer's tape that contains the delimiter token for a sequence")
        ("sequenceDelimiter", po::value<std::string>()->default_value("1"),
          "The value of the token that acts as a delimiter for sequences")
        ("showFeatureRepresentation", po::value<int>()->default_value(0),
          "For each state of the Config, show its feature representation")
        ("interactive", po::value<bool>()->default_value(true),
          "Is the shell interactive ? Display advancement informations")
        ("lang", po::value<std::string>()->default_value("fr"),
          "Language you are working with");
    
      // Options controlling the error analysis report.
      po::options_description analysis("Error analysis related options");
      analysis.add_options()
        ("errorAnalysis", "Print an analysis of errors")
        ("meanEntropy", "Print the mean entropy for error types")
        ("onlyPrefixes", "Only uses the prefixes of error categories")
        ("nbErrorsToShow", po::value<int>()->default_value(10),
          "Display only the X most common errors")
        ("classifier", po::value<std::string>()->default_value(""),
          "Name of the monitored classifier, if not specified monitor everyone");
    
      desc.add(req).add(opt).add(analysis);
    
      return desc;
    }
    
    /// @brief Parse the command line and store the result inside a variables_map.
    ///
    /// On a parsing or validation error, the error and the usage are printed on
    /// stderr and the program exits with status 1. When the help option is
    /// present, the usage is printed on stdout and the program exits with
    /// status 0, before the validation step is run.
    ///
    /// @param od The description of all the possible options.
    /// @param argc The number of arguments given to this program.
    /// @param argv The values of arguments given to this program.
    ///
    /// @return The variables map holding the parsed options.
    po::variables_map checkOptions(po::options_description & od, int argc, char ** argv)
    {
      // Common failure path : report the error, show the usage, then quit.
      const auto abortWithUsage = [&od](const std::exception & error)
      {
        std::cerr << "Error: " << error.what() << "\n";
        od.print(std::cerr);
        exit(1);
      };
    
      po::variables_map parsed;
    
      try
      {
        po::store(po::parse_command_line(argc, argv, od), parsed);
      }
      catch (const std::exception & error)
      {
        abortWithUsage(error);
      }
    
      // The help request is handled before notify, so that asking for help
      // does not fail because of the validation done by notify.
      if (parsed.count("help") != 0)
      {
        std::cout << od << "\n";
        exit(0);
      }
    
      try
      {
        po::notify(parsed);
      }
      catch (const std::exception & error)
      {
        abortWithUsage(error);
      }
    
      return parsed;
    }
    
    /// @brief Uses a pre-trained TransitionMachine to predict and add information to a structured input file.
    ///
    /// The command-line options are copied into ProgramParameters, the paths of
    /// the experiment files are built from the MACAON_DIR environment variable,
    /// then the input is read into a Config and decoded.
    ///
    /// The program exits with status 1 if the featureModels option is malformed
    /// or if the MACAON_DIR environment variable is not set.
    ///
    /// @param argc The number of arguments given to this program.
    /// @param argv[] Array of arguments given to this program.
    ///
    /// @return 0 if there was no crash.
    int main(int argc, char * argv[])
    {
      auto od = getOptionsDescription();
    
      po::variables_map vm = checkOptions(od, argc, argv);
    
      // NOTE(review): the "classifier" option is declared in the options
      // description but is not read in this function.
      ProgramParameters::expName = vm["expName"].as<std::string>();
      ProgramParameters::tmName = vm["tm"].as<std::string>();
      ProgramParameters::bdName = vm["bd"].as<std::string>();
      ProgramParameters::input = vm["input"].as<std::string>();
      ProgramParameters::mcdName = vm["mcd"].as<std::string>();
      ProgramParameters::debug = vm.count("debug") != 0;
      ProgramParameters::interactive = vm["interactive"].as<bool>();
      ProgramParameters::errorAnalysis = vm.count("errorAnalysis") != 0;
      ProgramParameters::nbErrorsToShow = vm["nbErrorsToShow"].as<int>();
      ProgramParameters::meanEntropy = vm.count("meanEntropy") != 0;
      ProgramParameters::onlyPrefixes = vm.count("onlyPrefixes") != 0;
      ProgramParameters::dicts = vm["dicts"].as<std::string>();
      ProgramParameters::printEntropy = vm.count("printEntropy") != 0;
      ProgramParameters::lang = vm["lang"].as<std::string>();
      ProgramParameters::sequenceDelimiterTape = vm["sequenceDelimiterTape"].as<std::string>();
      ProgramParameters::sequenceDelimiter = vm["sequenceDelimiter"].as<std::string>();
      ProgramParameters::showFeatureRepresentation = vm["showFeatureRepresentation"].as<int>();
      ProgramParameters::optimizer = "none";
    
      // featureModels is a comma separated list of Classifier=file.fm pairs.
      std::string featureModels = vm["featureModels"].as<std::string>();
      if (!featureModels.empty())
      {
        auto byClassifiers = split(featureModels, ',');
        for (auto & classifier : byClassifiers)
        {
          auto parts = split(classifier, '=');
          if (parts.size() != 2)
          {
            fprintf(stderr, "ERROR (%s) : wrong format for argument of option featureModels. Aborting.\n", ERRINFO);
            exit(1);
          }
          ProgramParameters::featureModelByClassifier[parts[0]] = parts[1];
        }
      }
    
      // std::getenv returns a null pointer when the variable is not set, and
      // concatenating a null pointer with a std::string is undefined behaviour.
      const char * MACAON_DIR = std::getenv("MACAON_DIR");
      if (MACAON_DIR == nullptr)
      {
        fprintf(stderr, "ERROR (%s) : environment variable MACAON_DIR is not set. Aborting.\n", ERRINFO);
        exit(1);
      }
    
      std::string slash = "/";
      ProgramParameters::expPath = MACAON_DIR + slash + ProgramParameters::lang + slash + "bin/" + ProgramParameters::expName + slash;
    
      // The tm and bd files are looked up inside the experiment directory,
      // whereas the mcd file name is used exactly as given on the command line.
      ProgramParameters::tmFilename = ProgramParameters::expPath + ProgramParameters::tmName;
      ProgramParameters::bdFilename = ProgramParameters::expPath + ProgramParameters::bdName;
      ProgramParameters::mcdFilename = ProgramParameters::mcdName;
    
      // NOTE(review): the meaning of the 'false' argument is defined by
      // TransitionMachine (presumably "not in training mode") - confirm.
      TransitionMachine tapeMachine(false);
    
      BD bd(ProgramParameters::bdFilename, ProgramParameters::mcdFilename);
      Config config(bd);
      config.readInput(ProgramParameters::input);
    
      Decoder decoder(tapeMachine, config);
    
      decoder.decode();
    
      return 0;
    }