Skip to content
Snippets Groups Projects
Select Git revision
  • 89b9246365ce983d292f845ccdd342318efd91ce
  • master default protected
  • johannes
  • partial_parser
  • Aloui_Dary
  • ignore_punct
  • AC
  • classifier
  • fixhelp
  • libmacaon2
  • error_predictor
  • morpho
  • ssrnn
  • tfparsing
  • silvio
  • tagger_options
  • maca_trans_frame_parser
  • alexis
  • new_config
  • tagparse
  • maca_graph_parser
21 results

simple_decoder_parser_arc_eager.c

Blame
  • MacaonTrain.cpp 10.61 KiB
    #include "MacaonTrain.hpp"
    #include <filesystem>
    #include "util.hpp"
    #include "NeuralNetwork.hpp"
    #include "WordEmbeddings.hpp"
    
    // Short alias for the Boost.Program_options namespace, used by the option-handling code below.
    namespace po = boost::program_options;
    
    /// Builds the description of the command-line options accepted by the
    /// training program.
    ///
    /// Two groups are declared and then merged into the returned description:
    ///  - "Required": `model` and `trainTSV`, both marked `required()`.
    ///  - "Optional": value-less flags (`debug`, `silent`, `devScore`,
    ///    `scaleGrad`, `help`) and valued options that all carry a default.
    ///
    /// @return The combined description, required group first, optional second.
    po::options_description MacaonTrain::getOptionsDescription()
    {
      po::options_description desc("Command-Line Arguments ");
    
      po::options_description req("Required");
      req.add_options()
        ("model", po::value<std::string>()->required(),
          "Directory containing the machine file to train")
        ("trainTSV", po::value<std::string>()->required(),
          "TSV file of the training corpus, in CONLLU format");
    
      po::options_description opt("Optional");
      opt.add_options()
        ("debug,d", "Print debuging infos on stderr")
        ("silent", "Don't print speed and progress")
        ("devScore", "Compute score on dev instead of loss (slower)")
        ("mcd", po::value<std::string>()->default_value("ID,FORM,LEMMA,UPOS,XPOS,FEATS,HEAD,DEPREL"),
          "Comma separated column names that describes the input/output format")
        ("trainTXT", po::value<std::string>()->default_value(""),
          "Raw text file of the training corpus")
        ("devTSV", po::value<std::string>()->default_value(""),
          "TSV file of the development corpus, in CONLLU format")
        ("devTXT", po::value<std::string>()->default_value(""),
          "Raw text file of the development corpus")
        ("nbEpochs,n", po::value<int>()->default_value(5),
          "Number of training epochs")
        ("batchSize", po::value<int>()->default_value(64),
          "Number of examples per batch")
        ("explorationThreshold", po::value<float>()->default_value(0.1),
          "Maximum probability difference with the best scoring transition, for a transition to be explored during dynamic extraction of dataset")
        ("machine", po::value<std::string>()->default_value(""),
          "Reading machine file content")
        ("trainStrategy", po::value<std::string>()->default_value("0,ExtractGold,ResetParameters"),
          "Description of what should happen during training")
        ("loss", po::value<std::string>()->default_value("CrossEntropy"),
          "Loss function to use during training : CrossEntropy | bce | mse | hinge")
        // NOTE(review): this help text previously duplicated the one of
        // "batchSize". Presumably the value seeds the random number
        // generator -- confirm against the code that reads vm["seed"].
        ("seed", po::value<int>()->default_value(100),
          "Seed of the random number generator")
        ("scaleGrad", "Scale embedding's gradient with its frequence in the minibatch")
        // The largest finite float is the default for maxNorm.
        ("maxNorm", po::value<float>()->default_value(std::numeric_limits<float>::max()),
          "Max norm for the embeddings")
        ("help,h", "Produce this help message");
    
      // Order matters for the generated help text: required options are listed first.
      desc.add(req).add(opt);
    
      return desc;
    }
    
    po::variables_map MacaonTrain::checkOptions(po::options_description & od)
    {
      po::variables_map vm;
    
      try {po::store(po::parse_command_line(argc, argv, od), vm);}
      catch(std::exception & e) {util::myThrow(e.what());}
    
      if (vm.count("help"))
      {
        std::stringstream ss;
        ss << od;
        fmt::print(stderr, "{}\n", ss.str());
        exit(0);
      }