Select Git revision
simple_decoder_parser_arc_eager.c
MacaonTrain.cpp 10.61 KiB
#include "MacaonTrain.hpp"
#include <filesystem>
#include "util.hpp"
#include "NeuralNetwork.hpp"
#include "WordEmbeddings.hpp"
namespace po = boost::program_options;
// Build the description of every command-line option accepted by the trainer.
//
// The options are declared in two groups which are then merged, in this order,
// into the returned description:
//   - "Required": `model` and `trainTSV`, both flagged with required().
//   - "Optional": switches (no value) and valued options carrying a default.
//
// Returns the merged description; nothing is parsed here.
po::options_description MacaonTrain::getOptionsDescription()
{
  po::options_description desc("Command-Line Arguments ");

  // Options flagged as required().
  po::options_description req("Required");
  req.add_options()
    ("model", po::value<std::string>()->required(),
      "Directory containing the machine file to train")
    ("trainTSV", po::value<std::string>()->required(),
      "TSV file of the training corpus, in CONLLU format");

  // Everything else: either a value-less switch or a value with a default.
  po::options_description opt("Optional");
  opt.add_options()
    ("debug,d", "Print debuging infos on stderr")
    ("silent", "Don't print speed and progress")
    ("devScore", "Compute score on dev instead of loss (slower)")
    ("mcd", po::value<std::string>()->default_value("ID,FORM,LEMMA,UPOS,XPOS,FEATS,HEAD,DEPREL"),
      "Comma separated column names that describes the input/output format")
    ("trainTXT", po::value<std::string>()->default_value(""),
      "Raw text file of the training corpus")
    ("devTSV", po::value<std::string>()->default_value(""),
      "TSV file of the development corpus, in CONLLU format")
    ("devTXT", po::value<std::string>()->default_value(""),
      "Raw text file of the development corpus")
    ("nbEpochs,n", po::value<int>()->default_value(5),
      "Number of training epochs")
    ("batchSize", po::value<int>()->default_value(64),
      "Number of examples per batch")
    ("explorationThreshold", po::value<float>()->default_value(0.1),
      "Maximum probability difference with the best scoring transition, for a transition to be explored during dynamic extraction of dataset")
    ("machine", po::value<std::string>()->default_value(""),
      "Reading machine file content")
    ("trainStrategy", po::value<std::string>()->default_value("0,ExtractGold,ResetParameters"),
      "Description of what should happen during training")
    ("loss", po::value<std::string>()->default_value("CrossEntropy"),
      "Loss function to use during training : CrossEntropy | bce | mse | hinge")
    // NOTE(review): how the seed is consumed is not visible from this function;
    // the help text assumes it seeds the random number generator - confirm.
    ("seed", po::value<int>()->default_value(100),
      "Seed of the random number generator")
    ("scaleGrad", "Scale embedding's gradient with its frequence in the minibatch")
    // Default is the largest finite float.
    ("maxNorm", po::value<float>()->default_value(std::numeric_limits<float>::max()),
      "Max norm for the embeddings")
    ("help,h", "Produce this help message");

  // Required options are listed before optional ones in the merged description.
  desc.add(req).add(opt);

  return desc;
}
po::variables_map MacaonTrain::checkOptions(po::options_description & od)
{
po::variables_map vm;
try {po::store(po::parse_command_line(argc, argv, od), vm);}
catch(std::exception & e) {util::myThrow(e.what());}
if (vm.count("help"))
{
std::stringstream ss;
ss << od;
fmt::print(stderr, "{}\n", ss.str());
exit(0);
}