From d0530fcd2e4b3663ba5dd6b17266a529f7791781 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@etu.univ-amu.fr>
Date: Fri, 14 Dec 2018 15:09:17 +0100
Subject: [PATCH] Added a way to specify .fm file for each classifier, as command line arguments

---
Review notes (free-form area between the "---" marker and the diffstat;
not part of the commit message):

* What the patch does: adds a static map
  ProgramParameters::featureModelByClassifier, fills it in the trainer's
  main() from the new --featureModels option, and consults it in the
  Classifier constructor when choosing the feature model file.

* Option format: a comma-separated list of Name=file entries. Each entry
  is split on '='; an entry that does not yield exactly two parts prints
  an error and calls exit(1).

* Help text fix: the example in the --featureModels description was
  spelled "--featureModel"; it now matches the registered option name.
  The post-image blob id on the trainer/src/macaon_train.cpp "index"
  line predates this one-word correction.

* Path handling: the name read from the .cla file is prefixed with
  ProgramParameters::expPath, whereas a command-line override is used
  exactly as given. NOTE(review): confirm this asymmetry is intended.

* NOTE(review): the override is looked up with this->name, so the
  classifier name must already be set at that point of the constructor;
  that assignment is not visible in the hunk - please verify.

* NOTE(review): only the trainer's option parsing is touched here;
  check whether other entry points need the same option.

* Formatting: the line structure was restored from a copy in which the
  patch had been collapsed onto three lines. Leading whitespace of
  context and added lines is reconstructed (two-space indentation
  assumed), so use "git apply --ignore-whitespace" or regenerate the
  diff if the context fails to match.

 maca_common/include/ProgramParameters.hpp |  2 ++
 maca_common/src/ProgramParameters.cpp     |  1 +
 trainer/src/macaon_train.cpp              | 17 +++++++++++++++++
 transition_machine/src/Classifier.cpp     |  7 ++++++-
 4 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/maca_common/include/ProgramParameters.hpp b/maca_common/include/ProgramParameters.hpp
index 14d920f..a749f9d 100644
--- a/maca_common/include/ProgramParameters.hpp
+++ b/maca_common/include/ProgramParameters.hpp
@@ -6,6 +6,7 @@
 #define PROGRAMPARAMETERS__H
 
 #include <string>
+#include <map>
 
 struct ProgramParameters
 {
@@ -54,6 +55,7 @@ struct ProgramParameters
   static int batchSize;
   static std::string loss;
   static std::string dicts;
+  static std::map<std::string,std::string> featureModelByClassifier;
 
   private :
 
diff --git a/maca_common/src/ProgramParameters.cpp b/maca_common/src/ProgramParameters.cpp
index cd88256..41930ad 100644
--- a/maca_common/src/ProgramParameters.cpp
+++ b/maca_common/src/ProgramParameters.cpp
@@ -49,3 +49,4 @@ std::string ProgramParameters::sequenceDelimiter;
 std::string ProgramParameters::classifierName;
 int ProgramParameters::batchSize;
 std::string ProgramParameters::loss;
+std::map<std::string,std::string> ProgramParameters::featureModelByClassifier;
diff --git a/trainer/src/macaon_train.cpp b/trainer/src/macaon_train.cpp
index ab8111c..1e3c79d 100644
--- a/trainer/src/macaon_train.cpp
+++ b/trainer/src/macaon_train.cpp
@@ -43,6 +43,8 @@ po::options_description getOptionsDescription()
     ("printEntropy", "Print mean entropy and standard deviation accross sequences")
     ("dicts", po::value<std::string>()->default_value(""),
       "The .dict file describing all the dictionaries to be used in the experiement. By default the filename specified in the .tm file will be used")
+    ("featureModels", po::value<std::string>()->default_value(""),
+      "For each classifier, specify what .fm (feature model) file to use. By default the filename specified in the .cla file will be used. Example : --featureModels Parser=parser.fm,Tagger=tagger.fm")
     ("optimizer", po::value<std::string>()->default_value("amsgrad"),
       "The learning algorithm to use : amsgrad | adam | sgd")
     ("loss", po::value<std::string>()->default_value("neglogsoftmax"),
@@ -273,6 +275,21 @@ int main(int argc, char * argv[])
   ProgramParameters::dynamicProbability = vm["proba"].as<float>();
   ProgramParameters::showFeatureRepresentation = vm["showFeatureRepresentation"].as<int>();
   ProgramParameters::iterationSize = vm["iterationSize"].as<int>();
+  std::string featureModels = vm["featureModels"].as<std::string>();
+  if (!featureModels.empty())
+  {
+    auto byClassifiers = split(featureModels, ',');
+    for (auto & classifier : byClassifiers)
+    {
+      auto parts = split(classifier, '=');
+      if (parts.size() != 2)
+      {
+        fprintf(stderr, "ERROR (%s) : wrong format for argument of option featureModels. Aborting.\n", ERRINFO);
+        exit(1);
+      }
+      ProgramParameters::featureModelByClassifier[parts[0]] = parts[1];
+    }
+  }
 
   if (ProgramParameters::nbTrain)
   {
diff --git a/transition_machine/src/Classifier.cpp b/transition_machine/src/Classifier.cpp
index 88b1cd6..59d6d4a 100644
--- a/transition_machine/src/Classifier.cpp
+++ b/transition_machine/src/Classifier.cpp
@@ -55,7 +55,12 @@ Classifier::Classifier(const std::string & filename, bool trainMode)
   if(fscanf(fd, "Feature Model : %s\n", buffer) != 1)
     badFormatAndAbort(ERRINFO);
 
-  fm.reset(new FeatureModel(ProgramParameters::expPath + buffer));
+  std::string fmFilename = ProgramParameters::expPath + buffer;
+
+  if (ProgramParameters::featureModelByClassifier.count(this->name))
+    fmFilename = ProgramParameters::featureModelByClassifier[this->name];
+
+  fm.reset(new FeatureModel(fmFilename));
 
   if(fscanf(fd, "Action Set : %s\n", buffer) != 1)
     badFormatAndAbort(ERRINFO);
-- 
GitLab