diff --git a/maca_crf_tagger/src/crf_barebones_decoder.cc b/maca_crf_tagger/src/crf_barebones_decoder.cc index 8a3aeb47e18e5260dc2470cff9730d118ce43920..fd2ab3af6fb2005b1e5bad447d032a8ea4b8f55e 100644 --- a/maca_crf_tagger/src/crf_barebones_decoder.cc +++ b/maca_crf_tagger/src/crf_barebones_decoder.cc @@ -1,3 +1,4 @@ +#include <cstdlib> #include <vector> #include "crf_decoder.hh" #include "crf_binlexicon.hh" @@ -42,15 +43,20 @@ void tag_sentence(macaon::Decoder& decoder, macaon::BinaryLexicon* lexicon, cons } void usage(const char* argv0) { - std::cerr << "usage: " << argv0 << " [--conll07|--column <num>] <model> [lexicon]\n"; + std::cerr << "usage: " << argv0 << " [--conll07|--column <num>] [<model> [lexicon]|-L <lang> [-D <model-dir>] [--nolexicon]]\n"; exit(1); } int main(int argc, char** argv) { bool isConll07 = false; // warning: no verification of conll07 format int word_offset = 0; + std::string modelDir = ""; + std::string modelLang = ""; + char* macaon_dir = getenv("MACAON_DIR"); + if(macaon_dir != NULL) modelDir = macaon_dir; std::string modelName = ""; std::string lexiconName = ""; + bool noLexicon = false; for(int i = 1; i < argc; i++) { std::string arg = argv[i]; @@ -63,6 +69,16 @@ int main(int argc, char** argv) { arg = argv[i + 1]; word_offset = strtol(arg.c_str(), NULL, 10) - 1; i++; + } else if(arg == "-L") { + arg = argv[i + 1]; + modelLang = arg; + i++; + } else if(arg == "-D") { + arg = argv[i + 1]; + modelDir = arg; + i++; + } else if(arg == "--nolexicon") { + noLexicon = true; } else if(modelName == "") { modelName = arg; } else if(lexiconName =="") { @@ -71,11 +87,19 @@ int main(int argc, char** argv) { usage(argv[0]); } } + if(modelDir != "" && modelLang != "") { + modelName = modelDir + "/" + modelLang + "/bin/crf_tagger_model.bin"; + if(!noLexicon) lexiconName = modelDir + "/" + modelLang + "/bin/crf_tagger_wordtag_lexicon.bin"; + } if(modelName == "" || word_offset < 0) usage(argv[0]); macaon::Decoder decoder(modelName); + if(!decoder.IsLoaded()) return 1; macaon::BinaryLexicon *lexicon = NULL; - if(lexiconName != "") lexicon = new macaon::BinaryLexicon(lexiconName, decoder.getTagset()); + if(lexiconName != "") { + lexicon = new macaon::BinaryLexicon(lexiconName, decoder.getTagset()); + if(!lexicon->IsLoaded()) return 1; + } std::string line; std::vector<std::vector<std::string> > lines;