From 2154d4e79f170eefcab31b0f98c2b2c725b66c4b Mon Sep 17 00:00:00 2001
From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
Date: Fri, 8 Jul 2016 15:57:52 -0400
Subject: [PATCH] added a transition tagger for fr

---
Review notes (placed after the three-dash line, so they are not part of
the commit message):

  * Adds fr/maca_trans_tagger/ with a Makefile, a features model
    (maca_trans_tagger.fm) and an MCD file (maca_trans_tagger.mcd), and
    calls its compile/install/clean targets from fr/Makefile.
  * Comments out the "tar -cvzf ./maca_datas.tgz bin" step of the install
    target in both d6/Makefile and fr/Makefile.
  * In the new Makefile the model is trained from train.cff; the rule that
    would train from train.cutoff.cff is left commented out, so the cutoff
    rule is not reached from "compile".
  * NOTE(review): the train.cff recipe reads FEATURES_MODEL_FILENAME and
    MCD_FILENAME but lists only CONLL07TRAIN as a prerequisite, so editing
    the .fm or .mcd file does not trigger a rebuild. Consider adding them
    as prerequisites in a follow-up.
  * NOTE(review): install copies the .fm, .vocab and .model files but not
    the .mcd file; confirm whether the tagger needs it at run time.
  * NOTE(review): compile, install and clean are not declared .PHONY.
  * NOTE(review): line structure was restored from a copy whose newlines
    and tabs had been flattened to spaces. Recipe lines use a hard TAB and
    commented-out recipe lines are assumed to be "#<TAB>..."; confirm
    against the tree before applying.

 d6/Makefile                                |  2 +-
 fr/Makefile                                |  5 ++-
 fr/maca_trans_tagger/Makefile              | 52 ++++++++++++++++++++++
 fr/maca_trans_tagger/maca_trans_tagger.fm  | 11 +++++
 fr/maca_trans_tagger/maca_trans_tagger.mcd |  2 +
 5 files changed, 70 insertions(+), 2 deletions(-)
 create mode 100644 fr/maca_trans_tagger/Makefile
 create mode 100644 fr/maca_trans_tagger/maca_trans_tagger.fm
 create mode 100644 fr/maca_trans_tagger/maca_trans_tagger.mcd

diff --git a/d6/Makefile b/d6/Makefile
index 0429336..3636053 100644
--- a/d6/Makefile
+++ b/d6/Makefile
@@ -11,7 +11,7 @@ install:
 	$(MAKE) -C maca_trans_parser install
 #	$(MAKE) -C maca_lemmatizer install
 #	$(MAKE) -C maca_crf_tagger install
-	@tar -cvzf ./maca_datas.tgz bin
+#	@tar -cvzf ./maca_datas.tgz bin
 
 clean:
 #	$(MAKE) -C data/morpho-lexicon clean
diff --git a/fr/Makefile b/fr/Makefile
index 0a170aa..a3cb14a 100644
--- a/fr/Makefile
+++ b/fr/Makefile
@@ -4,17 +4,20 @@ compile:
 	$(MAKE) -C data/morpho-lexicon compile
 	$(MAKE) -C data/treebank compile
 	$(MAKE) -C maca_trans_parser compile
+	$(MAKE) -C maca_trans_tagger compile
 	$(MAKE) -C maca_crf_tagger compile
 
 install:
 	-mkdir -p bin
 	$(MAKE) -C maca_trans_parser install
+	$(MAKE) -C maca_trans_tagger install
 	$(MAKE) -C maca_lemmatizer install
 	$(MAKE) -C maca_crf_tagger install
-	@tar -cvzf ./maca_datas.tgz bin
+#	@tar -cvzf ./maca_datas.tgz bin
 
 clean:
 	$(MAKE) -C data/morpho-lexicon clean
 	$(MAKE) -C data/treebank clean
 	$(MAKE) -C maca_trans_parser clean
+	$(MAKE) -C maca_trans_tagger clean
 	$(MAKE) -C maca_crf_tagger clean
diff --git a/fr/maca_trans_tagger/Makefile b/fr/maca_trans_tagger/Makefile
new file mode 100644
index 0000000..b5286c9
--- /dev/null
+++ b/fr/maca_trans_tagger/Makefile
@@ -0,0 +1,52 @@
+CONLL07TRAIN=../data/treebank/train.conll07
+CONLL07DEV=../data/treebank/dev.conll07
+CONLL07TEST=../data/treebank/test.conll07
+
+CFF_TRAIN=train.cff
+CFF_CUTOFF_TRAIN=train.cutoff.cff
+PERCEPTRON_ITERATIONS=5
+CFF_CUTOFF=1
+FEATURES_MODEL_FILENAME=maca_trans_tagger.fm
+VOCABS_FILENAME=maca_trans_tagger.vocab
+MCD_FILENAME=maca_trans_tagger.mcd
+MODEL_FILENAME=maca_trans_tagger.model
+NUMBER_OF_SENTENCES=10000000
+STREAM_MODE=0
+
+
+##-----------------------------------------------------------------------
+## compile
+##-----------------------------------------------------------------------
+
+compile: $(MODEL_FILENAME)
+
+$(CFF_TRAIN): $(CONLL07TRAIN)
+	maca_trans_parser_conll2cff_tagger --conll $< --mode TRAIN --feat_model $(FEATURES_MODEL_FILENAME) --vocabs $(VOCABS_FILENAME) --cff $@ -s $(NUMBER_OF_SENTENCES) -T $(STREAM_MODE) -C $(MCD_FILENAME)
+
+$(CFF_CUTOFF_TRAIN): $(CFF_TRAIN)
+	maca_trans_parser_cff_cutoff --cff $< --vocabs $(VOCABS_FILENAME) --cutoff $(CFF_CUTOFF) > $@
+
+#$(MODEL_FILENAME): $(CFF_CUTOFF_TRAIN)
+$(MODEL_FILENAME): $(CFF_TRAIN)
+	maca_trans_parser_train_from_cff --cff $< --model $(MODEL_FILENAME) -n $(PERCEPTRON_ITERATIONS)
+
+##-----------------------------------------------------------------------
+## install
+##-----------------------------------------------------------------------
+
+install:
+	- cp $(FEATURES_MODEL_FILENAME) ../bin
+	- cp $(VOCABS_FILENAME) ../bin
+	- cp $(MODEL_FILENAME) ../bin
+
+##-----------------------------------------------------------------------
+## clean
+##-----------------------------------------------------------------------
+
+clean:
+	- rm -f $(VOCABS_FILENAME)
+	- rm -f $(MODEL_FILENAME)
+	- rm -f $(CFF_TRAIN)
+	- rm -f $(CFF_CUTOFF_TRAIN)
+
+
diff --git a/fr/maca_trans_tagger/maca_trans_tagger.fm b/fr/maca_trans_tagger/maca_trans_tagger.fm
new file mode 100644
index 0000000..a737807
--- /dev/null
+++ b/fr/maca_trans_tagger/maca_trans_tagger.fm
@@ -0,0 +1,11 @@
+b0f
+b1f
+b2f
+s0f
+s1f
+s0p
+s1p
+s2p
+s0p s1p
+s0p s1p s2p
+s1p s2p
diff --git a/fr/maca_trans_tagger/maca_trans_tagger.mcd b/fr/maca_trans_tagger/maca_trans_tagger.mcd
new file mode 100644
index 0000000..6b58caa
--- /dev/null
+++ b/fr/maca_trans_tagger/maca_trans_tagger.mcd
@@ -0,0 +1,2 @@
+1 FORM VOCAB _
+4 POS VOCAB _
-- 
GitLab