Summary: stops packing bin/ into maca_datas.tgz during "install" (d6 and fr)
and hooks the new fr/maca_trans_tagger sub-build into the compile, install and
clean targets of fr/Makefile. Text above the first "diff --git" line is
ignored by patch tools. Recipe lines inside the hunks begin with a TAB.

diff --git a/d6/Makefile b/d6/Makefile
index 042933606547d842c348247bbc2d62a71de6f74e..363605338793910476b6b2df78a9c40510f5e903 100644
--- a/d6/Makefile
+++ b/d6/Makefile
@@ -11,7 +11,7 @@ install:
 	$(MAKE) -C maca_trans_parser install
 #	$(MAKE) -C maca_lemmatizer install
 #	$(MAKE) -C maca_crf_tagger install
-	@tar -cvzf ./maca_datas.tgz bin
+#	@tar -cvzf ./maca_datas.tgz bin
 
 clean:
 #	$(MAKE) -C data/morpho-lexicon clean
diff --git a/fr/Makefile b/fr/Makefile
index 0a170aaeb026526dcba11f587c2fc62a262eaf44..a3cb14a526d0cff31193b9cc8962399b09af3ba4 100644
--- a/fr/Makefile
+++ b/fr/Makefile
@@ -4,17 +4,20 @@ compile:
 	$(MAKE) -C data/morpho-lexicon compile
 	$(MAKE) -C data/treebank compile
 	$(MAKE) -C maca_trans_parser compile
+	$(MAKE) -C maca_trans_tagger compile
 	$(MAKE) -C maca_crf_tagger compile
 
 install:
 	-mkdir -p bin
 	$(MAKE) -C maca_trans_parser install
+	$(MAKE) -C maca_trans_tagger install
 	$(MAKE) -C maca_lemmatizer install
 	$(MAKE) -C maca_crf_tagger install
-	@tar -cvzf ./maca_datas.tgz bin
+#	@tar -cvzf ./maca_datas.tgz bin
 
 clean:
 	$(MAKE) -C data/morpho-lexicon clean
 	$(MAKE) -C data/treebank clean
 	$(MAKE) -C maca_trans_parser clean
+	$(MAKE) -C maca_trans_tagger clean
 	$(MAKE) -C maca_crf_tagger clean
diff --git a/fr/maca_trans_tagger/Makefile b/fr/maca_trans_tagger/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..b5286c9cff4d227d43b0746b7acd9008439cc0fd
--- /dev/null
+++ b/fr/maca_trans_tagger/Makefile
@@ -0,0 +1,52 @@
+CONLL07TRAIN=../data/treebank/train.conll07
+CONLL07DEV=../data/treebank/dev.conll07
+CONLL07TEST=../data/treebank/test.conll07
+
+CFF_TRAIN=train.cff
+CFF_CUTOFF_TRAIN=train.cutoff.cff
+PERCEPTRON_ITERATIONS=5
+CFF_CUTOFF=1
+FEATURES_MODEL_FILENAME=maca_trans_tagger.fm
+VOCABS_FILENAME=maca_trans_tagger.vocab
+MCD_FILENAME=maca_trans_tagger.mcd
+MODEL_FILENAME=maca_trans_tagger.model
+NUMBER_OF_SENTENCES=10000000
+STREAM_MODE=0
+
+
+##-----------------------------------------------------------------------
+## compile
+##-----------------------------------------------------------------------
+
+compile: $(MODEL_FILENAME)
+
+$(CFF_TRAIN): $(CONLL07TRAIN)
+	maca_trans_parser_conll2cff_tagger --conll $< --mode TRAIN --feat_model $(FEATURES_MODEL_FILENAME) --vocabs $(VOCABS_FILENAME) --cff $@ -s $(NUMBER_OF_SENTENCES) -T $(STREAM_MODE) -C $(MCD_FILENAME)
+
+$(CFF_CUTOFF_TRAIN): $(CFF_TRAIN)
+	maca_trans_parser_cff_cutoff --cff $< --vocabs $(VOCABS_FILENAME) --cutoff $(CFF_CUTOFF) > $@
+
+#$(MODEL_FILENAME): $(CFF_CUTOFF_TRAIN)
+$(MODEL_FILENAME): $(CFF_TRAIN)
+	maca_trans_parser_train_from_cff --cff $< --model $(MODEL_FILENAME) -n $(PERCEPTRON_ITERATIONS)
+
+##-----------------------------------------------------------------------
+## install
+##-----------------------------------------------------------------------
+
+install:
+	- cp $(FEATURES_MODEL_FILENAME) ../bin
+	- cp $(VOCABS_FILENAME) ../bin
+	- cp $(MODEL_FILENAME) ../bin
+
+##-----------------------------------------------------------------------
+## clean
+##-----------------------------------------------------------------------
+
+clean:
+	- rm -f $(VOCABS_FILENAME)
+	- rm -f $(MODEL_FILENAME)
+	- rm -f $(CFF_TRAIN)
+	- rm -f $(CFF_CUTOFF_TRAIN)
+
+
diff --git a/fr/maca_trans_tagger/maca_trans_tagger.fm b/fr/maca_trans_tagger/maca_trans_tagger.fm
new file mode 100644
index 0000000000000000000000000000000000000000..a7378070b201d5f415cb3a9261207dde1e93c547
--- /dev/null
+++ b/fr/maca_trans_tagger/maca_trans_tagger.fm
@@ -0,0 +1,11 @@
+b0f
+b1f
+b2f
+s0f
+s1f
+s0p
+s1p
+s2p
+s0p s1p
+s0p s1p s2p
+s1p s2p
diff --git a/fr/maca_trans_tagger/maca_trans_tagger.mcd b/fr/maca_trans_tagger/maca_trans_tagger.mcd
new file mode 100644
index 0000000000000000000000000000000000000000..6b58caa20cd7595435c4536952f118587f552158
--- /dev/null
+++ b/fr/maca_trans_tagger/maca_trans_tagger.mcd
@@ -0,0 +1,2 @@
+1 FORM VOCAB _
+4 POS VOCAB _