diff --git a/template/Makefile b/template/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..ac96876c4cff264fd366586e3ae3ebed60436c6b
--- /dev/null
+++ b/template/Makefile
@@ -0,0 +1,38 @@
+## Compile all data
+##
+## Top-level driver: the targets below are carried out by recursive
+## $(MAKE) -C calls into the component sub-directories.
+
+# None of these targets names a file, so declare them phony.
+.PHONY: all compile install evaluation clean
+
+all: compile install evaluation
+
+compile:
+	$(MAKE) -C data/morpho-lexicon compile
+	$(MAKE) -C data/treebank compile
+	$(MAKE) -C maca_lexer compile
+	$(MAKE) -C maca_trans_parser compile
+	$(MAKE) -C maca_trans_tagger compile
+#	$(MAKE) -C maca_crf_tagger compile
+
+install:
+	-mkdir -p bin
+	$(MAKE) -C maca_trans_parser install
+	$(MAKE) -C maca_trans_tagger install
+	$(MAKE) -C maca_lemmatizer install
+	$(MAKE) -C maca_lexer install
+#	$(MAKE) -C maca_crf_tagger install
+#	@tar -cvzf ./maca_datas.tgz bin
+
+evaluation:
+	$(MAKE) -C eval
+
+clean:
+	$(MAKE) -C data/morpho-lexicon clean
+	$(MAKE) -C data/treebank clean
+	$(MAKE) -C maca_lexer clean
+	$(MAKE) -C maca_trans_parser clean
+	$(MAKE) -C maca_trans_tagger clean
+#	$(MAKE) -C maca_crf_tagger clean
+	$(MAKE) -C eval clean
diff --git a/template/data/morpho-lexicon/Makefile b/template/data/morpho-lexicon/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..bd7cfa4f03a7169c8bbf39722db7f12a31732b97
--- /dev/null
+++ b/template/data/morpho-lexicon/Makefile
@@ -0,0 +1,23 @@
+TOOLS=../../../fr/tools/
+TOOLS_MACA_DATA=../../../tools/
+LEFFF_DIR=../../../data/lefff
+
+# compile and clean are commands, not files.
+.PHONY: compile clean
+
+compile: fplm fP
+
+# fplm: output of lefff2fplm.pl on $(LEFFF_DIR), piped through fplm_change_pos.pl,
+# then merged with the extra entries in fplm_add, sorted and de-duplicated.
+fplm: $(TOOLS)lefff2fplm.pl $(TOOLS)fplm_change_pos.pl $(LEFFF_DIR)/*.lex fplm_add
+	$(TOOLS)lefff2fplm.pl $(LEFFF_DIR) | $(TOOLS)fplm_change_pos.pl > tmp
+	cat tmp fplm_add | sort | uniq > $@
+
+# fP: derived from fplm by fplm2fP.pl.
+fP: fplm
+	$(TOOLS_MACA_DATA)fplm2fP.pl $< > $@
+
+clean:
+	-rm fplm
+	-rm fP
+	-rm tmp
diff --git a/template/data/morpho-lexicon/fplm_add b/template/data/morpho-lexicon/fplm_add
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/template/data/treebank/Makefile b/template/data/treebank/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..f946786a7fccb65f35f62efc258df72722c64949
--- /dev/null
+++ b/template/data/treebank/Makefile
@@ -0,0 +1,25 @@
+# Convert each treebank split (train/test/dev) to an .mcf file with conll2mcf.
+TOOLS=../../../tools
+FTB_DIR=../../../data/ftb
+TRAIN=$(FTB_DIR)/ftb.train.conll07
+TEST=$(FTB_DIR)/ftb.test.conll07
+DEV=$(FTB_DIR)/ftb.dev.conll07
+
+# compile and clean are commands, not files.
+.PHONY: compile clean
+
+compile: train.mcf test.mcf dev.mcf
+
+train.mcf: $(TRAIN)
+	$(TOOLS)/conll2mcf -f $< -1W -2C -3L -4H -5D > $@
+
+test.mcf: $(TEST)
+	$(TOOLS)/conll2mcf -f $< -1W -2C -3L -4H -5D > $@
+
+# dev.mcf is built from $(DEV): using $(TEST) here would make it a copy of
+# test.mcf and leave the DEV variable unused.
+dev.mcf: $(DEV)
+	$(TOOLS)/conll2mcf -f $< -1W -2C -3L -4H -5D > $@
+
+clean:
+	- rm test.mcf train.mcf dev.mcf
diff --git a/template/eval/Makefile b/template/eval/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..e21cf310a0203bb11170e84d3a75493d0e3b37d3
--- /dev/null
+++ b/template/eval/Makefile
@@ -0,0 +1,6 @@
+TEST=../data/treebank/test.mcf
+DEV=../data/treebank/dev.mcf
+LANGUAGE=example
+MCD_FILE=wplgfs.mcd
+
+include ../../makefiles/eval.makefile
diff --git a/template/eval/wplgfs.mcd b/template/eval/wplgfs.mcd
new file mode 100644
index 0000000000000000000000000000000000000000..c942b51e2e2d348357cdbda245213b6430e6f4c5
--- /dev/null
+++ b/template/eval/wplgfs.mcd
@@ -0,0 +1,6 @@
+1 FORM VOCAB _
+2 POS VOCAB _
+3 LEMMA VOCAB _
+4 GOV INT _
+5 LABEL VOCAB _
+6 SENT_SEG INT _
diff --git a/template/maca_lemmatizer/Makefile b/template/maca_lemmatizer/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..0aca0791623ee752848321c912c57b6ce1119310
--- /dev/null
+++ b/template/maca_lemmatizer/Makefile
@@ -0,0 +1,22 @@
+DATA=../data/morpho-lexicon
+
+##-----------------------------------------------------------------------
+## compile
+##-----------------------------------------------------------------------
+
+##-----------------------------------------------------------------------
+## install
+##-----------------------------------------------------------------------
+
+# install is a command, not a file.
+.PHONY: install
+
+# Copy the fplm lexicon into ../bin; the leading '-' makes make ignore a failed copy.
+install:
+	- cp $(DATA)/fplm ../bin
+
+##-----------------------------------------------------------------------
+## clean
+##-----------------------------------------------------------------------
+
+
diff --git a/template/maca_lexer/Makefile b/template/maca_lexer/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..78de2033cd082427e74e59f8c848cbe51e01587f
--- /dev/null
+++ b/template/maca_lexer/Makefile
@@ -0,0 +1,4 @@
+FPLM_FILE=../data/morpho-lexicon/fplm
+
+include ../../makefiles/maca_lexer.makefile
+
diff --git a/template/maca_trans_parser/Makefile b/template/maca_trans_parser/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..86b9cbf7e7ceebaa0c8664a8603322797494b8f2
--- /dev/null
+++ b/template/maca_trans_parser/Makefile
@@ -0,0 +1,17 @@
+MCF_TRAIN=../data/treebank/train.mcf
+MCF_DEV=../data/treebank/dev.mcf
+MCF_TEST=../data/treebank/test.mcf
+
+CFF_TRAIN=train.cff
+CFF_CUTOFF_TRAIN=train.cutoff.cff
+PERCEPTRON_ITERATIONS=5
+CFF_CUTOFF=3
+FEATURES_MODEL_FILENAME=maca_trans_parser.fm
+VOCABS_FILENAME=maca_trans_parser.vocab
+MODEL_FILENAME=maca_trans_parser.model
+NUMBER_OF_SENTENCES=10000000
+#NUMBER_OF_SENTENCES=1000
+MCD_FILENAME=wplgfs.mcd
+STREAM_MODE= -S
+
+include ../../makefiles/maca_trans_parser.makefile
diff --git a/template/maca_trans_parser/maca_trans_parser.fm b/template/maca_trans_parser/maca_trans_parser.fm
new file mode 100644
index 0000000000000000000000000000000000000000..aface90683fc20dd4e5ec15fbd012b371436323b
--- /dev/null
+++ b/template/maca_trans_parser/maca_trans_parser.fm
@@ -0,0 +1,49 @@
+b0g
+s0g s0p
+s0g b0p
+s0g
+s0sf
+#s1g
+#s1sf
+s0l
+s0p
+s1p
+s2p
+b0l
+b0p
+b1l
+b1p
+b2p
+b3p
+ldep_s0r
+rdep_s0r
+ldep_s1r
+rdep_s1r
+ldep_b0r
+rdep_b0r
+s0l b0l
+s0p b0p
+b0p b0l
+b0p ldep_b0r
+s1p b1p
+b1p b2p
+s0p b0p b0l
+s0p ldep_s0r rdep_s0r
+s0p s0l b0p
+s0p b0p dist_s0_b0
+s1p s0p b0p
+b0p b1p b2p
+b1p b2p b3p
+s0p b0p b1p
+b1p b1l b2p b3p
+b1p b1l b2p b2l b3p
+t1
+t2
+#t3
+#t4
+#t1 t2
+#t2 t3
+#t1 t2 t3
+
+bm1p
+bm2p
diff --git a/template/maca_trans_parser/wplgfs.mcd b/template/maca_trans_parser/wplgfs.mcd
new file mode 100644
index 0000000000000000000000000000000000000000..c942b51e2e2d348357cdbda245213b6430e6f4c5
--- /dev/null
+++ b/template/maca_trans_parser/wplgfs.mcd
@@ -0,0 +1,6 @@
+1 FORM VOCAB _
+2 POS VOCAB _
+3 LEMMA VOCAB _
+4 GOV INT _
+5 LABEL VOCAB _
+6 SENT_SEG INT _
diff --git a/template/maca_trans_tagger/Makefile b/template/maca_trans_tagger/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..baa1fbdc6149fa78935b2be6131675411f9fd59f
--- /dev/null
+++ b/template/maca_trans_tagger/Makefile
@@ -0,0 +1,21 @@
+MCF_TRAIN=../data/treebank/train.mcf
+MCF_DEV=../data/treebank/dev.mcf
+MCF_TEST=../data/treebank/test.mcf
+
+
+CFF_TRAIN=train.cff
+CFF_CUTOFF_TRAIN=train.cutoff.cff
+PERCEPTRON_ITERATIONS=9
+CFF_CUTOFF=1
+FEATURES_MODEL_FILENAME=maca_trans_tagger.fm
+VOCABS_FILENAME=maca_trans_tagger.vocab
+MCD_FILENAME=maca_trans_tagger.mcd
+MODEL_FILENAME=maca_trans_tagger.model
+NUMBER_OF_SENTENCES=10000000
+STREAM_MODE= -S
+
+FORM_POS_FILENAME=../data/morpho-lexicon/fP
+
+#include ./maca_trans_tagger.makefile
+include ../../makefiles/maca_trans_tagger.makefile
+
diff --git a/template/maca_trans_tagger/maca_trans_tagger.fm b/template/maca_trans_tagger/maca_trans_tagger.fm
new file mode 100644
index 0000000000000000000000000000000000000000..ddeac496238341129b1d03e96ff3e203cf5a0d24
--- /dev/null
+++ b/template/maca_trans_tagger/maca_trans_tagger.fm
@@ -0,0 +1,12 @@
+b0U1
+b0sgn
+b0f
+b0len
+bm1f
+bm2f
+bm1p
+bm2p
+bm3p
+bm2p bm1p
+bm2p bm3p
+bm1p b0sgn
diff --git a/template/maca_trans_tagger/maca_trans_tagger.mcd b/template/maca_trans_tagger/maca_trans_tagger.mcd
new file mode 100644
index 0000000000000000000000000000000000000000..345e089e486ac1f73af13d60a5fedee2a6cca4bb
--- /dev/null
+++ b/template/maca_trans_tagger/maca_trans_tagger.mcd
@@ -0,0 +1,2 @@
+1 FORM VOCAB _
+2 POS VOCAB _