# Reviewer preamble. Everything above the first "diff --git" record is free text
# and is not part of any hunk.
#
# What this patch does:
#   * ftb/Makefile: adds maca_trans_lemmatizer to the compile steps, replaces the
#     maca_lemmatizer install step with maca_trans_lemmatizer, and runs the eval
#     sub-make without the LANGUAGE/CODE command-line overrides.
#   * ftb/data/morpho-lexicon/Makefile, ftb/data/treebank/Makefile: replace the
#     commented-out UD_French paths with DATA_DIR=../../../data/ftb and the
#     ftb.{train,test,dev}.conll07 files.
#   * ftb/eval/Makefile: sets LANGUAGE=ftb.
#   * ftb/maca_lemmatizer/Makefile: deleted.
#   * ftb/maca_trans_lemmatizer/Makefile and maca_trans_lemmatizer.fm: new files.
#   * makefiles/morpho-lexicon_ud.makefile: calls conll2fplm.pl without -c (the
#     old command is kept as a comment).
#   * makefiles/treebank_ud.makefile: conllu2mcf is called with -2P instead of -2C.
#
# NOTE(review): this patch had lost its line breaks. One record per line, recipe
#   TABs and blank context lines were reconstructed from the hunk line counts;
#   whitespace inside a line (e.g. after a leading '#') may differ from the real
#   files. Confirm with `git apply --check` (add --ignore-whitespace if needed).
# NOTE(review): the last hunk (makefiles/treebank_ud.makefile) is cut off after
#   "clean:"; the position of its blank context lines is a best guess.
# NOTE(review): in makefiles/treebank_ud.makefile the dev.mcf rule depends on
#   $(TEST). ftb/data/treebank/Makefile now sets TEST and DEV to different files,
#   so dev.mcf would be built from the test file; this looks like it should be
#   $(DEV) - confirm and fix in a follow-up change.
# NOTE(review): the train/test/dev rules all write the same scratch file "tmp",
#   which presumably collides under `make -j` - confirm.
# NOTE(review): maca_trans_lemmatizer.fm adds the line "b0p b0s1 b0s2" twice.

diff --git a/ftb/Makefile b/ftb/Makefile
index bf7b20ec67ce500e7e52657b3f091348c44fc09e..c3cb09fbfd25eb27b542b8306f084ec73e57932f 100644
--- a/ftb/Makefile
+++ b/ftb/Makefile
@@ -14,6 +14,7 @@ compile:
 	$(MAKE) -C maca_lexer compile
 	$(MAKE) -C maca_trans_tagger compile
 	$(MAKE) -C maca_trans_morpho compile
+	$(MAKE) -C maca_trans_lemmatizer compile
 	$(MAKE) -C maca_trans_parser compile
 # $(MAKE) -C maca_crf_tagger compile
 
@@ -22,13 +23,14 @@ install:
 	$(MAKE) -C maca_lexer install
 	$(MAKE) -C maca_trans_tagger install
 	$(MAKE) -C maca_trans_morpho install
-	$(MAKE) -C maca_lemmatizer install
+	$(MAKE) -C maca_trans_lemmatizer install
 	$(MAKE) -C maca_trans_parser install
 # $(MAKE) -C maca_crf_tagger install
 # @tar -cvzf ./maca_datas.tgz bin
 
 evaluation:
-	$(MAKE) LANGUAGE=$(_LANG) CODE=$(_LANG) -C eval
+# $(MAKE) LANGUAGE=$(_LANG) CODE=$(_LANG) -C eval
+	$(MAKE) -C eval
 
 clean:
 	$(MAKE) -C data/morpho-lexicon clean
diff --git a/ftb/data/morpho-lexicon/Makefile b/ftb/data/morpho-lexicon/Makefile
index ded55daca35c2eaea2799fab8074e309829ae917..e91786046362b05a783acfb9490136dcfd282c93 100644
--- a/ftb/data/morpho-lexicon/Makefile
+++ b/ftb/data/morpho-lexicon/Makefile
@@ -1,4 +1,4 @@
-#DATA_DIR=../../../data/ud-treebanks-conll2017/UD_French
-#TRAIN=$(DATA_DIR)/fr-ud-train.conllu
+DATA_DIR=../../../data/ftb
+TRAIN=$(DATA_DIR)/ftb.train.conll07
 
 include ../../../makefiles/morpho-lexicon_ud.makefile
diff --git a/ftb/data/treebank/Makefile b/ftb/data/treebank/Makefile
index 07eda4dc44b4ef13cf2eb81af898d03d75bdafe8..95ce0230f69daf5831e883df92cabcf139ae22dc 100644
--- a/ftb/data/treebank/Makefile
+++ b/ftb/data/treebank/Makefile
@@ -1,7 +1,7 @@
-#DATA_DIR=../../../data/ud-treebanks-conll2017/UD_French
-#TRAIN=$(DATA_DIR)/fr-ud-train.conllu
-#TEST=$(DATA_DIR)/fr-ud-dev.conllu
-#DEV=$(DATA_DIR)/fr-ud-dev.conllu
+DATA_DIR=../../../data/ftb
+TRAIN=$(DATA_DIR)/ftb.train.conll07
+TEST=$(DATA_DIR)/ftb.test.conll07
+DEV=$(DATA_DIR)/ftb.dev.conll07
 
 include ../../../makefiles/treebank_ud.makefile
 
diff --git a/ftb/eval/Makefile b/ftb/eval/Makefile
index f2f973ca61df5251fd40d4aecaca8de695a9e813..214b5fddee226d3beee03d72997da65cc72c8837 100644
--- a/ftb/eval/Makefile
+++ b/ftb/eval/Makefile
@@ -1,5 +1,5 @@
 
-#LANGUAGE=UD_French
+LANGUAGE=ftb
 WPMLGFS_MCD_FILE=../../mcd/wpmlgfs.mcd
 WPLGFS_MCD_FILE=../../mcd/wplgfs.mcd
 TEST=../data/treebank/test.mcf
diff --git a/ftb/maca_lemmatizer/Makefile b/ftb/maca_lemmatizer/Makefile
deleted file mode 100644
index 0aca0791623ee752848321c912c57b6ce1119310..0000000000000000000000000000000000000000
--- a/ftb/maca_lemmatizer/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-DATA=../data/morpho-lexicon
-
-##-----------------------------------------------------------------------
-## compile
-##-----------------------------------------------------------------------
-
-##-----------------------------------------------------------------------
-## install
-##-----------------------------------------------------------------------
-
-install:
-	- cp $(DATA)/fplm ../bin
-
-##-----------------------------------------------------------------------
-## clean
-##-----------------------------------------------------------------------
-
-
diff --git a/ftb/maca_trans_lemmatizer/Makefile b/ftb/maca_trans_lemmatizer/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..bd188652b86fb1c954c6d325a5e96acb13157600
--- /dev/null
+++ b/ftb/maca_trans_lemmatizer/Makefile
@@ -0,0 +1,15 @@
+MCF_TRAIN=../data/treebank/train.mcf
+MCF_DEV=../data/treebank/dev.mcf
+MCF_TEST=../data/treebank/test.mcf
+CFF_TRAIN=train.cff
+MODEL_FILENAME=maca_trans_lemmatizer.model
+RULES_FILENAME=maca_trans_lemmatizer_rules.txt
+EXCEPTIONS_FPLM_FILENAME=maca_trans_lemmatizer_exceptions.fplm
+VOCABS_FILENAME=maca_trans_lemmatizer.vocab
+FEATURES_MODEL_FILENAME=maca_trans_lemmatizer.fm
+FPLM_FILENAME=../data/morpho-lexicon/fplm
+THRESHOLD=50
+STRICT=-s
+
+
+include ../../makefiles/maca_trans_lemmatizer.makefile
diff --git a/ftb/maca_trans_lemmatizer/maca_trans_lemmatizer.fm b/ftb/maca_trans_lemmatizer/maca_trans_lemmatizer.fm
new file mode 100644
index 0000000000000000000000000000000000000000..ddeb05b623beeff53b9498d0b3b13e898d7b01db
--- /dev/null
+++ b/ftb/maca_trans_lemmatizer/maca_trans_lemmatizer.fm
@@ -0,0 +1,28 @@
+#b0U1
+#b0p b0f
+#b0len
+#bm1f
+#bm2f
+#b0p
+#bm1p
+#bm1m
+#bm1p bm1m
+#bm2p
+#bm3p
+#bm2p bm1p
+#bm2p bm3p
+#b0p b0s1
+#b0s2
+#b0s3
+#b0s4
+#b0s5
+b0p b0s1 b0s2
+b0p b0s2 b0s3
+b0p b0s3 b0s4
+b0p b0s4 b0s5
+b0p b0s5 b0s6
+b0p b0s1 b0s2
+b0p b0s1 b0s2 b0s3
+b0p b0s1 b0s2 b0s3 b0s4
+b0p b0s1 b0s2 b0s3 b0s4 b0s5
+b0p b0s1 b0s2 b0s3 b0s4 b0s5 b0s6
diff --git a/makefiles/morpho-lexicon_ud.makefile b/makefiles/morpho-lexicon_ud.makefile
index e40990a582d250adf38311830c1a7288b2baad9d..21b6b9095b1904b8ef8097014d71b0d728ce5f31 100644
--- a/makefiles/morpho-lexicon_ud.makefile
+++ b/makefiles/morpho-lexicon_ud.makefile
@@ -3,7 +3,8 @@ TOOLS=../../../tools/
 compile: fplm fP
 
 fplm:
-	$(TOOLS)conll2fplm.pl -c -f $(TRAIN) > $@
+	$(TOOLS)conll2fplm.pl -f $(TRAIN) > $@
+# $(TOOLS)conll2fplm.pl -c -f $(TRAIN) > $@
 
 fP: fplm
 	$(TOOLS)fplm2fP_ud.pl $< > $@
diff --git a/makefiles/treebank_ud.makefile b/makefiles/treebank_ud.makefile
index 1f9cfea3b97bac45fd3f12341c6ad5c67f9ad6d0..a13db44e12c971fc04e67bb67ba0086fb5e5547f 100644
--- a/makefiles/treebank_ud.makefile
+++ b/makefiles/treebank_ud.makefile
@@ -5,16 +5,16 @@ compile: train.mcf test.mcf dev.mcf
 
 train.mcf: $(TRAIN)
 	$(TOOLS)/conll_keep_most_frequent_morpho_tags.pl $< $(THRESHOLD) > tmp
-	$(TOOLS)/conllu2mcf -f tmp -1W -2C -3F -4L -5H -6D > $@
+	$(TOOLS)/conllu2mcf -f tmp -1W -2P -3F -4L -5H -6D > $@
 
 test.mcf: $(TEST)
 	$(TOOLS)/conll_keep_most_frequent_morpho_tags.pl $< $(THRESHOLD) > tmp
-	$(TOOLS)/conllu2mcf -f tmp -1W -2C -3F -4L -5H -6D > $@
+	$(TOOLS)/conllu2mcf -f tmp -1W -2P -3F -4L -5H -6D > $@
 
 dev.mcf: $(TEST)
 	$(TOOLS)/conll_keep_most_frequent_morpho_tags.pl $< $(THRESHOLD) > tmp
-	$(TOOLS)/conllu2mcf -f tmp -1W -2C -3F -4L -5H -6D > $@
+	$(TOOLS)/conllu2mcf -f tmp -1W -2P -3F -4L -5H -6D > $@
 
 clean: