From 38ae587712dbdf662e4809ec21901e5f49537a6f Mon Sep 17 00:00:00 2001 From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr> Date: Wed, 17 May 2017 14:16:17 +0200 Subject: [PATCH] modifier ftb so that it uses the pos tags instead of coarse pos tags --- ftb/data/morpho-lexicon/Makefile | 4 +-- ftb/data/treebank/Makefile | 8 +++--- ftb/eval/Makefile | 2 +- ftb/maca_lemmatizer/Makefile | 18 ------------ ftb/maca_trans_lemmatizer/Makefile | 15 ++++++++++ .../maca_trans_lemmatizer.fm | 28 +++++++++++++++++++ makefiles/morpho-lexicon_ud.makefile | 3 +- makefiles/treebank_ud.makefile | 6 ++-- 8 files changed, 55 insertions(+), 29 deletions(-) delete mode 100644 ftb/maca_lemmatizer/Makefile create mode 100644 ftb/maca_trans_lemmatizer/Makefile create mode 100644 ftb/maca_trans_lemmatizer/maca_trans_lemmatizer.fm diff --git a/ftb/data/morpho-lexicon/Makefile b/ftb/data/morpho-lexicon/Makefile index ded55da..e917860 100644 --- a/ftb/data/morpho-lexicon/Makefile +++ b/ftb/data/morpho-lexicon/Makefile @@ -1,4 +1,4 @@ -#DATA_DIR=../../../data/ud-treebanks-conll2017/UD_French -#TRAIN=$(DATA_DIR)/fr-ud-train.conllu +DATA_DIR=../../../data/ftb +TRAIN=$(DATA_DIR)/ftb.train.conll07 include ../../../makefiles/morpho-lexicon_ud.makefile diff --git a/ftb/data/treebank/Makefile b/ftb/data/treebank/Makefile index 07eda4d..95ce023 100644 --- a/ftb/data/treebank/Makefile +++ b/ftb/data/treebank/Makefile @@ -1,7 +1,7 @@ -#DATA_DIR=../../../data/ud-treebanks-conll2017/UD_French -#TRAIN=$(DATA_DIR)/fr-ud-train.conllu -#TEST=$(DATA_DIR)/fr-ud-dev.conllu -#DEV=$(DATA_DIR)/fr-ud-dev.conllu +DATA_DIR=../../../data/ftb +TRAIN=$(DATA_DIR)/ftb.train.conll07 +TEST=$(DATA_DIR)/ftb.test.conll07 +DEV=$(DATA_DIR)/ftb.dev.conll07 include ../../../makefiles/treebank_ud.makefile diff --git a/ftb/eval/Makefile b/ftb/eval/Makefile index f2f973c..214b5fd 100644 --- a/ftb/eval/Makefile +++ b/ftb/eval/Makefile @@ -1,5 +1,5 @@ -#LANGUAGE=UD_French +LANGUAGE=ftb WPMLGFS_MCD_FILE=../../mcd/wpmlgfs.mcd WPLGFS_MCD_FILE=../../mcd/wplgfs.mcd TEST=../data/treebank/test.mcf diff --git a/ftb/maca_lemmatizer/Makefile b/ftb/maca_lemmatizer/Makefile deleted file mode 100644 index 0aca079..0000000 --- a/ftb/maca_lemmatizer/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -DATA=../data/morpho-lexicon - -##----------------------------------------------------------------------- -## compile -##----------------------------------------------------------------------- - -##----------------------------------------------------------------------- -## install -##----------------------------------------------------------------------- - -install: - - cp $(DATA)/fplm ../bin - -##----------------------------------------------------------------------- -## clean -##----------------------------------------------------------------------- - - diff --git a/ftb/maca_trans_lemmatizer/Makefile b/ftb/maca_trans_lemmatizer/Makefile new file mode 100644 index 0000000..bd18865 --- /dev/null +++ b/ftb/maca_trans_lemmatizer/Makefile @@ -0,0 +1,15 @@ +MCF_TRAIN=../data/treebank/train.mcf +MCF_DEV=../data/treebank/dev.mcf +MCF_TEST=../data/treebank/test.mcf +CFF_TRAIN=train.cff +MODEL_FILENAME=maca_trans_lemmatizer.model +RULES_FILENAME=maca_trans_lemmatizer_rules.txt +EXCEPTIONS_FPLM_FILENAME=maca_trans_lemmatizer_exceptions.fplm +VOCABS_FILENAME=maca_trans_lemmatizer.vocab +FEATURES_MODEL_FILENAME=maca_trans_lemmatizer.fm +FPLM_FILENAME=../data/morpho-lexicon/fplm +THRESHOLD=50 +STRICT=-s + + +include ../../makefiles/maca_trans_lemmatizer.makefile diff --git a/ftb/maca_trans_lemmatizer/maca_trans_lemmatizer.fm b/ftb/maca_trans_lemmatizer/maca_trans_lemmatizer.fm new file mode 100644 index 0000000..ddeb05b --- /dev/null +++ b/ftb/maca_trans_lemmatizer/maca_trans_lemmatizer.fm @@ -0,0 +1,28 @@ +#b0U1 +#b0p b0f +#b0len +#bm1f +#bm2f +#b0p +#bm1p +#bm1m +#bm1p bm1m +#bm2p +#bm3p +#bm2p bm1p +#bm2p bm3p +#b0p b0s1 +#b0s2 +#b0s3 +#b0s4 +#b0s5 +b0p b0s1 b0s2 +b0p b0s2 b0s3 +b0p b0s3 b0s4 +b0p b0s4 b0s5 +b0p b0s5 b0s6 +b0p b0s1 b0s2 +b0p b0s1 b0s2 b0s3 +b0p b0s1 b0s2 b0s3 b0s4 +b0p b0s1 b0s2 b0s3 b0s4 b0s5 +b0p b0s1 b0s2 b0s3 b0s4 b0s5 b0s6 diff --git a/makefiles/morpho-lexicon_ud.makefile b/makefiles/morpho-lexicon_ud.makefile index e40990a..21b6b90 100644 --- a/makefiles/morpho-lexicon_ud.makefile +++ b/makefiles/morpho-lexicon_ud.makefile @@ -3,7 +3,8 @@ TOOLS=../../../tools/ compile: fplm fP fplm: - $(TOOLS)conll2fplm.pl -c -f $(TRAIN) > $@ + $(TOOLS)conll2fplm.pl -f $(TRAIN) > $@ +# $(TOOLS)conll2fplm.pl -c -f $(TRAIN) > $@ fP: fplm $(TOOLS)fplm2fP_ud.pl $< > $@ diff --git a/makefiles/treebank_ud.makefile b/makefiles/treebank_ud.makefile index 1f9cfea..a13db44 100644 --- a/makefiles/treebank_ud.makefile +++ b/makefiles/treebank_ud.makefile @@ -5,16 +5,16 @@ compile: train.mcf test.mcf dev.mcf train.mcf: $(TRAIN) $(TOOLS)/conll_keep_most_frequent_morpho_tags.pl $< $(THRESHOLD) > tmp - $(TOOLS)/conllu2mcf -f tmp -1W -2C -3F -4L -5H -6D > $@ + $(TOOLS)/conllu2mcf -f tmp -1W -2P -3F -4L -5H -6D > $@ test.mcf: $(TEST) $(TOOLS)/conll_keep_most_frequent_morpho_tags.pl $< $(THRESHOLD) > tmp - $(TOOLS)/conllu2mcf -f tmp -1W -2C -3F -4L -5H -6D > $@ + $(TOOLS)/conllu2mcf -f tmp -1W -2P -3F -4L -5H -6D > $@ dev.mcf: $(TEST) $(TOOLS)/conll_keep_most_frequent_morpho_tags.pl $< $(THRESHOLD) > tmp - $(TOOLS)/conllu2mcf -f tmp -1W -2C -3F -4L -5H -6D > $@ + $(TOOLS)/conllu2mcf -f tmp -1W -2P -3F -4L -5H -6D > $@ clean: -- GitLab