From 38ae587712dbdf662e4809ec21901e5f49537a6f Mon Sep 17 00:00:00 2001
From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
Date: Wed, 17 May 2017 14:16:17 +0200
Subject: [PATCH] modify ftb so that it uses the pos tags instead of coarse
 pos tags

---
 ftb/data/morpho-lexicon/Makefile              |  4 +--
 ftb/data/treebank/Makefile                    |  8 +++---
 ftb/eval/Makefile                             |  2 +-
 ftb/maca_lemmatizer/Makefile                  | 18 ------------
 ftb/maca_trans_lemmatizer/Makefile            | 15 ++++++++++
 .../maca_trans_lemmatizer.fm                  | 28 +++++++++++++++++++
 makefiles/morpho-lexicon_ud.makefile          |  3 +-
 makefiles/treebank_ud.makefile                |  6 ++--
 8 files changed, 55 insertions(+), 29 deletions(-)
 delete mode 100644 ftb/maca_lemmatizer/Makefile
 create mode 100644 ftb/maca_trans_lemmatizer/Makefile
 create mode 100644 ftb/maca_trans_lemmatizer/maca_trans_lemmatizer.fm

diff --git a/ftb/data/morpho-lexicon/Makefile b/ftb/data/morpho-lexicon/Makefile
index ded55da..e917860 100644
--- a/ftb/data/morpho-lexicon/Makefile
+++ b/ftb/data/morpho-lexicon/Makefile
@@ -1,4 +1,4 @@
-#DATA_DIR=../../../data/ud-treebanks-conll2017/UD_French
-#TRAIN=$(DATA_DIR)/fr-ud-train.conllu
+DATA_DIR=../../../data/ftb
+TRAIN=$(DATA_DIR)/ftb.train.conll07
 
 include ../../../makefiles/morpho-lexicon_ud.makefile
diff --git a/ftb/data/treebank/Makefile b/ftb/data/treebank/Makefile
index 07eda4d..95ce023 100644
--- a/ftb/data/treebank/Makefile
+++ b/ftb/data/treebank/Makefile
@@ -1,7 +1,7 @@
-#DATA_DIR=../../../data/ud-treebanks-conll2017/UD_French
-#TRAIN=$(DATA_DIR)/fr-ud-train.conllu
-#TEST=$(DATA_DIR)/fr-ud-dev.conllu
-#DEV=$(DATA_DIR)/fr-ud-dev.conllu
+DATA_DIR=../../../data/ftb
+TRAIN=$(DATA_DIR)/ftb.train.conll07
+TEST=$(DATA_DIR)/ftb.test.conll07
+DEV=$(DATA_DIR)/ftb.dev.conll07
 
 include ../../../makefiles/treebank_ud.makefile
 
diff --git a/ftb/eval/Makefile b/ftb/eval/Makefile
index f2f973c..214b5fd 100644
--- a/ftb/eval/Makefile
+++ b/ftb/eval/Makefile
@@ -1,5 +1,5 @@
 
-#LANGUAGE=UD_French
+LANGUAGE=ftb
 WPMLGFS_MCD_FILE=../../mcd/wpmlgfs.mcd
 WPLGFS_MCD_FILE=../../mcd/wplgfs.mcd
 TEST=../data/treebank/test.mcf
diff --git a/ftb/maca_lemmatizer/Makefile b/ftb/maca_lemmatizer/Makefile
deleted file mode 100644
index 0aca079..0000000
--- a/ftb/maca_lemmatizer/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-DATA=../data/morpho-lexicon
-
-##-----------------------------------------------------------------------
-## compile
-##-----------------------------------------------------------------------
-
-##-----------------------------------------------------------------------
-## install
-##-----------------------------------------------------------------------
-
-install:
-	- cp $(DATA)/fplm ../bin
-
-##-----------------------------------------------------------------------
-## clean
-##-----------------------------------------------------------------------
-
-
diff --git a/ftb/maca_trans_lemmatizer/Makefile b/ftb/maca_trans_lemmatizer/Makefile
new file mode 100644
index 0000000..bd18865
--- /dev/null
+++ b/ftb/maca_trans_lemmatizer/Makefile
@@ -0,0 +1,15 @@
+MCF_TRAIN=../data/treebank/train.mcf
+MCF_DEV=../data/treebank/dev.mcf
+MCF_TEST=../data/treebank/test.mcf
+CFF_TRAIN=train.cff
+MODEL_FILENAME=maca_trans_lemmatizer.model
+RULES_FILENAME=maca_trans_lemmatizer_rules.txt
+EXCEPTIONS_FPLM_FILENAME=maca_trans_lemmatizer_exceptions.fplm
+VOCABS_FILENAME=maca_trans_lemmatizer.vocab
+FEATURES_MODEL_FILENAME=maca_trans_lemmatizer.fm
+FPLM_FILENAME=../data/morpho-lexicon/fplm
+THRESHOLD=50
+STRICT=-s
+
+
+include ../../makefiles/maca_trans_lemmatizer.makefile
diff --git a/ftb/maca_trans_lemmatizer/maca_trans_lemmatizer.fm b/ftb/maca_trans_lemmatizer/maca_trans_lemmatizer.fm
new file mode 100644
index 0000000..ddeb05b
--- /dev/null
+++ b/ftb/maca_trans_lemmatizer/maca_trans_lemmatizer.fm
@@ -0,0 +1,28 @@
+#b0U1
+#b0p b0f
+#b0len
+#bm1f
+#bm2f
+#b0p
+#bm1p
+#bm1m
+#bm1p bm1m
+#bm2p
+#bm3p
+#bm2p bm1p
+#bm2p bm3p
+#b0p b0s1
+#b0s2
+#b0s3
+#b0s4
+#b0s5
+b0p b0s1 b0s2
+b0p b0s2 b0s3
+b0p b0s3 b0s4
+b0p b0s4 b0s5
+b0p b0s5 b0s6
+b0p b0s1 b0s2
+b0p b0s1 b0s2 b0s3
+b0p b0s1 b0s2 b0s3 b0s4
+b0p b0s1 b0s2 b0s3 b0s4 b0s5
+b0p b0s1 b0s2 b0s3 b0s4 b0s5 b0s6
diff --git a/makefiles/morpho-lexicon_ud.makefile b/makefiles/morpho-lexicon_ud.makefile
index e40990a..21b6b90 100644
--- a/makefiles/morpho-lexicon_ud.makefile
+++ b/makefiles/morpho-lexicon_ud.makefile
@@ -3,7 +3,8 @@ TOOLS=../../../tools/
 compile: fplm fP
 
 fplm:
-	$(TOOLS)conll2fplm.pl -c -f $(TRAIN) > $@
+	$(TOOLS)conll2fplm.pl -f $(TRAIN) > $@
+#	$(TOOLS)conll2fplm.pl -c -f $(TRAIN) > $@
 
 fP: fplm
 	$(TOOLS)fplm2fP_ud.pl $< > $@
diff --git a/makefiles/treebank_ud.makefile b/makefiles/treebank_ud.makefile
index 1f9cfea..a13db44 100644
--- a/makefiles/treebank_ud.makefile
+++ b/makefiles/treebank_ud.makefile
@@ -5,16 +5,16 @@ compile: train.mcf  test.mcf dev.mcf
 
 train.mcf: $(TRAIN)
 	$(TOOLS)/conll_keep_most_frequent_morpho_tags.pl $< $(THRESHOLD) > tmp
-	$(TOOLS)/conllu2mcf -f tmp -1W -2C -3F -4L -5H -6D > $@
+	$(TOOLS)/conllu2mcf -f tmp -1W -2P -3F -4L -5H -6D > $@
 
 test.mcf: $(TEST)
 	$(TOOLS)/conll_keep_most_frequent_morpho_tags.pl $< $(THRESHOLD) > tmp
-	$(TOOLS)/conllu2mcf -f tmp -1W -2C -3F -4L -5H -6D > $@
+	$(TOOLS)/conllu2mcf -f tmp -1W -2P -3F -4L -5H -6D > $@
 
 
 dev.mcf: $(TEST)
 	$(TOOLS)/conll_keep_most_frequent_morpho_tags.pl $< $(THRESHOLD) > tmp
-	$(TOOLS)/conllu2mcf -f tmp -1W -2C -3F -4L -5H -6D > $@
+	$(TOOLS)/conllu2mcf -f tmp -1W -2P -3F -4L -5H -6D > $@
 
 
 clean: 
-- 
GitLab