From 2154d4e79f170eefcab31b0f98c2b2c725b66c4b Mon Sep 17 00:00:00 2001
From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
Date: Fri, 8 Jul 2016 15:57:52 -0400
Subject: [PATCH] added a transition tagger for fr

---
 d6/Makefile                                |  2 +-
 fr/Makefile                                |  5 ++-
 fr/maca_trans_tagger/Makefile              | 52 ++++++++++++++++++++++
 fr/maca_trans_tagger/maca_trans_tagger.fm  | 11 +++++
 fr/maca_trans_tagger/maca_trans_tagger.mcd |  2 +
 5 files changed, 70 insertions(+), 2 deletions(-)
 create mode 100644 fr/maca_trans_tagger/Makefile
 create mode 100644 fr/maca_trans_tagger/maca_trans_tagger.fm
 create mode 100644 fr/maca_trans_tagger/maca_trans_tagger.mcd

diff --git a/d6/Makefile b/d6/Makefile
index 0429336..3636053 100644
--- a/d6/Makefile
+++ b/d6/Makefile
@@ -11,7 +11,7 @@ install:
 	$(MAKE) -C maca_trans_parser install
 #	$(MAKE) -C maca_lemmatizer install
 #	$(MAKE) -C maca_crf_tagger install
-	@tar -cvzf ./maca_datas.tgz bin
+#	@tar -cvzf ./maca_datas.tgz bin
 
 clean:
 #	$(MAKE) -C data/morpho-lexicon clean
diff --git a/fr/Makefile b/fr/Makefile
index 0a170aa..a3cb14a 100644
--- a/fr/Makefile
+++ b/fr/Makefile
@@ -4,17 +4,20 @@ compile:
 	$(MAKE) -C data/morpho-lexicon compile
 	$(MAKE) -C data/treebank compile
 	$(MAKE) -C maca_trans_parser compile
+	$(MAKE) -C maca_trans_tagger compile
 	$(MAKE) -C maca_crf_tagger compile
 
 install: 
 	-mkdir -p bin
 	$(MAKE) -C maca_trans_parser install
+	$(MAKE) -C maca_trans_tagger install
 	$(MAKE) -C maca_lemmatizer install
 	$(MAKE) -C maca_crf_tagger install
-	@tar -cvzf ./maca_datas.tgz bin
+#	@tar -cvzf ./maca_datas.tgz bin
 
 clean:
 	$(MAKE) -C data/morpho-lexicon clean
 	$(MAKE) -C data/treebank clean
 	$(MAKE) -C maca_trans_parser clean
+	$(MAKE) -C maca_trans_tagger clean
 	$(MAKE) -C maca_crf_tagger clean
diff --git a/fr/maca_trans_tagger/Makefile b/fr/maca_trans_tagger/Makefile
new file mode 100644
index 0000000..b5286c9
--- /dev/null
+++ b/fr/maca_trans_tagger/Makefile
@@ -0,0 +1,52 @@
+# Treebank splits in CoNLL-07 format; no rule in this file references the DEV or TEST split.
+CONLL07TRAIN=../data/treebank/train.conll07
+CONLL07DEV=../data/treebank/dev.conll07
+CONLL07TEST=../data/treebank/test.conll07
+# File names and tool settings used by the rules below (no trailing blanks: make keeps them in the value).
+CFF_TRAIN=train.cff
+CFF_CUTOFF_TRAIN=train.cutoff.cff
+PERCEPTRON_ITERATIONS=5
+CFF_CUTOFF=1
+FEATURES_MODEL_FILENAME=maca_trans_tagger.fm
+VOCABS_FILENAME=maca_trans_tagger.vocab
+MCD_FILENAME=maca_trans_tagger.mcd
+MODEL_FILENAME=maca_trans_tagger.model
+NUMBER_OF_SENTENCES=10000000
+STREAM_MODE=0
+
+##-----------------------------------------------------------------------
+## compile: train the tagger model from the training treebank
+##-----------------------------------------------------------------------
+.PHONY: compile
+.DELETE_ON_ERROR: # drop a target whose recipe failed (e.g. a file truncated by '>')
+compile: $(MODEL_FILENAME)
+# The recipe also reads the feature model and MCD files, so both are prerequisites; $< is still the treebank.
+$(CFF_TRAIN): $(CONLL07TRAIN) $(FEATURES_MODEL_FILENAME) $(MCD_FILENAME)
+	maca_trans_parser_conll2cff_tagger --conll $< --mode TRAIN --feat_model $(FEATURES_MODEL_FILENAME) --vocabs $(VOCABS_FILENAME) --cff $@ -s $(NUMBER_OF_SENTENCES) -T $(STREAM_MODE) -C $(MCD_FILENAME)
+# Runs the cutoff tool with --cutoff $(CFF_CUTOFF) and captures its standard output; no rule depends on this file yet.
+$(CFF_CUTOFF_TRAIN): $(CFF_TRAIN)
+	maca_trans_parser_cff_cutoff --cff $< --vocabs $(VOCABS_FILENAME) --cutoff $(CFF_CUTOFF) > $@
+# Trained on the unfiltered features; make $(CFF_CUTOFF_TRAIN) the prerequisite to train on the cutoff set instead.
+$(MODEL_FILENAME): $(CFF_TRAIN)
+	maca_trans_parser_train_from_cff --cff $< --model $@ -n $(PERCEPTRON_ITERATIONS)
+
+##-----------------------------------------------------------------------
+## install: copy the feature model, vocabulary and model files to ../bin ('-' = a failed copy does not stop make)
+##-----------------------------------------------------------------------
+.PHONY: install
+install:
+	-cp $(FEATURES_MODEL_FILENAME) ../bin
+	-cp $(VOCABS_FILENAME) ../bin
+	-cp $(MODEL_FILENAME) ../bin
+
+##-----------------------------------------------------------------------
+## clean: remove the vocabulary, model and .cff files (the .fm and .mcd inputs are kept)
+##-----------------------------------------------------------------------
+.PHONY: clean
+clean:
+	-rm -f $(VOCABS_FILENAME)
+	-rm -f $(MODEL_FILENAME)
+	-rm -f $(CFF_TRAIN)
+	-rm -f $(CFF_CUTOFF_TRAIN)
+
+
diff --git a/fr/maca_trans_tagger/maca_trans_tagger.fm b/fr/maca_trans_tagger/maca_trans_tagger.fm
new file mode 100644
index 0000000..a737807
--- /dev/null
+++ b/fr/maca_trans_tagger/maca_trans_tagger.fm
@@ -0,0 +1,11 @@
+b0f
+b1f
+b2f
+s0f
+s1f
+s0p
+s1p
+s2p
+s0p s1p
+s0p s1p s2p
+s1p s2p
diff --git a/fr/maca_trans_tagger/maca_trans_tagger.mcd b/fr/maca_trans_tagger/maca_trans_tagger.mcd
new file mode 100644
index 0000000..6b58caa
--- /dev/null
+++ b/fr/maca_trans_tagger/maca_trans_tagger.mcd
@@ -0,0 +1,2 @@
+1 FORM VOCAB _
+4 POS VOCAB _
-- 
GitLab