From aee073e97006c401da62980beb2ba45f5c43a608 Mon Sep 17 00:00:00 2001 From: "alexis.nasr" <alexis.nasr@lif.univ-mrs.fr> Date: Wed, 17 May 2017 14:55:15 +0200 Subject: [PATCH] updated maca_data2/orfeo --- makefiles/maca_trans_lemmatizer.makefile | 2 +- orfeo/Makefile | 4 +- orfeo/data/treebank/Makefile | 9 +++- orfeo/eval/Makefile | 8 ++-- orfeo/maca_lemmatizer/Makefile | 18 -------- orfeo/maca_trans_lemmatizer/Makefile | 16 ++++++++ orfeo/maca_trans_parser/Makefile | 2 +- orfeo/maca_trans_parser/maca_trans_parser.fm | 43 ++++++++++++++++++-- orfeo/maca_trans_tagger/Makefile | 4 +- 9 files changed, 74 insertions(+), 32 deletions(-) delete mode 100644 orfeo/maca_lemmatizer/Makefile create mode 100644 orfeo/maca_trans_lemmatizer/Makefile diff --git a/makefiles/maca_trans_lemmatizer.makefile b/makefiles/maca_trans_lemmatizer.makefile index bdd03ff..e8b0abf 100644 --- a/makefiles/maca_trans_lemmatizer.makefile +++ b/makefiles/maca_trans_lemmatizer.makefile @@ -31,6 +31,6 @@ install: clean: -rm $(RULES_FILENAME) -rm $(EXCEPTIONS_FPLM_FILENAME) - -rm $(TRAIN_CFF) + -rm $(CFF_TRAIN) -rm $(MODEL_FILENAME) -rm $(VOCABS_FILENAME) diff --git a/orfeo/Makefile b/orfeo/Makefile index 6e0e763..81f8118 100644 --- a/orfeo/Makefile +++ b/orfeo/Makefile @@ -7,13 +7,14 @@ compile: $(MAKE) -C data/treebank compile $(MAKE) -C maca_trans_parser compile $(MAKE) -C maca_trans_tagger compile + $(MAKE) -C maca_trans_lemmatizer install # $(MAKE) -C maca_crf_tagger compile install: -mkdir -p bin $(MAKE) -C maca_trans_parser install $(MAKE) -C maca_trans_tagger install - $(MAKE) -C maca_lemmatizer install + $(MAKE) -C maca_trans_lemmatizer install # $(MAKE) -C maca_crf_tagger install # @tar -cvzf ./maca_datas.tgz bin @@ -25,5 +26,6 @@ clean: $(MAKE) -C data/treebank clean $(MAKE) -C maca_trans_parser clean $(MAKE) -C maca_trans_tagger clean + $(MAKE) -C maca_trans_lemmatizer clean # $(MAKE) -C maca_crf_tagger clean $(MAKE) -C eval clean diff --git a/orfeo/data/treebank/Makefile b/orfeo/data/treebank/Makefile index 2bd0183..d98bb9a 100644 --- a/orfeo/data/treebank/Makefile +++ b/orfeo/data/treebank/Makefile @@ -1,6 +1,11 @@ TOOLS=../../../tools -TRAIN=orfeo.train.conll07 -TEST=orfeo.test.conll07 +#TRAIN=orfeo.train.conll07 +#TEST=orfeo.test.conll07 +#TRAIN=corpus_orfeo_gold_v1.train +#TEST=corpus_orfeo_gold_v1.test + +TRAIN=corpus_orfeo_gold_v2.train +TEST=corpus_orfeo_gold_v2.test compile: train.mcf test.mcf #dev.mcf diff --git a/orfeo/eval/Makefile b/orfeo/eval/Makefile index 704f5d5..9724123 100644 --- a/orfeo/eval/Makefile +++ b/orfeo/eval/Makefile @@ -1,6 +1,6 @@ -TEST=../data/treebank/test.conll07 -DEV=../data/treebank/dev.conll07 +TEST=../data/treebank/test.mcf +DEV=../data/treebank/dev.mcf LANGUAGE=orfeo -MCD_FILE=eval.mcd +MCD_FILE=wplgfs.mcd -include ../../makefiles/eval.makefile +include ./eval_ud.makefile diff --git a/orfeo/maca_lemmatizer/Makefile b/orfeo/maca_lemmatizer/Makefile deleted file mode 100644 index 0aca079..0000000 --- a/orfeo/maca_lemmatizer/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -DATA=../data/morpho-lexicon - -##----------------------------------------------------------------------- -## compile -##----------------------------------------------------------------------- - -##----------------------------------------------------------------------- -## install -##----------------------------------------------------------------------- - -install: - - cp $(DATA)/fplm ../bin - -##----------------------------------------------------------------------- -## clean -##----------------------------------------------------------------------- - - diff --git a/orfeo/maca_trans_lemmatizer/Makefile b/orfeo/maca_trans_lemmatizer/Makefile new file mode 100644 index 0000000..884f809 --- /dev/null +++ b/orfeo/maca_trans_lemmatizer/Makefile @@ -0,0 +1,16 @@ +MCF_TRAIN=../data/treebank/train.mcf +MCF_DEV=../data/treebank/dev.mcf +MCF_TEST=../data/treebank/test.mcf +CFF_TRAIN=train.cff +MODEL_FILENAME=maca_trans_lemmatizer.model +RULES_FILENAME=maca_trans_lemmatizer_rules.txt +EXCEPTIONS_FPLM_FILENAME=maca_trans_lemmatizer_exceptions.fplm +VOCABS_FILENAME=maca_trans_lemmatizer.vocab +FEATURES_MODEL_FILENAME=maca_trans_lemmatizer.fm +FPLM_FILENAME=../data/morpho-lexicon/fplm +THRESHOLD=1000 +STRICT=-s +MCD_FILE=../../mcd/wplgfs.mcd + +#include ../../makefiles/maca_trans_lemmatizer.makefile +include ./maca_trans_lemmatizer.makefile diff --git a/orfeo/maca_trans_parser/Makefile b/orfeo/maca_trans_parser/Makefile index 1cbd002..27b3b26 100644 --- a/orfeo/maca_trans_parser/Makefile +++ b/orfeo/maca_trans_parser/Makefile @@ -11,7 +11,7 @@ VOCABS_FILENAME=maca_trans_parser.vocab MODEL_FILENAME=maca_trans_parser.model NUMBER_OF_SENTENCES=4218 #NUMBER_OF_SENTENCES=1000 -MCD_FILENAME=wplgfs.mcd +MCD_FILENAME=../../mcd/wplgfs.mcd STREAM_MODE= -S include ../../makefiles/maca_trans_parser.makefile diff --git a/orfeo/maca_trans_parser/maca_trans_parser.fm b/orfeo/maca_trans_parser/maca_trans_parser.fm index 7a9daea..b4ae75f 100644 --- a/orfeo/maca_trans_parser/maca_trans_parser.fm +++ b/orfeo/maca_trans_parser/maca_trans_parser.fm @@ -1,7 +1,23 @@ +b0g +s0g s0p +s0g b0p +s0g +s0sf +#s1g +#s1sf s0l + +#new +s0f + s0p s1p +s2p b0l + +#new +b0f + b0p b1l b1p @@ -9,15 +25,32 @@ b2p b3p ldep_s0r rdep_s0r +ldep_s1r +rdep_s1r ldep_b0r rdep_b0r s0l b0l + +#new +s0f b0l +s0l b0f +s0f b0f + s0p b0p + b0p b0l + +#new +b0p b0f + b0p ldep_b0r s1p b1p b1p b2p s0p b0p b0l + +#new +s0p b0p b0f + s0p ldep_s0r rdep_s0r s0p s0l b0p s0p b0p dist_s0_b0 @@ -28,8 +61,12 @@ s0p b0p b1p b1p b1l b2p b3p b1p b1l b2p b2l b3p t1 -t2 -t3 +#t2 +#t3 +#t4 t1 t2 -t2 t3 +#t2 t3 t1 t2 t3 + +bm1p +bm2p diff --git a/orfeo/maca_trans_tagger/Makefile b/orfeo/maca_trans_tagger/Makefile index baa1fbd..293975b 100644 --- a/orfeo/maca_trans_tagger/Makefile +++ b/orfeo/maca_trans_tagger/Makefile @@ -7,9 +7,9 @@ CFF_TRAIN=train.cff CFF_CUTOFF_TRAIN=train.cutoff.cff PERCEPTRON_ITERATIONS=9 CFF_CUTOFF=1 -FEATURES_MODEL_FILENAME=maca_trans_tagger.fm +FEATURES_MODEL_FILENAME=../../fm/maca_trans_tagger.fm VOCABS_FILENAME=maca_trans_tagger.vocab -MCD_FILENAME=maca_trans_tagger.mcd +MCD_FILENAME=../../mcd/wplgfs.mcd MODEL_FILENAME=maca_trans_tagger.model NUMBER_OF_SENTENCES=10000000 STREAM_MODE= -S -- GitLab