diff --git a/UD/template/maca_trans_morpho/Makefile b/UD/template/maca_trans_morpho/Makefile
index 71a25f6e1ac6abb7608db400219b02e08f580a0f..eae858e7a0df04e3aaa2f2b5f55ad6ab0aafdc5c 100644
--- a/UD/template/maca_trans_morpho/Makefile
+++ b/UD/template/maca_trans_morpho/Makefile
@@ -6,7 +6,7 @@ CFF_TRAIN=train.cff
 FANN_TRAIN=train.fann
 CFF_FANN_TRAIN=train.fann.cff
 CFF_CUTOFF_TRAIN=train.cutoff.cff
-PERCEPTRON_ITERATIONS=9
+PERCEPTRON_ITERATIONS=4
 CFF_CUTOFF=1
 FEATURES_MODEL_FILENAME=../../fm/maca_trans_morpho.fm
 FEATURES_MODEL_FANN_FILENAME=../../fm/maca_trans_tagger_fann.fm
diff --git a/UD/ud_template.tgz b/UD/ud_template.tgz
index ac95a1dad74c60586d6d14ad42dda8ceae018e79..3a068f9ad8b0c3eaa79a41cd1f88c6824f7c5917 100644
Binary files a/UD/ud_template.tgz and b/UD/ud_template.tgz differ
diff --git a/fm/maca_trans_parser.fm b/fm/maca_trans_parser.fm
index 0806e1c4992917a36cfc23bbaaffe3d39fb8cef9..a34e6caac7329ea51900af4c89c8cfb885b23c9b 100644
--- a/fm/maca_trans_parser.fm
+++ b/fm/maca_trans_parser.fm
@@ -1,3 +1,7 @@
+#for ud, in order to try to better predict punctuation
+#s0p b0p ldep_b0r
+#s0p b0p ldep_b0p
+
 b0m
 s0m
 b0m s0m
diff --git a/makefiles/treebank_ud.makefile b/makefiles/treebank_ud.makefile
index 7fe3f07025cfe2c65bfd35d56e7affd2f706897d..1f9cfea3b97bac45fd3f12341c6ad5c67f9ad6d0 100644
--- a/makefiles/treebank_ud.makefile
+++ b/makefiles/treebank_ud.makefile
@@ -1,16 +1,29 @@
 TOOLS=../../../tools
+# Cutoff passed as the second argument to conll_keep_most_frequent_morpho_tags.pl.
+THRESHOLD=10
 
 compile: train.mcf test.mcf dev.mcf
 
+# Each split is first passed through conll_keep_most_frequent_morpho_tags.pl;
+# its output is then converted with conllu2mcf. The intermediate file is named
+# after the target ($@.tmp) so the three rules never share a scratch file under -j.
 train.mcf: $(TRAIN)
-	$(TOOLS)/conllu2mcf -f $< -1W -2C -3F -4L -5H -6D > $@
+	$(TOOLS)/conll_keep_most_frequent_morpho_tags.pl $< $(THRESHOLD) > $@.tmp
+	$(TOOLS)/conllu2mcf -f $@.tmp -1W -2C -3F -4L -5H -6D > $@
+	rm -f $@.tmp
 
 test.mcf: $(TEST)
-	$(TOOLS)/conllu2mcf -f $< -1W -2C -3F -4L -5H -6D > $@
+	$(TOOLS)/conll_keep_most_frequent_morpho_tags.pl $< $(THRESHOLD) > $@.tmp
+	$(TOOLS)/conllu2mcf -f $@.tmp -1W -2C -3F -4L -5H -6D > $@
+	rm -f $@.tmp
 
+# NOTE(review): dev.mcf is built from $(TEST), the same input as test.mcf;
+# confirm whether a separate development set was intended.
 dev.mcf: $(TEST)
-	$(TOOLS)/conllu2mcf -f $< -1W -2C -3F -4L -5H -6D > $@
+	$(TOOLS)/conll_keep_most_frequent_morpho_tags.pl $< $(THRESHOLD) > $@.tmp
+	$(TOOLS)/conllu2mcf -f $@.tmp -1W -2C -3F -4L -5H -6D > $@
+	rm -f $@.tmp
 
 clean: