From 6378320578f87ebd507f267bc75d0c6a7480c846 Mon Sep 17 00:00:00 2001
From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
Date: Tue, 18 Apr 2017 10:05:43 +0200
Subject: [PATCH] UD settings: 4 perceptron iterations, morpho-tag pre-filtering (THRESHOLD=10)

---
 UD/template/maca_trans_morpho/Makefile |   2 +-
 UD/ud_template.tgz                     | Bin 1431 -> 1388 bytes
 fm/maca_trans_parser.fm                |   4 ++++
 makefiles/treebank_ud.makefile         |  10 +++++++---
 4 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/UD/template/maca_trans_morpho/Makefile b/UD/template/maca_trans_morpho/Makefile
index 71a25f6..eae858e 100644
--- a/UD/template/maca_trans_morpho/Makefile
+++ b/UD/template/maca_trans_morpho/Makefile
@@ -6,7 +6,7 @@ CFF_TRAIN=train.cff
 FANN_TRAIN=train.fann
 CFF_FANN_TRAIN=train.fann.cff
 CFF_CUTOFF_TRAIN=train.cutoff.cff
-PERCEPTRON_ITERATIONS=9
+PERCEPTRON_ITERATIONS=4
 CFF_CUTOFF=1
 FEATURES_MODEL_FILENAME=../../fm/maca_trans_morpho.fm
 FEATURES_MODEL_FANN_FILENAME=../../fm/maca_trans_tagger_fann.fm
diff --git a/UD/ud_template.tgz b/UD/ud_template.tgz
index ac95a1dad74c60586d6d14ad42dda8ceae018e79..3a068f9ad8b0c3eaa79a41cd1f88c6824f7c5917 100644
GIT binary patch
literal 1388
zcmb2|=3vmM{1Cyw{5Co_f038Sx7VMv)i%n%Vi%ooF-LgvX)Uv}x>-}+)v1Im%;w6f
zdAaxH`u)bt$|o8w=v{bsxAM}O*ym@YpBKz4KIc>V`es?!x>#*NC8zDHANa3iU;0A5
zO{d$-_f^MUKmQPs$Tgjlbexv59WfGjExaSYMwY?!^w$5eZ!bUHbgMqthUe_VyKC-e
zZT&lcY3YVr$5TIV*!<Q#`|qB7J+tiVQzsr{IXGiw`Qv|Ax87fQao;Yp_g6pa{!BHH
z%<5HpnYs3w?xoinJX_yKe)9g^>Lq>aPDS$$g@|?4%wp2{-8vF$u6ayd$Nfh9v~5)T
z)ihlW$J@LD`(k2Wzq%jAe8G3ZdA1BbhMSwW-^{Jgn-y_7qrxg9YkR`VAB!3P#4f5o
z{P4x+WOgT^d0RL5m`vX5o$uCPxYwNj+U4MFQ}%z;S-E!WZTDlVjFY<O3eOMVkkhJb
z@O@vo^H$?CF`ISQGJkVEG5$7LQS^;^=&7x?{F*;ZtCu944|(-yp7X-}8@x00;^Jg4
zWC<Itxm^4D!xyh_dM#>Y!smK&zdG){`s+#_quH$w^%-7w_$8NSM_=sARO>hzvGT&E
z8Q)gKs+!M>zN#vj6+As<g{$Dq)8CHpG|Rhw`0D=qfW732Ik!{KH+t^7-kkZT-|>h1
z#{DX%V~_lwbfwjl;dGqDG5K@<k1U;H;~w|7{p0*A6RTtXhblByaGAGne;q%u+UR<{
zsK=%Kn@epkN9lFXQda)&eZl_7S)2c>89aYo=U&H|@>0IIF6O`N%-8$5uC%XO_)L#+
zOJJ7Wt(&ip`Y-eoPO@@X{PxX^Kk`rexxeq&axk+*-(X$snOA2XZ*sj|;T9lyF6h)G
zri0r**Rwxmob!M6ft~-lzx}b>^ryc4<A1I<Wjp?_XHxzBzbZhzm)mvKj;{-<X6Y4n
zy1Y<2|DyS`d)@=1JLzZsi^lz5$>Q?=>5jj`Mt}F;=>EK4!|C1s)eQmmve7O3{vHq6
zzhCyOZ*9ZAUAwO{<$Vx4{mnl5d~MM;-7nTc-qWo(1(_DCDdIBW`}VJ4mTZy3i7iR@
z`=yin?7#fFkyUvpfF<d9=F)(J){p8rT;Kg)?GW=nHNdg$|8$qumH$sF_5asq<lF7f
z_hXNeGRN1d%@PYH9;#+<<&Apt%kTLAqig%)<_YXGIhe%#M`gL=tJgCwod|78n!mAL
zDE<zg#m0Z<1>+<&^|@=T|DO(cRrGH?li=U!(^%7<)=!ez|9_>&gwOS=629-h27jAY
zCNXDTN9(i0tMm7(&XBHGz2Fe{e|?9oq}GHz|2G}^TQ8dMHQt6d<!QZ8&ue+78&Ci3
z=X%jR@#Xv{8%(W=CN#e;h<K5-u&<|R=LK~hw~}QKT22(UOtg}lU;8rRwdq8k9L3s#
z*AEWPIMQw{XV&Mevg}sQ0^i=7DLMBfZ~2(MQ+r$)-}&W@_aTqq-u9Vh=3h?+>{b`C
z2rG>`QnJVFbJ`Y}{+F%cl^a*r|G%lS+Wqf-f3{cKD{r*UeUiF)gWN&S4>?S3x8%)V
zIKAF5aqAK7Z5#J)+FU(*>0746KMU$Fr#(waIk$D=!d~`Ud-Pe?XS*2`*va&B+F8!H
z{YU=9|BFB3pM1C1VqE>7|H1#fKY#e0mLC7PpQ}da|9`E^>c{y{{^4Kz=YFN||KIt?
zcK^8lepdVcX3wHe^(8T<{;zx@$1}J6PBDvMaq_a2R+UV(7v?<gVt>K)qw=jfv+$}q
zzJKD1|9{W-I`C?L)xYEQZ$zKh&%FPCJ<I%=|1Ym*et+`+{(biUzu)hf^!b0nr1%H_
zQ(mPl-x0&Pz{#?SRZU33H*J^Y+%xB|O{`dDAlmXvGtVPRxmTo!Mf{`QpBuB9VL(~(
OKO?vE(Jc%C3=9Awo7|58

literal 1431
zcmb2|=3prK`8I-q`EAtgyhk1aXJ7x&j#A=(c<{luP-)Ig+k{0Kh6)8~^1AGAc)EBy
zywj$xzyEHVbhExhS&sVV%kF_5FJ8@F9kYDZF5l-@zl!av4pwAg37z^;{fbD%pH9WD
zrW$5d$H^+Lf+;HOu8yL44FOLVX?(jqcQfOIV;}F<i`}l-x#(NGh`;Ub?-nn&M7{lx
z{_f43Z*}M1o2~o(MkjvzpPa2<Y6UZU9<S;AXYuCFz1F$2-rlyZUcfJZLjG>x`HW9X
zmT!*O=CR)Py74~IljrZ6YvoD?zve3JE3Wx3cP8whRHQ+K&P1(R`5pOv`^(F^uTFZr
zXj}Dx+MRoizwYl*TQFN;zVCr@#+$_(ZmrqBzG6+PiH)qu_UJ7ke=f2-)Vuh;!s6GT
zOU#O$2WDED%{qDaiTUE=JMP@Fe*Sc6gjP)T#4ERM=JX%ST5=>!THjJp+&1J7!_Nn$
z`))AQ@y-)Jz2~pXC)VGZ6S}@l)_%I%u|ecdoK4V{bgj~|^BXVBzoGhJ^Y(K7Ew@~F
zUYFdywRg|ty~P_l?;bZ!j;_5V-@~^xx}n$XAHQ$sZ^gx5VxR9wp2wNAaZ5-}<jQZ!
zyHDlWY+Y4sbY<DOlL6gIM$g}Bn)1|%T)Zy-mbpG%p*SzxmR+sVH~7;J`Q{JykLonK
zv>w%Sgg#}{2-W9#Z2r9dQIOWW$4-A&Yh6+36jq#)a^g!oyHUsTlHwh&{!V9IQXiS}
zzTWo4rTyE#G2fnf{`9QH3je+L{1>{kf6+>%)Bj&NO#63sQZ|$8uj6z4zpU?UQgB>U
z-}&_a)&&<Y>;8Qt<RHED+27vXb84SUnJ6w}Ho1LIZR-EUe+r+yf39cIBj<8#t8c{I
zz4NzcNT24&I_&8c8K_Vp`d@p-f7K}r2LIMGFaM=~`*XX-r~g4u{+kB27+op&vwVft
z=l`V(lK$WCc(ufH;bN^k<yG^ZdCyT6+;u|uSHhn{!ve{7lb`%Q@_+R#w@>x0KmSKL
ze2MS9ZNK2cf9|L!|3e+87W`9RDF5vLYliC<GSNr&{e8dDvf_n*?&j`&RWcHS<}8~l
ze;>E{5Wgpv`)}`&PfPinS{O7|%P{rM-F<gqTEC3IvZ#&kkN0vOm#?nP*{YMM!QA<I
zi`R+=zkcZ#{a5<@zf@u6f2|b@4*Yj^^m`RAxbV=Qdf5Ys-w!g*4@<FFck#h#Mk&pG
z<pH-AJYDy`;7|O$UoW0=zT@Uk%N9_x^bLwXKE-XF(3IajhbRB|RCb`_Ykl93Qy!&}
z4?@4%3ts>4!n%~D=YRHriT|`uGj)Hm*Eq%bZ~Do<{Kx)ZU2HEmqy3HUdjku5!S1u|
zq4(=Nm*o8kbzq+UtKKsxB~anJy~5}JUlrE=|CG5v<A1YP(El!_(`Wu)XWV>y`{BuR
zN_zWbo;<LU5wnV3p}csG{+GhLc5^kY6hGOlThHIF9$$5?$Wz5Q+C%=%t=dg)Cm(*>
zJ@vU@=SIVqQpwpyQn&i28D3m_{9?hapO06hvlrDmZ=a`j?24JYZL)(#!HU^iI$pBA
zy)ygar=}47J*PIu9{zpaEYkMX|J*BA_xnB8d+B*E%=o#a42!>-hiCg^x7ZBXZyCBL
zyK<A(zr5Le_0CsD^BSZ5Qo_|&wl6<+!Kf`Z`Bvk_*HHpIbNG%YHTm~l$^FOwq(0E~
z{;8h-uU(@4%OCi^YTAYUO-u~O{+B*T{`o(Brk!=&iD&gsXZ&Bf`iFgu@jv1J?Xmg~
z|7T3%{`tQm=G6b4Pvm%OId{ejxcIE?TsZp)X9<J-UJbbg+6m9M&gWSf@U{78Jm>5B
zrep8^UuoL%@AdO~&nrT|+`sTk{=eRM{>=ZMR~u)2{{P|Z3XA{!YCk#uonQ9ui^y|r
z#*i0H35ueeU)+T+%k?i)e|^FxRH8WMv4DBX+KFyl;w<7P_1@f=)d&NM(*K$6*UyM%
I2w-3U00Tev8~^|S

diff --git a/fm/maca_trans_parser.fm b/fm/maca_trans_parser.fm
index 0806e1c..a34e6ca 100644
--- a/fm/maca_trans_parser.fm
+++ b/fm/maca_trans_parser.fm
@@ -1,3 +1,7 @@
+# For UD: candidate features meant to improve punctuation prediction (left commented out).
+#s0p b0p ldep_b0r
+#s0p b0p ldep_b0p
+
 b0m
 s0m
 b0m s0m
diff --git a/makefiles/treebank_ud.makefile b/makefiles/treebank_ud.makefile
index 7fe3f07..1f9cfea 100644
--- a/makefiles/treebank_ud.makefile
+++ b/makefiles/treebank_ud.makefile
@@ -1,16 +1,20 @@
 TOOLS=../../../tools
+THRESHOLD=10
 
 compile: train.mcf  test.mcf dev.mcf
 
 train.mcf: $(TRAIN)
-	$(TOOLS)/conllu2mcf -f $< -1W -2C -3F -4L -5H -6D > $@
+	$(TOOLS)/conll_keep_most_frequent_morpho_tags.pl $< $(THRESHOLD) > $@.tmp
+	$(TOOLS)/conllu2mcf -f $@.tmp -1W -2C -3F -4L -5H -6D > $@ && rm -f $@.tmp
 
 test.mcf: $(TEST)
-	$(TOOLS)/conllu2mcf -f $< -1W -2C -3F -4L -5H -6D > $@
+	$(TOOLS)/conll_keep_most_frequent_morpho_tags.pl $< $(THRESHOLD) > $@.tmp
+	$(TOOLS)/conllu2mcf -f $@.tmp -1W -2C -3F -4L -5H -6D > $@ && rm -f $@.tmp
 
 
 dev.mcf: $(TEST)
-	$(TOOLS)/conllu2mcf -f $< -1W -2C -3F -4L -5H -6D > $@
+	$(TOOLS)/conll_keep_most_frequent_morpho_tags.pl $< $(THRESHOLD) > $@.tmp
+	$(TOOLS)/conllu2mcf -f $@.tmp -1W -2C -3F -4L -5H -6D > $@ && rm -f $@.tmp
 
 
 clean: 
-- 
GitLab