diff --git a/UD/create_archi.pl b/UD/create_archi.pl index 551db7fc7564b0146cf9635a7687dcd93b3ee778..cd9a51ae10fad0a5a95246b0aaaf410e8e0f5b3d 100755 --- a/UD/create_archi.pl +++ b/UD/create_archi.pl @@ -66,26 +66,22 @@ "UD_Vietnamese"); -foreach $L (@LANG) -{ - $command = "mkdir -f ../$L"; - print "$command\n"; - system($command); +chdir("template"); - $command = "cp ud_template.tgz ../$L"; - print "$command\n"; - system($command); +$command = "tar cvfz ../ud_template.tgz *"; +print "$command\n"; +system($command); - $command = "cd ../$L"; - print "$command\n"; - system($command); +chdir(".."); - $command = "tar xvfz ud_template.tgz -C../$L"; +foreach $L (@LANG) +{ + $command = "mkdir ../$L"; print "$command\n"; system($command); - $command = "cd ../UD"; + $command = "tar xfz ud_template.tgz -C../$L"; print "$command\n"; system($command); - + } diff --git a/UD/launch.sh b/UD/launch.sh index 25fee25992aa463a5d069dbd6f249058970ed9e1..a57b9e9f6169dfae10cea8f993a6f138edc0bc6b 100755 --- a/UD/launch.sh +++ b/UD/launch.sh @@ -151,8 +151,8 @@ make _LANG=UD_Romanian _CODE=ro -C ../UD_Romanian all make -C ../UD_Russian clean make _LANG=UD_Russian _CODE=ru -C ../UD_Russian all -make -C ../UD_Russian-SynTagRus clean -make _LANG=UD_Russian-SynTagRus _CODE=ru_syntagrus -C ../UD_Russian-SynTagRus all +#make -C ../UD_Russian-SynTagRus clean +#make _LANG=UD_Russian-SynTagRus _CODE=ru_syntagrus -C ../UD_Russian-SynTagRus all make -C ../UD_Slovak clean make _LANG=UD_Slovak _CODE=sk -C ../UD_Slovak all diff --git a/UD/mise_en_forme.pl b/UD/mise_en_forme.pl index befc4cb6eed28149fb2cf0ba2e6c0cb4539e852c..1d596de5c334e5fb980bf272adf5a440b852cdfc 100755 --- a/UD/mise_en_forme.pl +++ b/UD/mise_en_forme.pl @@ -16,11 +16,12 @@ foreach $fichier (@tab_fichiers) open($F, "<", $fichier) or die "Can't open < $fichier: $!"; while(<$F>){ - if (/Wplgfs/){ + if (/Wpmlgfs/){ chop; # print; - ($file, $pos, $lemma, $uas, $las, $srec, $sacc, $size) = split /\t/; + ($file, $pos, $morpho, $lemma, $uas, $las, $srec, $sacc, $size) = split /\t/; $pos_acc{$langue} = $pos; + $morpho_acc{$langue} = $morpho; $lemma_acc{$langue} = $lemma; $uas{$langue} = $uas; $las{$langue} = $las; @@ -34,6 +35,7 @@ foreach $fichier (@tab_fichiers) foreach $langue (keys %h_langues){ $pos = $pos_acc{$langue}; + $morpho = $morpho_acc{$langue}; $lemma = $lemma_acc{$langue}; $uas = $uas{$langue}; $las = $las{$langue}; @@ -42,20 +44,22 @@ foreach $langue (keys %h_langues){ $size = $size{$langue}; $total_size += $size; $total_pos += $pos * $size; + $total_morpho += $morpho * $size; $total_lemma += $lemma * $size; $total_uas += $uas * $size; $total_las += $las * $size; $total_srec += $srec * $size; $total_sacc += $sacc * $size; - print "$langue\t$pos\t$lemma\t$uas\t$las\t$srec\t$sacc\t$size\n"; + print "$langue\t$pos\t$morpho\t$lemma\t$uas\t$las\t$srec\t$sacc\t$size\n"; } $pos = $total_pos / $total_size; +$morpho = $total_morpho / $total_size; $lemma = $total_lemma / $total_size; $uas = $total_uas / $total_size; $las = $total_las / $total_size; $srec = $total_srec / $total_size; $sacc = $total_sacc / $total_size; -printf("\navg\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", $pos, $lemma, $uas, $las, $srec, $sacc); +printf("\navg\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", $pos, $morpho, $lemma, $uas, $las, $srec, $sacc); diff --git a/UD/template/Makefile b/UD/template/Makefile index d54d12a15305f243228fce8e957618028696a0e0..331ac3be5c18e43852198a58deb677f065bc5b6c 100644 --- a/UD/template/Makefile +++ b/UD/template/Makefile @@ -1,14 +1,5 @@ UD_DIR=../../../data/ud-treebanks-conll2017 -## UD_French -#LANG=UD_French -#CODE=fr-ud - -## -#LANG=UD_English -#CODE=en-ud - - TRAIN=$(UD_DIR)/$(_LANG)/$(_CODE)-ud-train.conllu DEV=$(UD_DIR)/$(_LANG)/$(_CODE)-ud-dev.conllu TEST=$(UD_DIR)/$(_LANG)/$(_CODE)-ud-dev.conllu @@ -20,16 +11,18 @@ compile: $(MAKE) TRAIN=$(TRAIN) -C data/morpho-lexicon compile $(MAKE) TRAIN=$(TRAIN) TEST=$(TEST) DEV=$(DEV) -C data/treebank compile $(MAKE) -C maca_lexer compile - $(MAKE) -C maca_trans_parser compile $(MAKE) -C maca_trans_tagger compile + $(MAKE) -C maca_trans_morpho compile + $(MAKE) -C maca_trans_parser compile # $(MAKE) -C maca_crf_tagger compile install: -mkdir -p bin - $(MAKE) -C maca_trans_parser install + $(MAKE) -C maca_lexer install $(MAKE) -C maca_trans_tagger install + $(MAKE) -C maca_trans_morpho install $(MAKE) -C maca_lemmatizer install - $(MAKE) -C maca_lexer install + $(MAKE) -C maca_trans_parser install # $(MAKE) -C maca_crf_tagger install # @tar -cvzf ./maca_datas.tgz bin @@ -40,7 +33,8 @@ clean: $(MAKE) -C data/morpho-lexicon clean $(MAKE) -C data/treebank clean $(MAKE) -C maca_lexer clean - $(MAKE) -C maca_trans_parser clean $(MAKE) -C maca_trans_tagger clean + $(MAKE) -C maca_trans_morpho clean + $(MAKE) -C maca_trans_parser clean # $(MAKE) -C maca_crf_tagger clean $(MAKE) -C eval clean diff --git a/UD/ud_template.tgz b/UD/ud_template.tgz index f4d2ffaea32cb3e490c6e2457c01a76f60ce6928..ac95a1dad74c60586d6d14ad42dda8ceae018e79 100644 Binary files a/UD/ud_template.tgz and b/UD/ud_template.tgz differ diff --git a/fm/maca_trans_morpho.fm b/fm/maca_trans_morpho.fm index 9fd77ae0860faf38daf7b6c7cfccf99fb61cd203..e35691ddeee567ee22801901383176ca22b53066 100644 --- a/fm/maca_trans_morpho.fm +++ b/fm/maca_trans_morpho.fm @@ -1,19 +1,21 @@ -b0U1 -b0f +#b0U1 +b0p b0f b0len -bm1f -bm2f +#bm1f +#bm2f b0p bm1p -bm2p -bm3p -bm2p bm1p -bm2p bm3p -b0s1 -b0s2 -b0s3 -b0s4 -b0s5 -b0s1 b0s2 -b0s1 b0s2 b0s3 -b0s1 b0s2 b0s3 b0s4 +bm1m +bm1p bm1m +#bm2p +#bm3p +#bm2p bm1p +#bm2p bm3p +b0p b0s1 +#b0s2 +#b0s3 +#b0s4 +#b0s5 +b0p b0s1 b0s2 +b0p b0s1 b0s2 b0s3 +b0p b0s1 b0s2 b0s3 b0s4 diff --git a/fm/maca_trans_parser.fm b/fm/maca_trans_parser.fm index 6e18e5ed6fec6180187615dd6eafd3ff5df848d7..c57119c144c7d9ce8c56862878decc63226c145c 100644 --- a/fm/maca_trans_parser.fm +++ b/fm/maca_trans_parser.fm @@ -1,7 +1,7 @@ -#b0m -#s0m -#b0m s0m -#s0l s0m b0l b0m +b0m +s0m +b0m s0m +s0l s0m b0l b0m b0g diff --git a/fm/maca_trans_tagger.fm b/fm/maca_trans_tagger.fm index 50af67ebe5ff61fa2c71a524cdcd40401183c8df..f96b164c5095ce83dfb40ed72cee2b99421e075a 100644 --- a/fm/maca_trans_tagger.fm +++ b/fm/maca_trans_tagger.fm @@ -17,10 +17,10 @@ bm2p bm3p bm1p b0sgn b0s1 -b0s2 -b0s3 -b0s4 -b0s5 +#b0s2 +#b0s3 +#b0s4 +#b0s5 b0s1 b0s2 b0s1 b0s2 b0s3 b0s1 b0s2 b0s3 b0s4 diff --git a/fr/maca_trans_morpho/Makefile b/fr/maca_trans_morpho/Makefile index 71a25f6e1ac6abb7608db400219b02e08f580a0f..43b46b7b9332631f3597a6982edc7e32359dd795 100644 --- a/fr/maca_trans_morpho/Makefile +++ b/fr/maca_trans_morpho/Makefile @@ -6,8 +6,8 @@ CFF_TRAIN=train.cff FANN_TRAIN=train.fann CFF_FANN_TRAIN=train.fann.cff CFF_CUTOFF_TRAIN=train.cutoff.cff -PERCEPTRON_ITERATIONS=9 -CFF_CUTOFF=1 +PERCEPTRON_ITERATIONS=4 +CFF_CUTOFF=2 FEATURES_MODEL_FILENAME=../../fm/maca_trans_morpho.fm FEATURES_MODEL_FANN_FILENAME=../../fm/maca_trans_tagger_fann.fm VOCABS_FILENAME=maca_trans_morpho.vocab diff --git a/fr/maca_trans_tagger/maca_trans_tagger.fm b/fr/maca_trans_tagger/maca_trans_tagger.fm index ddeac496238341129b1d03e96ff3e203cf5a0d24..8c8b9fc2a7557e35ac7cb803fd0d0d826969f169 100644 --- a/fr/maca_trans_tagger/maca_trans_tagger.fm +++ b/fr/maca_trans_tagger/maca_trans_tagger.fm @@ -10,3 +10,8 @@ bm3p bm2p bm1p bm2p bm3p bm1p b0sgn + +b0s1 +b0s1 b0s2 +b0s1 b0s2 b0s3 +b0s1 b0s2 b0s3 b0s4 diff --git a/makefiles/eval.makefile b/makefiles/eval.makefile index dbb2f28ecc9bd7e6ada9929b069f1cffb87e4ef2..4cc42d6db271848861fb9e3a0d063cee347510c2 100644 --- a/makefiles/eval.makefile +++ b/makefiles/eval.makefile @@ -79,18 +79,4 @@ eval: eval_header test_WPLGFS test_WPLgfs test_WPlgfs test_Wplgfs clean: # -rm $(RESULT_FILE) - -rm test_W - -rm test_P - -rm test_L - -rm test_G - -rm test_F - -rm test_S - -rm test_WP - -rm test_WPL - -rm test_WPLGFS - -rm test_Wp - -rm test_Wpl - -rm test_WPl - -rm test_WPLgfs - -rm test_WPlgfs - -rm test_Wplgfs + -rm test_* diff --git a/makefiles/eval_ud.makefile b/makefiles/eval_ud.makefile index 14f10c82fce0fda7f4ed8c2512e0e1aaf3587b63..804ea0e28324d75efb67a944f920864cb486cf84 100644 --- a/makefiles/eval_ud.makefile +++ b/makefiles/eval_ud.makefile @@ -19,8 +19,7 @@ WPMLGFS_MCD_FILE=../../mcd/wpmlgfs.mcd WPLGFS_MCD_FILE=../../mcd/wplgfs.mcd WPLSGF_MCD_FILE=../../mcd/wplsgf.mcd - -all: test_W test_WP test_WPL test_WPLGFS test_Wp test_Wpl test_WPl test_WPLSgf test_WPLgfs test_WPlgfs test_Wplgfs test_WPMLgfs test_WPMLGFS eval #eval_ud +all: eval test_W: $(TEST) cut -f $(WORD_COLUMN) $< > $@ @@ -46,89 +45,56 @@ test_S:$(TEST) test_WP:test_W test_P paste test_W test_P > $@ -test_WPL:test_W test_P test_L - paste test_W test_P test_L > $@ - -test_WPLS:test_W test_P test_L test_S - paste test_W test_P test_L test_S > $@ - test_WPM:test_W test_P test_M paste test_W test_P test_M > $@ test_WPML:test_W test_P test_M test_L paste test_W test_P test_M test_L > $@ -test_WPMLS:test_W test_P test_M test_L test_S - paste test_W test_P test_M test_L test_S > $@ - -test_WPLGFS:test_W test_P test_L test_G test_F test_S - paste test_W test_P test_L test_G test_F test_S > $@ - test_WPMLGFS:test_W test_P test_M test_L test_G test_F test_S paste test_W test_P test_M test_L test_G test_F test_S > $@ - test_Wp: test_W - $(TAGGER) -L $(LANGUAGE) -C $(WPLGFS_MCD_FILE) -i $< -S > $@ + $(TAGGER) -L $(LANGUAGE) -C $(WPMLGFS_MCD_FILE) -i $< -S > $@ # cat $< | $(CRF_TAGGER) -L $(LANGUAGE) > $@ -test_Wpl: test_Wp - $(LEMMATIZER) -C $(WPLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@ +test_WPm: test_WP + $(MORPHO_ANALYZER) -C $(WPMLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@ test_Wpm: test_Wp $(MORPHO_ANALYZER) -C $(WPMLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@ -test_Wpml: test_Wpm +test_WPMl: test_WPM + $(LEMMATIZER) -C $(WPMLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@ + +test_WPml: test_WPm $(LEMMATIZER) -C $(WPMLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@ -test_WPl: test_WP - $(LEMMATIZER) -C $(WPLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@ +test_Wpml: test_Wpm + $(LEMMATIZER) -C $(WPMLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@ test_WPMLgfs: test_WPML $(PARSER) -L $(LANGUAGE) -C $(WPMLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@ -test_Wpmlgfs: test_Wpml +test_WPMlgfs: test_WPMl $(PARSER) -L $(LANGUAGE) -C $(WPMLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@ -test_WPLSgf: test_WPLS - $(PARSER) -L $(LANGUAGE) -C $(WPLSGF_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@ - -test_WPlgfs: test_WPl - $(PARSER) -L $(LANGUAGE) -C $(WPLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@ - -test_WPLgfs: test_WPL - $(PARSER) -L $(LANGUAGE) -C $(WPLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@ - -test_Wplgfs: test_Wpl - $(PARSER) -L $(LANGUAGE) -C $(WPLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@ - -eval_Wpmlgfs: test_WPMLGFS test_Wpmlgfs - $(EVAL_WPLGFS) -g test_WPMLGFS -s test_WPMLgfs - -eval_WPMLgfs: test_WPMLGFS test_WPMLgfs - $(EVAL_WPLGFS) -g test_WPMLGFS -s test_WPMLgfs - -eval_WPLgfs: test_WPLGFS test_WPLgfs - $(EVAL_WPLGFS) -g test_WPLGFS -s test_WPLgfs - -eval_WPlgfs: test_WPLGFS test_WPlgfs - $(EVAL_WPLGFS) -g test_WPLGFS -s test_WPlgfs +test_WPmlgfs: test_WPml + $(PARSER) -L $(LANGUAGE) -C $(WPMLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@ -eval_Wplgfs: test_WPLGFS test_Wplgfs - $(EVAL_WPLGFS) -g test_WPLGFS -s test_Wplgfs +test_Wpmlgfs: test_Wpml + $(PARSER) -L $(LANGUAGE) -C $(WPMLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@ eval_header: echo "file pos morpho lemma uas las srec sacc" > $(RESULT_FILE) -eval: eval_header test_WPLGFS test_WPLgfs test_WPlgfs test_Wplgfs test_Wpmlgfs - $(EVAL_MCF) -G WPLGFS -S WPLGFS -g test_WPLGFS -s test_WPLGFS >> $(RESULT_FILE) - $(EVAL_MCF) -G WPMLGFS -S WPMLGFS -g test_WPMLGFS -s test_WPMLgfs >> $(RESULT_FILE) - $(EVAL_MCF) -G WPLGFS -S WPLSGF -g test_WPLGFS -s test_WPLSgf >> $(RESULT_FILE) - $(EVAL_MCF) -G WPLGFS -S WPLGFS -g test_WPLGFS -s test_WPLgfs >> $(RESULT_FILE) - $(EVAL_MCF) -G WPLGFS -S WPLGFS -g test_WPLGFS -s test_WPlgfs >> $(RESULT_FILE) - $(EVAL_MCF) -G WPLGFS -S WPLGFS -g test_WPLGFS -s test_Wplgfs >> $(RESULT_FILE) - $(EVAL_MCF) -G WPMLGFS -S WPMLGFS -g test_WPMLGFS -s test_Wpmlgfs >> $(RESULT_FILE) +eval: eval_header test_WPMLGFS test_WPMLgfs test_WPMlgfs test_WPmlgfs test_Wpmlgfs + $(EVAL_MCF) -G WPMLGFS -g test_WPMLGFS -S WPMLGFS -s test_WPMLGFS >> $(RESULT_FILE) + $(EVAL_MCF) -G WPMLGFS -g test_WPMLGFS -S WPMLGFS -s test_WPMLgfs >> $(RESULT_FILE) + $(EVAL_MCF) -G WPMLGFS -g test_WPMLGFS -S WPMLGFS -s test_WPMlgfs >> $(RESULT_FILE) + $(EVAL_MCF) -G WPMLGFS -g test_WPMLGFS -S WPMLGFS -s test_WPmlgfs >> $(RESULT_FILE) + $(EVAL_MCF) -G WPMLGFS -g test_WPMLGFS -S WPMLGFS -s test_Wpmlgfs >> $(RESULT_FILE) test_WPLGFS.conll: test_WPLGFS @@ -144,22 +110,4 @@ eval_ud: test_WPLGFS.conll test_WPLSgf.conll python ../../tools/conll17_ud_eval.py test_WPLGFS.conll test_WPLSgf.conll clean: # -rm $(RESULT_FILE) - -rm test_W - -rm test_P - -rm test_M - -rm test_L - -rm test_G - -rm test_F - -rm test_S - -rm test_WP - -rm test_WPL - -rm test_WPLS - -rm test_WPLGFS - -rm test_WPMLGFS - -rm test_Wp - -rm test_Wpl - -rm test_WPl - -rm test_WPMLgfs - -rm test_WPLgfs - -rm test_WPlgfs - -rm test_Wplgfs + -rm test_* diff --git a/makefiles/maca_trans_morpho.makefile b/makefiles/maca_trans_morpho.makefile index 5981e83b726d76f9fa517696b27e30027c15b5a3..a9758a0925348cae339139963c880a619455bc4f 100644 --- a/makefiles/maca_trans_morpho.makefile +++ b/makefiles/maca_trans_morpho.makefile @@ -27,6 +27,7 @@ $(MODEL_FILENAME): $(CFF_CUTOFF_TRAIN) install: - cp $(FEATURES_MODEL_FILENAME) ../bin + - cp $(FEATURES_MODEL_FANN_FILENAME) ../bin - cp $(VOCABS_FILENAME) ../bin - cp $(MODEL_FILENAME) ../bin