Skip to content
Snippets Groups Projects
Commit 67262af4 authored by Alexis Nasr's avatar Alexis Nasr
Browse files

integrated morphology in evaluation

parent b42fd532
No related branches found
No related tags found
No related merge requests found
......@@ -66,25 +66,21 @@
"UD_Vietnamese");
foreach $L (@LANG)
{
$command = "mkdir -f ../$L";
print "$command\n";
system($command);
chdir("template");
$command = "cp ud_template.tgz ../$L";
$command = "tar cvfz ../ud_template.tgz *";
print "$command\n";
system($command);
$command = "cd ../$L";
print "$command\n";
system($command);
chdir("..");
$command = "tar xvfz ud_template.tgz -C../$L";
foreach $L (@LANG)
{
$command = "mkdir ../$L";
print "$command\n";
system($command);
$command = "cd ../UD";
$command = "tar xfz ud_template.tgz -C../$L";
print "$command\n";
system($command);
......
......@@ -151,8 +151,8 @@ make _LANG=UD_Romanian _CODE=ro -C ../UD_Romanian all
make -C ../UD_Russian clean
make _LANG=UD_Russian _CODE=ru -C ../UD_Russian all
make -C ../UD_Russian-SynTagRus clean
make _LANG=UD_Russian-SynTagRus _CODE=ru_syntagrus -C ../UD_Russian-SynTagRus all
#make -C ../UD_Russian-SynTagRus clean
#make _LANG=UD_Russian-SynTagRus _CODE=ru_syntagrus -C ../UD_Russian-SynTagRus all
make -C ../UD_Slovak clean
make _LANG=UD_Slovak _CODE=sk -C ../UD_Slovak all
......
......@@ -16,11 +16,12 @@ foreach $fichier (@tab_fichiers)
open($F, "<", $fichier)
or die "Can't open < $fichier: $!";
while(<$F>){
if (/Wplgfs/){
if (/Wpmlgfs/){
chop;
# print;
($file, $pos, $lemma, $uas, $las, $srec, $sacc, $size) = split /\t/;
($file, $pos, $morpho, $lemma, $uas, $las, $srec, $sacc, $size) = split /\t/;
$pos_acc{$langue} = $pos;
$morpho_acc{$langue} = $morpho;
$lemma_acc{$langue} = $lemma;
$uas{$langue} = $uas;
$las{$langue} = $las;
......@@ -34,6 +35,7 @@ foreach $fichier (@tab_fichiers)
foreach $langue (keys %h_langues){
$pos = $pos_acc{$langue};
$morpho = $morpho_acc{$langue};
$lemma = $lemma_acc{$langue};
$uas = $uas{$langue};
$las = $las{$langue};
......@@ -42,20 +44,22 @@ foreach $langue (keys %h_langues){
$size = $size{$langue};
$total_size += $size;
$total_pos += $pos * $size;
$total_morpho += $morpho * $size;
$total_lemma += $lemma * $size;
$total_uas += $uas * $size;
$total_las += $las * $size;
$total_srec += $srec * $size;
$total_sacc += $sacc * $size;
print "$langue\t$pos\t$lemma\t$uas\t$las\t$srec\t$sacc\t$size\n";
print "$langue\t$pos\t$morpho\t$lemma\t$uas\t$las\t$srec\t$sacc\t$size\n";
}
$pos = $total_pos / $total_size;
$morpho = $total_morpho / $total_size;
$lemma = $total_lemma / $total_size;
$uas = $total_uas / $total_size;
$las = $total_las / $total_size;
$srec = $total_srec / $total_size;
$sacc = $total_sacc / $total_size;
printf("\navg\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", $pos, $lemma, $uas, $las, $srec, $sacc);
printf("\navg\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", $pos, $morpho, $lemma, $uas, $las, $srec, $sacc);
# Relative path to the directory holding the UD treebanks (CoNLL 2017 release).
UD_DIR=../../../data/ud-treebanks-conll2017
## UD_French
#LANG=UD_French
#CODE=fr-ud
##
#LANG=UD_English
#CODE=en-ud
# Corpus files for one language, built from $(UD_DIR), $(_LANG) and $(_CODE).
# _LANG and _CODE are not assigned in the lines shown here; presumably they are
# supplied by the caller on the make command line -- TODO confirm.
TRAIN=$(UD_DIR)/$(_LANG)/$(_CODE)-ud-train.conllu
DEV=$(UD_DIR)/$(_LANG)/$(_CODE)-ud-dev.conllu
# NOTE(review): TEST expands to the same "-ud-dev.conllu" file as DEV, so any
# evaluation run on $(TEST) is run on the dev set -- confirm this is intentional.
TEST=$(UD_DIR)/$(_LANG)/$(_CODE)-ud-dev.conllu
......@@ -20,16 +11,18 @@ compile:
$(MAKE) TRAIN=$(TRAIN) -C data/morpho-lexicon compile
$(MAKE) TRAIN=$(TRAIN) TEST=$(TEST) DEV=$(DEV) -C data/treebank compile
$(MAKE) -C maca_lexer compile
$(MAKE) -C maca_trans_parser compile
$(MAKE) -C maca_trans_tagger compile
$(MAKE) -C maca_trans_morpho compile
$(MAKE) -C maca_trans_parser compile
# $(MAKE) -C maca_crf_tagger compile
install:
-mkdir -p bin
$(MAKE) -C maca_trans_parser install
$(MAKE) -C maca_lexer install
$(MAKE) -C maca_trans_tagger install
$(MAKE) -C maca_trans_morpho install
$(MAKE) -C maca_lemmatizer install
$(MAKE) -C maca_lexer install
$(MAKE) -C maca_trans_parser install
# $(MAKE) -C maca_crf_tagger install
# @tar -cvzf ./maca_datas.tgz bin
......@@ -40,7 +33,8 @@ clean:
$(MAKE) -C data/morpho-lexicon clean
$(MAKE) -C data/treebank clean
$(MAKE) -C maca_lexer clean
$(MAKE) -C maca_trans_parser clean
$(MAKE) -C maca_trans_tagger clean
$(MAKE) -C maca_trans_morpho clean
$(MAKE) -C maca_trans_parser clean
# $(MAKE) -C maca_crf_tagger clean
$(MAKE) -C eval clean
No preview for this file type
b0U1
b0f
#b0U1
b0p b0f
b0len
bm1f
bm2f
#bm1f
#bm2f
b0p
bm1p
bm2p
bm3p
bm2p bm1p
bm2p bm3p
b0s1
b0s2
b0s3
b0s4
b0s5
b0s1 b0s2
b0s1 b0s2 b0s3
b0s1 b0s2 b0s3 b0s4
bm1m
bm1p bm1m
#bm2p
#bm3p
#bm2p bm1p
#bm2p bm3p
b0p b0s1
#b0s2
#b0s3
#b0s4
#b0s5
b0p b0s1 b0s2
b0p b0s1 b0s2 b0s3
b0p b0s1 b0s2 b0s3 b0s4
#b0m
#s0m
#b0m s0m
#s0l s0m b0l b0m
b0m
s0m
b0m s0m
s0l s0m b0l b0m
b0g
......
......@@ -17,10 +17,10 @@ bm2p bm3p
bm1p b0sgn
b0s1
b0s2
b0s3
b0s4
b0s5
#b0s2
#b0s3
#b0s4
#b0s5
b0s1 b0s2
b0s1 b0s2 b0s3
b0s1 b0s2 b0s3 b0s4
......
......@@ -6,8 +6,8 @@ CFF_TRAIN=train.cff
FANN_TRAIN=train.fann
CFF_FANN_TRAIN=train.fann.cff
CFF_CUTOFF_TRAIN=train.cutoff.cff
PERCEPTRON_ITERATIONS=9
CFF_CUTOFF=1
PERCEPTRON_ITERATIONS=4
CFF_CUTOFF=2
FEATURES_MODEL_FILENAME=../../fm/maca_trans_morpho.fm
FEATURES_MODEL_FANN_FILENAME=../../fm/maca_trans_tagger_fann.fm
VOCABS_FILENAME=maca_trans_morpho.vocab
......
......@@ -10,3 +10,8 @@ bm3p
bm2p bm1p
bm2p bm3p
bm1p b0sgn
b0s1
b0s1 b0s2
b0s1 b0s2 b0s3
b0s1 b0s2 b0s3 b0s4
......@@ -79,18 +79,4 @@ eval: eval_header test_WPLGFS test_WPLgfs test_WPlgfs test_Wplgfs
clean:
# -rm $(RESULT_FILE)
-rm test_W
-rm test_P
-rm test_L
-rm test_G
-rm test_F
-rm test_S
-rm test_WP
-rm test_WPL
-rm test_WPLGFS
-rm test_Wp
-rm test_Wpl
-rm test_WPl
-rm test_WPLgfs
-rm test_WPlgfs
-rm test_Wplgfs
-rm test_*
......@@ -19,8 +19,7 @@ WPMLGFS_MCD_FILE=../../mcd/wpmlgfs.mcd
WPLGFS_MCD_FILE=../../mcd/wplgfs.mcd
WPLSGF_MCD_FILE=../../mcd/wplsgf.mcd
all: test_W test_WP test_WPL test_WPLGFS test_Wp test_Wpl test_WPl test_WPLSgf test_WPLgfs test_WPlgfs test_Wplgfs test_WPMLgfs test_WPMLGFS eval #eval_ud
all: eval
# test_W: column $(WORD_COLUMN) of $(TEST), extracted with cut
# ($< is the prerequisite $(TEST), $@ is the target file test_W).
test_W: $(TEST)
cut -f $(WORD_COLUMN) $< > $@
......@@ -46,89 +45,56 @@ test_S:$(TEST)
# Multi-column files assembled from single-column files.
# Each rule below runs paste on its prerequisites, in the order listed, and
# writes the result to the target ($@); the letters in a target name match the
# suffixes of the test_<X> files that are pasted together.
test_WP:test_W test_P
paste test_W test_P > $@
test_WPL:test_W test_P test_L
paste test_W test_P test_L > $@
test_WPLS:test_W test_P test_L test_S
paste test_W test_P test_L test_S > $@
test_WPM:test_W test_P test_M
paste test_W test_P test_M > $@
test_WPML:test_W test_P test_M test_L
paste test_W test_P test_M test_L > $@
test_WPMLS:test_W test_P test_M test_L test_S
paste test_W test_P test_M test_L test_S > $@
test_WPLGFS:test_W test_P test_L test_G test_F test_S
paste test_W test_P test_L test_G test_F test_S > $@
test_WPMLGFS:test_W test_P test_M test_L test_G test_F test_S
paste test_W test_P test_M test_L test_G test_F test_S > $@
test_Wp: test_W
$(TAGGER) -L $(LANGUAGE) -C $(WPLGFS_MCD_FILE) -i $< -S > $@
$(TAGGER) -L $(LANGUAGE) -C $(WPMLGFS_MCD_FILE) -i $< -S > $@
# cat $< | $(CRF_TAGGER) -L $(LANGUAGE) > $@
test_Wpl: test_Wp
$(LEMMATIZER) -C $(WPLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@
test_WPm: test_WP
$(MORPHO_ANALYZER) -C $(WPMLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@
test_Wpm: test_Wp
$(MORPHO_ANALYZER) -C $(WPMLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@
test_Wpml: test_Wpm
test_WPMl: test_WPM
$(LEMMATIZER) -C $(WPMLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@
test_WPml: test_WPm
$(LEMMATIZER) -C $(WPMLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@
test_WPl: test_WP
$(LEMMATIZER) -C $(WPLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@
test_Wpml: test_Wpm
$(LEMMATIZER) -C $(WPMLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@
test_WPMLgfs: test_WPML
$(PARSER) -L $(LANGUAGE) -C $(WPMLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@
test_Wpmlgfs: test_Wpml
test_WPMlgfs: test_WPMl
$(PARSER) -L $(LANGUAGE) -C $(WPMLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@
test_WPLSgf: test_WPLS
$(PARSER) -L $(LANGUAGE) -C $(WPLSGF_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@
test_WPlgfs: test_WPl
$(PARSER) -L $(LANGUAGE) -C $(WPLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@
test_WPLgfs: test_WPL
$(PARSER) -L $(LANGUAGE) -C $(WPLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@
test_Wplgfs: test_Wpl
$(PARSER) -L $(LANGUAGE) -C $(WPLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@
eval_Wpmlgfs: test_WPMLGFS test_Wpmlgfs
$(EVAL_WPLGFS) -g test_WPMLGFS -s test_WPMLgfs
eval_WPMLgfs: test_WPMLGFS test_WPMLgfs
$(EVAL_WPLGFS) -g test_WPMLGFS -s test_WPMLgfs
eval_WPLgfs: test_WPLGFS test_WPLgfs
$(EVAL_WPLGFS) -g test_WPLGFS -s test_WPLgfs
eval_WPlgfs: test_WPLGFS test_WPlgfs
$(EVAL_WPLGFS) -g test_WPLGFS -s test_WPlgfs
test_WPmlgfs: test_WPml
$(PARSER) -L $(LANGUAGE) -C $(WPMLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@
eval_Wplgfs: test_WPLGFS test_Wplgfs
$(EVAL_WPLGFS) -g test_WPLGFS -s test_Wplgfs
test_Wpmlgfs: test_Wpml
$(PARSER) -L $(LANGUAGE) -C $(WPMLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@
# eval_header: (re)create $(RESULT_FILE) so that it contains only the line of
# column names; ">" truncates whatever the file held before.
eval_header:
echo "file pos morpho lemma uas las srec sacc" > $(RESULT_FILE)
eval: eval_header test_WPLGFS test_WPLgfs test_WPlgfs test_Wplgfs test_Wpmlgfs
$(EVAL_MCF) -G WPLGFS -S WPLGFS -g test_WPLGFS -s test_WPLGFS >> $(RESULT_FILE)
$(EVAL_MCF) -G WPMLGFS -S WPMLGFS -g test_WPMLGFS -s test_WPMLgfs >> $(RESULT_FILE)
$(EVAL_MCF) -G WPLGFS -S WPLSGF -g test_WPLGFS -s test_WPLSgf >> $(RESULT_FILE)
$(EVAL_MCF) -G WPLGFS -S WPLGFS -g test_WPLGFS -s test_WPLgfs >> $(RESULT_FILE)
$(EVAL_MCF) -G WPLGFS -S WPLGFS -g test_WPLGFS -s test_WPlgfs >> $(RESULT_FILE)
$(EVAL_MCF) -G WPLGFS -S WPLGFS -g test_WPLGFS -s test_Wplgfs >> $(RESULT_FILE)
$(EVAL_MCF) -G WPMLGFS -S WPMLGFS -g test_WPMLGFS -s test_Wpmlgfs >> $(RESULT_FILE)
eval: eval_header test_WPMLGFS test_WPMLgfs test_WPMlgfs test_WPmlgfs test_Wpmlgfs
$(EVAL_MCF) -G WPMLGFS -g test_WPMLGFS -S WPMLGFS -s test_WPMLGFS >> $(RESULT_FILE)
$(EVAL_MCF) -G WPMLGFS -g test_WPMLGFS -S WPMLGFS -s test_WPMLgfs >> $(RESULT_FILE)
$(EVAL_MCF) -G WPMLGFS -g test_WPMLGFS -S WPMLGFS -s test_WPMlgfs >> $(RESULT_FILE)
$(EVAL_MCF) -G WPMLGFS -g test_WPMLGFS -S WPMLGFS -s test_WPmlgfs >> $(RESULT_FILE)
$(EVAL_MCF) -G WPMLGFS -g test_WPMLGFS -S WPMLGFS -s test_Wpmlgfs >> $(RESULT_FILE)
test_WPLGFS.conll: test_WPLGFS
......@@ -144,22 +110,4 @@ eval_ud: test_WPLGFS.conll test_WPLSgf.conll
python ../../tools/conll17_ud_eval.py test_WPLGFS.conll test_WPLSgf.conll
clean:
# -rm $(RESULT_FILE)
-rm test_W
-rm test_P
-rm test_M
-rm test_L
-rm test_G
-rm test_F
-rm test_S
-rm test_WP
-rm test_WPL
-rm test_WPLS
-rm test_WPLGFS
-rm test_WPMLGFS
-rm test_Wp
-rm test_Wpl
-rm test_WPl
-rm test_WPMLgfs
-rm test_WPLgfs
-rm test_WPlgfs
-rm test_Wplgfs
-rm test_*
......@@ -27,6 +27,7 @@ $(MODEL_FILENAME): $(CFF_CUTOFF_TRAIN)
# install: copy the two feature-model files, the vocabulary file and the model
# file into ../bin.
# Each recipe line starts with "-", so make ignores a failing cp (for example a
# file that has not been built) and carries on with the remaining copies.
install:
- cp $(FEATURES_MODEL_FILENAME) ../bin
- cp $(FEATURES_MODEL_FANN_FILENAME) ../bin
- cp $(VOCABS_FILENAME) ../bin
- cp $(MODEL_FILENAME) ../bin
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment