Skip to content
Snippets Groups Projects
Commit b42fd532 authored by Alexis Nasr's avatar Alexis Nasr
Browse files

added makefiles to compile morphological analyzer for UD and fr

parent f21d8b99
No related branches found
No related tags found
No related merge requests found
# Settings for building the morphological analyzer model in this directory.
# The rules that consume these variables come from the makefile included on
# the last line.

# Treebank splits (.mcf files).
MCF_TRAIN=../data/treebank/train.mcf
MCF_DEV=../data/treebank/dev.mcf
MCF_TEST=../data/treebank/test.mcf
# Names of the training artefacts generated in this directory.
CFF_TRAIN=train.cff
FANN_TRAIN=train.fann
CFF_FANN_TRAIN=train.fann.cff
CFF_CUTOFF_TRAIN=train.cutoff.cff
# Training parameters.
PERCEPTRON_ITERATIONS=9
CFF_CUTOFF=1
# Feature model used for the perceptron training data.
FEATURES_MODEL_FILENAME=../../fm/maca_trans_morpho.fm
# NOTE(review): this is the *tagger* FANN feature model, not a morpho-specific
# one -- looks like a leftover from the tagger Makefile; confirm it is intended.
FEATURES_MODEL_FANN_FILENAME=../../fm/maca_trans_tagger_fann.fm
# Vocabulary files, one per feature model.
VOCABS_FILENAME=maca_trans_morpho.vocab
VOCABS_FANN_FILENAME=maca_trans_morpho_fann.vocab
# Column description file and name of the trained model.
MCD_FILENAME=../../mcd/wpmlgfs.mcd
MODEL_FILENAME=maca_trans_morpho.model
# Sentence limit handed to the training-data extractor.
NUMBER_OF_SENTENCES=10000000
include ../../makefiles/maca_trans_morpho.makefile
b0U1
b0f
b0len
bm1f
bm2f
b0p
bm1p
bm2p
bm3p
bm2p bm1p
bm2p bm3p
b0s1
b0s2
b0s3
b0s4
b0s5
b0s1 b0s2
b0s1 b0s2 b0s3
b0s1 b0s2 b0s3 b0s4
b0f
b0len
b0p
b0s1
b0s1
b0s1
b0s1
b0s2
b0s2
b0s2
b0s2
b0s3
b0s3
b0s3
b0s4
b0s4
b0s5
b0U1
bm1f
bm1p
bm1p
bm2f
bm2p
bm2p
bm2p
bm3p
bm3p
......@@ -6,17 +6,19 @@ compile:
$(MAKE) -C data/morpho-lexicon compile
$(MAKE) -C data/treebank compile
$(MAKE) -C maca_lexer compile
$(MAKE) -C maca_trans_parser compile
$(MAKE) -C maca_trans_tagger compile
$(MAKE) -C maca_crf_tagger compile
$(MAKE) -C maca_trans_morpho compile
$(MAKE) -C maca_trans_parser compile
# $(MAKE) -C maca_crf_tagger compile
install:
-mkdir -p bin
$(MAKE) -C maca_trans_parser install
$(MAKE) -C maca_trans_tagger install
$(MAKE) -C maca_trans_morpho install
$(MAKE) -C maca_trans_parser install
$(MAKE) -C maca_lemmatizer install
$(MAKE) -C maca_lexer install
$(MAKE) -C maca_crf_tagger install
# $(MAKE) -C maca_crf_tagger install
# @tar -cvzf ./maca_datas.tgz bin
evaluation:
......@@ -27,6 +29,7 @@ clean:
$(MAKE) -C data/treebank clean
$(MAKE) -C maca_lexer clean
$(MAKE) -C maca_trans_parser clean
$(MAKE) -C maca_trans_morpho clean
$(MAKE) -C maca_trans_tagger clean
$(MAKE) -C maca_crf_tagger clean
$(MAKE) -C eval clean
......@@ -4,5 +4,29 @@ de det un #####
aux prep à #####
au prep à #####
du prep de #####
M titre m #####
MM titre MM #####
\ No newline at end of file
M titre M #####
m titre M #####
Mr titre M #####
mr titre M #####
MM titre M #####
mm titre M #####
Mme titre M #####
mme titre M #####
Mmes titre M #####
mmes titre M #####
Mlle titre M #####
mlle titre M #####
Mlles titre M #####
mlles titre M #####
Dr titre docteur #####
Drs titre docteur #####
Pr titre professeur #####
Prs titre professeur #####
Mgr titre monseigneur #####
mgr titre monseigneur #####
......@@ -3,4 +3,4 @@ DEV=../data/treebank/dev.mcf
LANGUAGE=fr
MCD_FILE=wplgfs.mcd
include ../../makefiles/eval.makefile
include ../../makefiles/eval_ud.makefile
# Settings for building the morphological analyzer model in this directory.
# The rules that consume these variables come from the makefile included on
# the last line.

# Treebank splits (.mcf files).
MCF_TRAIN=../data/treebank/train.mcf
MCF_DEV=../data/treebank/dev.mcf
MCF_TEST=../data/treebank/test.mcf
# Names of the training artefacts generated in this directory.
CFF_TRAIN=train.cff
FANN_TRAIN=train.fann
CFF_FANN_TRAIN=train.fann.cff
CFF_CUTOFF_TRAIN=train.cutoff.cff
# Training parameters.
PERCEPTRON_ITERATIONS=9
CFF_CUTOFF=1
# Feature model used for the perceptron training data.
FEATURES_MODEL_FILENAME=../../fm/maca_trans_morpho.fm
# NOTE(review): this is the *tagger* FANN feature model, not a morpho-specific
# one -- looks like a leftover from the tagger Makefile; confirm it is intended.
FEATURES_MODEL_FANN_FILENAME=../../fm/maca_trans_tagger_fann.fm
# Vocabulary files, one per feature model.
VOCABS_FILENAME=maca_trans_morpho.vocab
VOCABS_FANN_FILENAME=maca_trans_morpho_fann.vocab
# Column description file and name of the trained model.
MCD_FILENAME=../../mcd/wpmlgfs.mcd
MODEL_FILENAME=maca_trans_morpho.model
# Sentence limit handed to the training-data extractor.
NUMBER_OF_SENTENCES=10000000
include ../../makefiles/maca_trans_morpho.makefile
......@@ -12,7 +12,7 @@ VOCABS_FILENAME=maca_trans_parser.vocab
MODEL_FILENAME=maca_trans_parser.model
NUMBER_OF_SENTENCES=10000000
#NUMBER_OF_SENTENCES=1000
MCD_FILENAME=wplgfs.mcd
MCD_FILENAME=../../mcd/wpmlgfs.mcd
STREAM_MODE= -S
include ../../makefiles/maca_trans_parser.makefile
......@@ -47,3 +47,9 @@ t2
bm1p
bm2p
s0m b0m
s0p s0m b0p b0m
#s0p b0m b0p
#s0p s0m b0p
TAGGER=maca_trans_tagger
PARSER=maca_trans_parser
CRF_TAGGER=crf_barebones_decoder
MORPHO_ANALYZER=maca_trans_morpho
LEMMATIZER=maca_lemmatizer
EVAL_WPLGFS=../../tools/eval_wplgfs.pl
EVAL_WPLSGF=../../tools/eval_wplsgf.pl
......@@ -75,12 +76,21 @@ test_Wp: test_W
test_Wpl: test_Wp
$(LEMMATIZER) -C $(WPLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@
test_Wpm: test_Wp
$(MORPHO_ANALYZER) -C $(WPMLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@
test_Wpml: test_Wpm
$(LEMMATIZER) -C $(WPMLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@
test_WPl: test_WP
$(LEMMATIZER) -C $(WPLGFS_MCD_FILE) -L $(LANGUAGE) -i $< > $@
test_WPMLgfs: test_WPML
$(PARSER) -L $(LANGUAGE) -C $(WPMLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@
test_Wpmlgfs: test_Wpml
$(PARSER) -L $(LANGUAGE) -C $(WPMLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@
test_WPLSgf: test_WPLS
$(PARSER) -L $(LANGUAGE) -C $(WPLSGF_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@
......@@ -93,6 +103,9 @@ test_WPLgfs: test_WPL
test_Wplgfs: test_Wpl
$(PARSER) -L $(LANGUAGE) -C $(WPLGFS_MCD_FILE) -i $< $(PARSER_OPTIONS) > $@
# Score the fully predicted pipeline output (test_Wpmlgfs, i.e. the parser
# run on top of the predicted tags, morphology and lemmas) against
# test_WPMLGFS. The hypothesis passed with -s must be the lower-case
# test_Wpmlgfs prerequisite; passing test_WPMLgfs here would just repeat
# the eval_WPMLgfs target.
eval_Wpmlgfs: test_WPMLGFS test_Wpmlgfs
	$(EVAL_WPLGFS) -g test_WPMLGFS -s test_Wpmlgfs
eval_WPMLgfs: test_WPMLGFS test_WPMLgfs
$(EVAL_WPLGFS) -g test_WPMLGFS -s test_WPMLgfs
......@@ -106,15 +119,16 @@ eval_Wplgfs: test_WPLGFS test_Wplgfs
$(EVAL_WPLGFS) -g test_WPLGFS -s test_Wplgfs
eval_header:
echo "file pos lemma uas las srec sacc" > $(RESULT_FILE)
echo "file pos morpho lemma uas las srec sacc" > $(RESULT_FILE)
eval: eval_header test_WPLGFS test_WPLgfs test_WPlgfs test_Wplgfs
eval: eval_header test_WPLGFS test_WPLgfs test_WPlgfs test_Wplgfs test_Wpmlgfs
$(EVAL_MCF) -G WPLGFS -S WPLGFS -g test_WPLGFS -s test_WPLGFS >> $(RESULT_FILE)
$(EVAL_MCF) -G WPMLGFS -S WPMLGFS -g test_WPMLGFS -s test_WPMLgfs >> $(RESULT_FILE)
$(EVAL_MCF) -G WPLGFS -S WPLSGF -g test_WPLGFS -s test_WPLSgf >> $(RESULT_FILE)
$(EVAL_MCF) -G WPLGFS -S WPLGFS -g test_WPLGFS -s test_WPLgfs >> $(RESULT_FILE)
$(EVAL_MCF) -G WPLGFS -S WPLGFS -g test_WPLGFS -s test_WPlgfs >> $(RESULT_FILE)
$(EVAL_MCF) -G WPLGFS -S WPLGFS -g test_WPLGFS -s test_Wplgfs >> $(RESULT_FILE)
$(EVAL_MCF) -G WPMLGFS -S WPMLGFS -g test_WPMLGFS -s test_Wpmlgfs >> $(RESULT_FILE)
test_WPLGFS.conll: test_WPLGFS
......
##-----------------------------------------------------------------------
## compile
##-----------------------------------------------------------------------

# Entry point: build the perceptron model and the FANN training file.
compile: $(MODEL_FILENAME) $(FANN_TRAIN)

# Training treebank -> feature file, using the perceptron feature model
# and vocabulary.
$(CFF_TRAIN): $(MCF_TRAIN)
	maca_trans_morpho_mcf2cff -C $(MCD_FILENAME) --input $< --mode TRAIN --feat_model $(FEATURES_MODEL_FILENAME) --vocabs $(VOCABS_FILENAME) --cff $@ -s $(NUMBER_OF_SENTENCES)

# Same extraction, but with the FANN feature model and vocabulary.
$(CFF_FANN_TRAIN): $(MCF_TRAIN)
	maca_trans_morpho_mcf2cff -C $(MCD_FILENAME) --input $< --mode TRAIN --feat_model $(FEATURES_MODEL_FANN_FILENAME) --vocabs $(VOCABS_FANN_FILENAME) --cff $@ -s $(NUMBER_OF_SENTENCES)

# Run cff_cutoff over the feature file with threshold $(CFF_CUTOFF);
# its standard output becomes the target.
$(CFF_CUTOFF_TRAIN): $(CFF_TRAIN)
	cff_cutoff --input $< --vocabs $(VOCABS_FILENAME) --cutoff $(CFF_CUTOFF) > $@

# Convert the FANN feature file with cff2fann; output goes to the target.
$(FANN_TRAIN): $(CFF_FANN_TRAIN)
	cff2fann --vocabs $(VOCABS_FANN_FILENAME) --cff $< --feat_model $(FEATURES_MODEL_FANN_FILENAME) -C $(MCD_FILENAME) > $@

# Train the perceptron on the cutoff feature file.
# Disabled alternative that trains on the uncut feature file instead:
#$(MODEL_FILENAME): $(CFF_TRAIN)
$(MODEL_FILENAME): $(CFF_CUTOFF_TRAIN)
	perceptron_train --cff $< --model $@ -n $(PERCEPTRON_ITERATIONS)
##-----------------------------------------------------------------------
## install
##-----------------------------------------------------------------------

# Copy the feature model, the vocabulary and the trained model into ../bin.
# Each copy is prefixed with '-', so a failing cp is reported but does not
# stop the remaining copies or the calling make.
install:
	-cp $(FEATURES_MODEL_FILENAME) ../bin
	-cp $(VOCABS_FILENAME) ../bin
	-cp $(MODEL_FILENAME) ../bin
##-----------------------------------------------------------------------
## clean
##-----------------------------------------------------------------------

# 'clean' is a command, not a file: declare it phony so a stray file named
# "clean" in the directory cannot make this target look up to date.
.PHONY: clean

# Remove everything the compile target generates. The '-' prefix keeps the
# clean best-effort, as before.
clean:
	-rm -f $(VOCABS_FILENAME)
	-rm -f $(MODEL_FILENAME)
	-rm -f $(CFF_TRAIN)
	-rm -f $(CFF_CUTOFF_TRAIN)
	# FANN artefacts: both are targets of rules reached from 'compile',
	# but were previously left behind.
	-rm -f $(CFF_FANN_TRAIN)
	-rm -f $(FANN_TRAIN)
	# NOTE(review): presumably written by maca_trans_morpho_mcf2cff in TRAIN
	# mode, like $(VOCABS_FILENAME) -- confirm it is a generated file.
	-rm -f $(VOCABS_FANN_FILENAME)
......@@ -191,6 +191,13 @@ while(<REF>){
# print "$ref_pos $hyp_pos\n";
}
if($ref_morph eq $hyp_morph){
$correct_morph_total_nb++;
}
else{
# print "$ref_form \t $ref_lemma \t $hyp_lemma\n";
}
if($ref_lemma eq $hyp_lemma){
$correct_lemma_total_nb++;
}
......@@ -227,6 +234,7 @@ close HYP;
my $pos_acc = $correct_pos_total_nb / $word_nb * 100;
my $lemma_acc = $correct_lemma_total_nb / $word_nb * 100;
my $morph_acc = $correct_morph_total_nb / $word_nb * 100;
my $las = $correct_gov_fct_total_nb / $word_nb * 100;
my $uas = $correct_gov_total_nb / $word_nb * 100 ;
......@@ -236,8 +244,8 @@ my $seg_recall = $nb_hyp_ref_seg / $nb_ref_seg;
my $seg_precision = $nb_hyp_ref_seg / ($nb_hyp_seg + 1);
printf(stderr "pos acc = %.2f lemma acc = %.2f uas = %.2f las = %.2f seg recall = %.2f seg precision = %.2f size = %d\n", $pos_acc, $lemma_acc, $uas, $las, $seg_recall, $seg_precision, $word_nb);
printf(stdout "%s\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%d\n", $hyp, $pos_acc, $lemma_acc, $uas, $las, $seg_recall, $seg_precision, $word_nb);
printf(stderr "pos acc = %.2f morph acc = %.2f lemma acc = %.2f uas = %.2f las = %.2f seg recall = %.2f seg precision = %.2f size = %d\n", $pos_acc, $morph_acc, $lemma_acc, $uas, $las, $seg_recall, $seg_precision, $word_nb);
printf(stdout "%s\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%d\n", $hyp, $pos_acc, $morph_acc, $lemma_acc, $uas, $las, $seg_recall, $seg_precision, $word_nb);
......
......@@ -173,9 +173,9 @@ void print_sentence_no_newline(sentence *s)
/* fprintf(stdout, "\t%s", w->form); */
fprintf(stdout, "%s", w->form);
fprintf(stdout, "\t%s", w->postag);
fprintf(stdout, "\t%s", w->feats);
fprintf(stdout, "\t%s", w->lemma);
/* fprintf(stdout, "\t%s", w->cpostag); */
/* fprintf(stdout, "\t%s", w->feats); */
fprintf(stdout, "\t%d", w->head);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment