Skip to content
Snippets Groups Projects
Commit 9f27e7bb authored by Alexis Nasr
Browse files

bug fixed in eval_wplgfs.pl and in conll_lib

parent f03779cc
Branches
No related tags found
No related merge requests found
# Rules that turn the DECODA corpus files into CoNLL-07 and MCF files.
# NOTE(review): this text was taken from a rendered diff, so removed and added
# lines appear side by side and several names are defined more than once.
# Recipe lines are shown here without leading whitespace -- confirm that they
# are tab-indented in the real makefile.

# Locations of the DECODA data and of the conversion tools.
DECODA_DIR=../../../data/decoda
TOOLS_GEN=../../../tools
#TOOLS_ORFEO=../../tools
TOOLS_ORFEO=$(TOOLS_GEN)
TOOLS=../../../tools
# NOTE(review): TRAIN and TEST are each assigned twice in this fragment (TEST
# again two lines below, TRAIN again after the first "compile" rule); confirm
# which assignment belongs to the current version of the file.
TRAIN=orfeo.train.conll07
TEST=orfeo.test.conll07
# Input corpora, both located under DECODA_DIR.
CORPUS=$(DECODA_DIR)/corpus_decoda_lot1_2_silver.tsv
TEST=$(DECODA_DIR)/corpus_decoda_gold1_checked_tbaz11decembre2013.tsv
# NOTE(review): "compile" is declared twice (here and further down); make
# merges the prerequisite lists of rules that share a target, so as written it
# depends on the .mcf files as well as on the .conll07 files -- confirm intent.
compile: train.mcf test.mcf #dev.mcf
TRAIN=decoda.train.conll07
TRAIN_NODISF=decoda.train.nodisf.conll07
TRAIN_NOTRONC=decoda.train.notronc.conll07
# train.mcf is written by conll2mcf from the file named by TRAIN; the
# -1W -2C -3L -4H -5D options presumably map input columns to MCF fields
# (TODO confirm against the conll2mcf usage text).
train.mcf: $(TRAIN)
$(TOOLS)/conll2mcf -f $< -1W -2C -3L -4H -5D > $@
compile: train.conll07 test.conll07
# removes truncated words
train.conll07: decoda.train.conll07
$(TOOLS_ORFEO)/decoda2orfeo -t -f $< > $@
# removes truncated words and disfluencies
#train.conll07: decoda.train.conll07
# $(TOOLS_ORFEO)/decoda2orfeo -td -f $< > $@
#train.conll07: decoda.train.conll07
# $(TOOLS_ORFEO)/decoda2orfeo -f $< > $@
# Feeds CORPUS to split_decoda.pl on stdin, then converts decoda.train.tsv to
# CoNLL-07 with process_decoda_tsv. decoda.train.tsv is presumably written by
# split_decoda.pl -- verify, since it is not a declared target of any rule.
decoda.train.conll07: $(CORPUS) ./split_decoda.pl
./split_decoda.pl < $<
cat decoda.train.tsv |$(TOOLS_ORFEO)/process_decoda_tsv -lemma list_mot_pos_lemme_lefff.txt -fmtout conll07 -addlinkdisf > $@
# Same decoda2orfeo -t conversion as train.conll07, applied to the test data.
test.conll07: decoda.test.conll07
$(TOOLS_ORFEO)/decoda2orfeo -t -f $< > $@
#test.nodisf.conll07: decoda.test.conll07
# $(TOOLS_ORFEO)/decoda2orfeo -td -f $< > $@
#test.conll07: decoda.test.conll07
# $(TOOLS_ORFEO)/decoda2orfeo -f $< > $@
# Converts the file named by TEST to CoNLL-07 with process_decoda_tsv.
decoda.test.conll07: $(TEST)
cat $< |$(TOOLS_ORFEO)/process_decoda_tsv -lemma list_mot_pos_lemme_lefff.txt -fmtout conll07 -addlinkdisf > $@
# NOTE(review): this rule passes $(TEST) straight to conll2mcf, whereas
# train.mcf is built from a .conll07 file; with TEST pointing at the .tsv
# corpus the input format may not match -- confirm.
test.mcf: $(TEST)
$(TOOLS)/conll2mcf -f $< -1W -2C -3L -4H -5D > $@
# Deletes the generated CoNLL-07 and MCF files. The leading "-" on each recipe
# line tells make to ignore a failing rm and carry on with the next line.
# NOTE(review): "clean" is not declared .PHONY in the visible text, and the
# recipe lines are shown without their leading tab -- check the real makefile.
clean:
- rm decoda.train.conll07
- rm decoda.test.conll07
- rm test.conll07
- rm train.conll07
- rm test.mcf train.mcf
# Settings defined ahead of the include of maca_trans_parser.makefile at the
# end of this fragment; they are presumably consumed by that shared makefile
# (TODO confirm, it is not visible here).
# NOTE(review): this text was taken from a rendered diff, so removed and added
# lines appear side by side. CFF_CUTOFF, NUMBER_OF_SENTENCES and STREAM_MODE
# are assigned more than once; make keeps the later value, so confirm which
# one the current file really contains.

# Treebank files in CoNLL-07 and MCF format.
CONLL07TRAIN=../data/treebank/train.conll07
CONLL07DEV=../data/treebank/dev.conll07
CONLL07TEST=../data/treebank/test.conll07
MCF_TRAIN=../data/treebank/train.mcf
MCF_DEV=../data/treebank/dev.mcf
MCF_TEST=../data/treebank/test.mcf
# Names of the CFF files.
CFF_TRAIN=train.cff
CFF_CUTOFF_TRAIN=train.cutoff.cff
PERCEPTRON_ITERATIONS=5
# Assigned twice: 1, then 3.
CFF_CUTOFF=1
CFF_CUTOFF=3
# File names of the parser's feature model, vocabulary and model.
FEATURES_MODEL_FILENAME=maca_trans_parser.fm
VOCABS_FILENAME=maca_trans_parser.vocab
MODEL_FILENAME=maca_trans_parser.model
# Assigned twice: 10000000 here, then 4218 below.
NUMBER_OF_SENTENCES=10000000
# Assigned twice: empty here, then " -S" just before the include.
STREAM_MODE=
#STREAM_MODE= -S
NUMBER_OF_SENTENCES=4218
#NUMBER_OF_SENTENCES=1000
MCD_FILENAME=wplgfs.mcd
STREAM_MODE= -S
# Shared parser makefile, read after all of the assignments above.
include ../../makefiles/maca_trans_parser.makefile
# Settings defined ahead of the include of maca_trans_tagger.makefile at the
# end of this fragment; they are presumably consumed by that shared makefile
# (TODO confirm, it is not visible here).
# NOTE(review): this text was taken from a rendered diff, so removed and added
# lines appear side by side. PERCEPTRON_ITERATIONS and STREAM_MODE are
# assigned more than once; make keeps the later value, so confirm which one
# the current file really contains.

# Treebank files in CoNLL-07 and MCF format.
CONLL07TRAIN=../data/treebank/train.conll07
CONLL07DEV=../data/treebank/dev.conll07
CONLL07TEST=../data/treebank/test.conll07
MCF_TRAIN=../data/treebank/train.mcf
MCF_DEV=../data/treebank/dev.mcf
MCF_TEST=../data/treebank/test.mcf
# Names of the CFF files.
CFF_TRAIN=train.cff
CFF_CUTOFF_TRAIN=train.cutoff.cff
# Assigned twice: 5, then 9.
PERCEPTRON_ITERATIONS=5
PERCEPTRON_ITERATIONS=9
CFF_CUTOFF=1
# File names of the tagger's feature model, vocabulary, MCD file and model.
FEATURES_MODEL_FILENAME=maca_trans_tagger.fm
VOCABS_FILENAME=maca_trans_tagger.vocab
MCD_FILENAME=maca_trans_tagger.mcd
MODEL_FILENAME=maca_trans_tagger.model
NUMBER_OF_SENTENCES=10000000
# Assigned twice: empty here, then " -S" two lines below.
STREAM_MODE=
#STREAM_MODE= -S
STREAM_MODE= -S
# Path under the morpho-lexicon data directory.
FORM_POS_FILENAME=../data/morpho-lexicon/fP
#include ./maca_trans_tagger.makefile
# Shared tagger makefile, read after all of the assignments above.
include ../../makefiles/maca_trans_tagger.makefile
......@@ -165,7 +165,8 @@ int parse_line(FILE *f, sentence *s)
/* 4 swept sweep _ VBD _ 26 ccomp _ _ */
/* 5 through through _ IN _ 4 prep _ _ */
sscanf(buff, "%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s",&(w->id), w->form, w->lemma, w->cpostag, w->postag, w->feats, head_str, w->deprel);
/* sscanf(buff, "%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s",&(w->id), w->form, w->lemma, w->cpostag, w->postag, w->feats, head_str, w->deprel); */
sscanf(buff, "%d\t%[^\t]\t%[^\t]\t%[^\t]\t%[^\t]\t%[^\t]\t%[^\t]\t%s",&(w->id), w->form, w->lemma, w->cpostag, w->postag, w->feats, head_str, w->deprel);
/* printf("form = %s\n", w->form);
printf("lemma = %s\n", w->lemma);
printf("cpostag = %s\n", w->cpostag);
......
......@@ -68,9 +68,9 @@ sub is_punctuation_ftb{
while(<REF>){
$line_nb++;
($ref_form, $ref_pos, $ref_lemma, $ref_gov, $ref_fct, $ref_seg) = split;
($ref_form, $ref_pos, $ref_lemma, $ref_gov, $ref_fct, $ref_seg) = split /\t/;
$_ = <HYP>;
($hyp_form, $hyp_pos, $hyp_lemma, $hyp_gov, $hyp_fct, $hyp_seg) = split;
($hyp_form, $hyp_pos, $hyp_lemma, $hyp_gov, $hyp_fct, $hyp_seg) = split /\t/;
if($ref_seg){ $nb_ref_seg++;}
if($hyp_seg){ $nb_hyp_seg++;}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment