Skip to content
Snippets Groups Projects
Commit 9ff007c0 authored by Franck Dary
Browse files

Updated machines and added regression machine timer

parent 1244c827
No related branches found
No related tags found
No related merge requests found
Showing
with 113 additions and 211 deletions
...@@ -9,7 +9,7 @@ TEST_FILES=$(shell find . -type f -name '*test*.conllu') ...@@ -9,7 +9,7 @@ TEST_FILES=$(shell find . -type f -name '*test*.conllu')
THRESHOLD=10 THRESHOLD=10
FPLM_FILENAME=fplm FPLM_FILENAME=fplm
all: tokenizer.ts segmenter.ts texts all_no_test.conllu transitions pretrain all: writescore_TIME.ts tokenizer.ts segmenter.ts texts all_no_test.conllu transitions pretrain
rm -f all_no_test.conllu rm -f all_no_test.conllu
all_no_test.conllu: all_no_test.conllu:
...@@ -40,6 +40,9 @@ segmenter.ts: ...@@ -40,6 +40,9 @@ segmenter.ts:
echo "EOS b.0" > $@ echo "EOS b.0" > $@
echo "NOTEOS b.0" >> $@ echo "NOTEOS b.0" >> $@
sed -i -e 's/^/<segmenter> /' $@ sed -i -e 's/^/<segmenter> /' $@
writescore_TIME.ts:
echo "WRITESCORE b.0 TIME" > $@
transitions: all_no_test.conllu transitions: all_no_test.conllu
./getTransitionSets.py $< ./getTransitionSets.py $<
...@@ -48,7 +51,7 @@ texts: ...@@ -48,7 +51,7 @@ texts:
./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILES) $(TEST_FILES) ./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILES) $(TEST_FILES)
pretrain: pretrain:
for col in FORM UPOS FEATS DEPREL LETTERS ; do \ for col in FORM UPOS LEMMA FEATS DEPREL LETTERS ; do \
./pretrainEmbeddings.sh $(TRAIN_FILES) $$col 64 $$col.w2v 2> pretrain_log.err || ( cat pretrain_log.err && exit 1 ) ; \ ./pretrainEmbeddings.sh $(TRAIN_FILES) $$col 64 $$col.w2v 2> pretrain_log.err || ( cat pretrain_log.err && exit 1 ) ; \
done done
......
...@@ -4,18 +4,18 @@ Classifier : lemmatizer ...@@ -4,18 +4,18 @@ Classifier : lemmatizer
Transitions : {lemmatizer_rules,data/lemmatizer_rules.ts, lemmatizer_case,data/lemmatizer_case.ts} Transitions : {lemmatizer_rules,data/lemmatizer_rules.ts, lemmatizer_case,data/lemmatizer_case.ts}
LossMultiplier : {} LossMultiplier : {}
Network type : Modular Network type : Modular
Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{lower:FORM} LSTM{1 1 0 1} In{64} Out{64} Context : Targets{b.-2 b.-1 b.0 b.1 b.2} Columns{lower:FORM UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{UPOS,data/UPOS.w2v FORM,data/FORM.w2v}
Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} Context : Targets{b.-2 b.-1 b.0 b.1 b.2} Columns{ID EOS} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Context : Buffer{-2 -1 0} Stack{} Columns{LEMMA} LSTM{1 1 0 1} In{64} Out{64} Context : Targets{b.-2 b.-1 b.0} Columns{LEMMA} LSTM{1 1 0 1} In{64} Out{64} w2v{LEMMA,data/LEMMA.w2v}
Focused : Column{ID} NbElem{1} Buffer{-1 0 1 1} Stack{} LSTM{1 1 0 1} In{64} Out{64} Focused : Column{prefix3:FORM} NbElem{3} Buffer{-1 0 1 2} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Focused : Column{prefix3:FORM} NbElem{3} Buffer{-1 0 1 2} Stack{} LSTM{1 1 0 1} In{64} Out{64} Focused : Column{suffix3:FORM} NbElem{3} Buffer{-1 0 1 2} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Focused : Column{suffix3:FORM} NbElem{3} Buffer{-1 0 1 2} Stack{} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{EOS} NbElem{1} Buffer{-1 0} Stack{} LSTM{1 1 0 1} In{64} Out{64}
UppercaseRate : Buffer{-1 0 1 2} Stack{} LSTM{1 1 0 1} Out{32} UppercaseRate : Buffer{-1 0 1 2} Stack{} LSTM{1 1 0 1} Out{32}
InputDropout : 0.5 InputDropout : 0.3
MLP : {4096 0.3} MLP : {1600 0.3 1600 0.3}
End End
Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true} Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
Type : classification
Loss : crossentropy
} }
Predictions : LEMMA Predictions : LEMMA
Strategy Strategy
......
...@@ -4,16 +4,17 @@ Classifier : morpho ...@@ -4,16 +4,17 @@ Classifier : morpho
Transitions : {morpho,data/morpho_parts.ts} Transitions : {morpho,data/morpho_parts.ts}
LossMultiplier : {} LossMultiplier : {}
Network type : Modular Network type : Modular
Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM UPOS} LSTM{1 1 0 1} In{64} Out{64} Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2}
Focused : Column{ID} NbElem{1} Buffer{-1 0 1 2} Stack{} LSTM{1 1 0 1} In{64} Out{64} Context : Targets{b.-2 b.-1 b.0 b.1 b.2} Columns{EOS ID UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Focused : Column{FORM} NbElem{13} Buffer{-1 0 1 2} Stack{} LSTM{1 1 0 1} In{64} Out{64} Focused : Column{FEATS} NbElem{13} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Focused : Column{FEATS} NbElem{13} Buffer{-2 -1 0} Stack{} LSTM{1 1 0 1} In{64} Out{64} Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Focused : Column{EOS} NbElem{1} Buffer{-1 0} Stack{} LSTM{1 1 0 1} In{64} Out{64} Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
History : NbElem{10} LSTM{1 1 0 1} In{64} Out{64} InputDropout : 0.3
InputDropout : 0.5 MLP : {1600 0.3 1600 0.3}
MLP : {2048 0.3 2048 0.3}
End End
Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true} Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
Type : classification
Loss : crossentropy
} }
Predictions : FEATS Predictions : FEATS
Strategy Strategy
......
...@@ -4,7 +4,7 @@ Classifier : parser ...@@ -4,7 +4,7 @@ Classifier : parser
Transitions : {parser,data/parser_eager_rel_strict.ts} Transitions : {parser,data/parser_eager_rel_strict.ts}
LossMultiplier : {} LossMultiplier : {}
Network type : Modular Network type : Modular
Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2}
Context : Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{UPOS FEATS EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{} Context : Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{UPOS FEATS EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Context : Targets{s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{} Context : Targets{s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
...@@ -12,6 +12,8 @@ Classifier : parser ...@@ -12,6 +12,8 @@ Classifier : parser
MLP : {1600 0.3 1600 0.3} MLP : {1600 0.3 1600 0.3}
End End
Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
Type : classification
Loss : crossentropy
} }
Predictions : HEAD DEPREL Predictions : HEAD DEPREL
Strategy Strategy
......
...@@ -2,22 +2,19 @@ Name : Parser and Segmenter machine ...@@ -2,22 +2,19 @@ Name : Parser and Segmenter machine
Classifier : parserseg Classifier : parserseg
{ {
Transitions : {parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} Transitions : {parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts}
LossMultiplier : {segmenter,3.0} LossMultiplier : {}
Network type : Modular Network type : Modular
StateName : Out{1024} Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2}
Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM} LSTM{1 1 0 1} In{64} Out{64} Context : Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{UPOS FEATS EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Context : Buffer{-3 -2 -1 0 1 2} Stack{1 0} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} Context : Targets{s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Focused : Column{ID} NbElem{1} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
Focused : Column{FORM} NbElem{13} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} InputDropout : 0.3
Focused : Column{FEATS} NbElem{13} Buffer{-1 0} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} MLP : {1600 0.3 1600 0.3}
Focused : Column{EOS} NbElem{1} Buffer{-2 -1} Stack{} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} LSTM{1 1 0.0 1} In{64} Out{64}
History : NbElem{10} LSTM{1 1 0 1} In{64} Out{64}
InputDropout : 0.5
MLP : {2048 0.3 2048 0.3}
End End
Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true} Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
Type : classification
Loss : crossentropy
} }
Predictions : HEAD DEPREL EOS Predictions : HEAD DEPREL EOS
Strategy Strategy
......
...@@ -4,15 +4,17 @@ Classifier : tagger ...@@ -4,15 +4,17 @@ Classifier : tagger
Transitions : {tagger,data/tagger.ts} Transitions : {tagger,data/tagger.ts}
LossMultiplier : {} LossMultiplier : {}
Network type : Modular Network type : Modular
Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2} Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2}
Context : Targets{b.-2 b.-1 b.0 b.1 b.2} Columns{EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{} Context : Targets{b.-2 b.-1 b.0 b.1 b.2} Columns{EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Context : Targets{b.-3 b.-2 b.-1} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{data/UPOS.w2v} Context : Targets{b.-3 b.-2 b.-1} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{UPOS,data/UPOS.w2v}
Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
InputDropout : 0.3 InputDropout : 0.3
MLP : {1600 0.3 1600 0.3} MLP : {1600 0.3 1600 0.3}
End End
Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
Type : classification
Loss : crossentropy
} }
Predictions : UPOS Predictions : UPOS
Strategy Strategy
......
Name : Tagger, Parser and Segmenter baseline Machine
Classifier : taggerparser
{
Transitions : {tagger,data/tagger.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts}
LossMultiplier : {segmenter,3.0}
Network type : Modular
StateName : Out{1024}
Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM} LSTM{1 1 0 1} In{64} Out{64}
Context : Buffer{-3 -2 -1 0} Stack{1 0} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{ID} NbElem{1} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{FORM} NbElem{13} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{EOS} NbElem{1} Buffer{-1} Stack{} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} LSTM{1 1 0.0 1} In{64} Out{64}
History : NbElem{10} LSTM{1 1 0 1} In{64} Out{64}
InputDropout : 0.5
MLP : {2048 0.3 2048 0.3}
End
Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true}
}
Predictions : UPOS HEAD DEPREL EOS
Strategy
{
Block : End{cannotMove}
tagger tagger * 1
Block : End{cannotMove}
parser segmenter eager_SHIFT 0
parser segmenter eager_RIGHT_rel 0
parser parser * 0
segmenter parser * 1
}
Name : Tagger, Parser and Segmenter incremental Machine
Classifier : taggerparser
{
Transitions : {tagger,data/tagger.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts}
LossMultiplier : {segmenter,3.0}
Network type : Modular
StateName : Out{1024}
Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM} LSTM{1 1 0 1} In{64} Out{64}
Context : Buffer{-3 -2 -1 0} Stack{1 0} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{ID} NbElem{1} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{FORM} NbElem{13} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{EOS} NbElem{1} Buffer{-1} Stack{} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} LSTM{1 1 0.0 1} In{64} Out{64}
History : NbElem{10} LSTM{1 1 0 1} In{64} Out{64}
InputDropout : 0.5
MLP : {2048 0.3 2048 0.3}
End
Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true}
}
Predictions : UPOS HEAD DEPREL EOS
Strategy
{
Block : End{cannotMove}
tagger parser * 0
parser segmenter eager_SHIFT 0
parser segmenter eager_RIGHT_rel 0
parser parser * 0
segmenter tagger * 1
}
Name : Tagger and Parser incremental Machine with one classifier
Classifier : taggerparser
{
Transitions : {tagger,data/tagger.ts parser,data/parser_eager_rel_strict.ts}
LossMultiplier : {}
Network type : Modular
Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2}
Context : Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Context : Targets{b.-2 b.-1 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Context : Targets{s.0 s.1 s.2 s.0.0 b.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64}
History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
InputDropout : 0.3
MLP : {1600 0.3 1600 0.3}
End
Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
}
Predictions : UPOS HEAD DEPREL
Strategy
{
Block : End{cannotMove}
tagger parser * 0
parser tagger eager_SHIFT 1
parser tagger eager_RIGHT_rel 1
parser parser * 0
}
Name : Tagger and Parser incremental Machine with two classifiers
Classifier : tagger
{
Transitions : {tagger,data/tagger.ts}
LossMultiplier : {}
Network type : Modular
Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2}
Context : Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Context : Targets{b.-2 b.-1 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Context : Targets{s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64}
InputDropout : 0.3
MLP : {1600 0.3 1600 0.3}
End
Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
}
Classifier : parser
{
Transitions : {parser,data/parser_eager_rel_strict.ts}
LossMultiplier : {}
Network type : Modular
Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2}
Context : Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Context : Targets{b.-2 b.-1 b.0 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Context : Targets{s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{}
History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
InputDropout : 0.3
MLP : {1600 0.3 1600 0.3}
End
Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
}
Predictions : UPOS HEAD DEPREL
Strategy
{
Block : End{cannotMove}
tagger parser * 0
parser tagger eager_SHIFT 1
parser tagger eager_RIGHT_rel 1
parser parser * 0
}
Name : Tagger, Parser and Segmenter sequential Machine
Classifier : taggerparser
{
Transitions : {tagger,data/tagger.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts}
LossMultiplier : {segmenter,3.0}
Network type : Modular
StateName : Out{1024}
Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM} LSTM{1 1 0 1} In{64} Out{64}
Context : Buffer{-3 -2 -1 0 1 2} Stack{1 0} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{ID} NbElem{1} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{FORM} NbElem{13} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{EOS} NbElem{1} Buffer{-1} Stack{} LSTM{1 1 0 1} In{64} Out{64}
Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64}
DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} LSTM{1 1 0.0 1} In{64} Out{64}
History : NbElem{10} LSTM{1 1 0 1} In{64} Out{64}
InputDropout : 0.5
MLP : {2048 0.3 2048 0.3}
End
Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true}
}
Predictions : UPOS HEAD DEPREL EOS
Strategy
{
Block : End{cannotMove}
tagger tagger * 1
Block : End{cannotMove}
parser segmenter eager_SHIFT 0
parser segmenter eager_RIGHT_rel 0
parser parser * 0
segmenter parser * 1
}
Name : Timer Machine
Classifier : timer
{
Transitions : {timer,data/writescore_TIME.ts}
LossMultiplier : {}
Network type : Modular
Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2}
Context : Targets{b.-2 b.-1 b.0 b.1 b.2} Columns{EOS ID UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{}
InputDropout : 0.3
MLP : {1600 0.3 1600 0.3}
End
Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
Type : regression
Loss : mse
}
Predictions : TIME
Strategy
{
Block : End{cannotMove}
timer timer * 1
}
...@@ -4,16 +4,18 @@ Classifier : tokenizer ...@@ -4,16 +4,18 @@ Classifier : tokenizer
Transitions : {tokenizer,data/tokenizer.ts} Transitions : {tokenizer,data/tokenizer.ts}
LossMultiplier : {} LossMultiplier : {}
Network type : Modular Network type : Modular
Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2} Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2}
Context : Targets{b.-2 b.-1 b.0} Columns{ID} LSTM{1 1 0 1} In{64} Out{64} w2v{} Context : Targets{b.-2 b.-1 b.0} Columns{ID} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32} RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32} w2v{}
History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32}
InputDropout : 0.3 InputDropout : 0.3
MLP : {1600 0.3 1600 0.3} MLP : {1600 0.3 1600 0.3}
End End
Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} Optimizer : Adagrad {0.01 0.000001 0 0.0000000001}
Type : classification
Loss : crossentropy
} }
Splitwords : data/splitwords.ts Splitwords : data/splitwords.ts
Predictions : ID FORM EOS Predictions : ID FORM EOS
......
...@@ -111,6 +111,19 @@ metric2colname = { ...@@ -111,6 +111,19 @@ metric2colname = {
"Lemmas" : "LEMMA", "Lemmas" : "LEMMA",
} }
# Names of the standard columns. Columns whose name is not listed here (and is
# not "_") are the ones reported under their own name by evaluate().
defaultColumns = set([
  "ID", "FORM", "UPOS", "XPOS", "LEMMA",
  "FEATS", "HEAD", "DEPREL", "DEPS", "MISC",
])
# Content and functional relations # Content and functional relations
CONTENT_DEPRELS = { CONTENT_DEPRELS = {
"nsubj", "obj", "iobj", "csubj", "ccomp", "xcomp", "obl", "vocative", "nsubj", "obj", "iobj", "csubj", "ccomp", "xcomp", "obl", "vocative",
...@@ -129,6 +142,16 @@ UNIVERSAL_FEATURES = { ...@@ -129,6 +142,16 @@ UNIVERSAL_FEATURES = {
"Tense", "Aspect", "Voice", "Evident", "Polarity", "Person", "Polite" "Tense", "Aspect", "Voice", "Evident", "Polarity", "Person", "Polite"
} }
################################################################################
def is_float(value) :
  """Tell whether `value` is a string spelling a finite decimal number.

  Returns True only when `value` is a str, contains a '.', is accepted by
  float() and the parsed number is finite. Everything else yields False:
  non-string objects, integer-looking strings such as "12", text that
  float() rejects, and literals that overflow to infinity ("1.0e999").
  """
  # Function-scope import keeps this helper self-contained.
  import math

  # Cheap rejections first: only dotted strings can qualify.
  if not isinstance(value, str) or '.' not in value :
    return False
  try:
    parsed = float(value)
  except ValueError:
    return False
  # An overflowing literal parses to +/-inf without raising; subtracting two
  # such values gives inf or nan, so they must not be treated as numbers.
  return math.isfinite(parsed)
################################################################################
################################################################################ ################################################################################
def filter_columns(columns) : def filter_columns(columns) :
...@@ -400,10 +423,15 @@ def evaluate(gold_ud, system_ud) : ...@@ -400,10 +423,15 @@ def evaluate(gold_ud, system_ud) :
errors = [] errors = []
for words in alignment.matched_words : for words in alignment.matched_words :
if filter_fn is None or filter_fn(words.gold_word) : if filter_fn is None or filter_fn(words.gold_word) :
if key_fn(words.gold_word, gold_aligned_gold) == key_fn(words.system_word, gold_aligned_system) : goldItem = key_fn(words.gold_word, gold_aligned_gold)
correct += 1 systemItem = key_fn(words.system_word, gold_aligned_system)
if (not isinstance(systemItem, str) or '.' not in systemItem or not is_float(systemItem)) or (not isinstance(goldItem, str) or '.' not in goldItem or not is_float(goldItem)) :
if goldItem == systemItem :
correct += 1
else :
errors.append(words)
else : else :
errors.append(words) correct -= abs(float(goldItem) - float(systemItem))
return [Score(gold, system, correct, aligned), errors] return [Score(gold, system, correct, aligned), errors]
...@@ -532,6 +560,10 @@ def evaluate(gold_ud, system_ud) : ...@@ -532,6 +560,10 @@ def evaluate(gold_ud, system_ud) :
if "ID" in col2index : if "ID" in col2index :
result["Sentences"] = spans_score(gold_ud.sentences, system_ud.sentences) result["Sentences"] = spans_score(gold_ud.sentences, system_ud.sentences)
for colName in col2index :
if colName not in defaultColumns and colName != "_" :
result[colName] = alignment_score(alignment, lambda w, _ : w.columns[col2index[colName]])
return result return result
################################################################################ ################################################################################
......
...@@ -2,8 +2,11 @@ def readMCD(mcd) : ...@@ -2,8 +2,11 @@ def readMCD(mcd) :
col2index = {} col2index = {}
index2col = {} index2col = {}
curId = 0
for col in mcd.split(' ') : for col in mcd.split(' ') :
col2index[col] = len(col2index) col2index[col] = curId
index2col[len(index2col)] = col index2col[curId] = col
curId += 1
return col2index, index2col return col2index, index2col
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment