diff --git a/UD_any/data/Makefile b/UD_any/data/Makefile index 4f12bef1be8c818a053ad9d9e1b5f5b632472899..1dc11d50e81b555b87640ca3d1eecb6dd5f00d21 100644 --- a/UD_any/data/Makefile +++ b/UD_any/data/Makefile @@ -9,7 +9,7 @@ TEST_FILES=$(shell find . -type f -name '*test*.conllu') THRESHOLD=10 FPLM_FILENAME=fplm -all: tokenizer.ts segmenter.ts texts all_no_test.conllu transitions pretrain +all: writescore_TIME.ts tokenizer.ts segmenter.ts texts all_no_test.conllu transitions pretrain rm -f all_no_test.conllu all_no_test.conllu: @@ -40,6 +40,9 @@ segmenter.ts: echo "EOS b.0" > $@ echo "NOTEOS b.0" >> $@ sed -i -e 's/^/<segmenter> /' $@ + +writescore_TIME.ts: + echo "WRITESCORE b.0 TIME" > $@ transitions: all_no_test.conllu ./getTransitionSets.py $< @@ -48,7 +51,7 @@ texts: ./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILES) $(TEST_FILES) pretrain: - for col in FORM UPOS FEATS DEPREL LETTERS ; do \ + for col in FORM UPOS LEMMA FEATS DEPREL LETTERS ; do \ ./pretrainEmbeddings.sh $(TRAIN_FILES) $$col 64 $$col.w2v 2> pretrain_log.err || ( cat pretrain_log.err && exit 1 ) ; \ done diff --git a/UD_any/templates/lemmatizer/machine.rm b/UD_any/templates/lemmatizer/machine.rm index 2eca05045438980ff17b214e1bb717db4a11854a..e0a0e12fa737ea8671b712afa8e03ddcbfb2de43 100644 --- a/UD_any/templates/lemmatizer/machine.rm +++ b/UD_any/templates/lemmatizer/machine.rm @@ -4,18 +4,18 @@ Classifier : lemmatizer Transitions : {lemmatizer_rules,data/lemmatizer_rules.ts, lemmatizer_case,data/lemmatizer_case.ts} LossMultiplier : {} Network type : Modular - Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{lower:FORM} LSTM{1 1 0 1} In{64} Out{64} - Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} - Context : Buffer{-2 -1 0} Stack{} Columns{LEMMA} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{ID} NbElem{1} Buffer{-1 0 1 1} Stack{} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{prefix3:FORM} NbElem{3} Buffer{-1 0 1 2} Stack{} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{suffix3:FORM} NbElem{3} Buffer{-1 0 1 2} Stack{} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{EOS} NbElem{1} Buffer{-1 0} Stack{} LSTM{1 1 0 1} In{64} Out{64} + Context : Targets{b.-2 b.-1 b.0 b.1 b.2} Columns{lower:FORM UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{UPOS,data/UPOS.w2v FORM,data/FORM.w2v} + Context : Targets{b.-2 b.-1 b.0 b.1 b.2} Columns{ID EOS} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Context : Targets{b.-2 b.-1 b.0} Columns{LEMMA} LSTM{1 1 0 1} In{64} Out{64} w2v{LEMMA,data/LEMMA.w2v} + Focused : Column{prefix3:FORM} NbElem{3} Buffer{-1 0 1 2} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Focused : Column{suffix3:FORM} NbElem{3} Buffer{-1 0 1 2} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} UppercaseRate : Buffer{-1 0 1 2} Stack{} LSTM{1 1 0 1} Out{32} - InputDropout : 0.5 - MLP : {4096 0.3} + InputDropout : 0.3 + MLP : {1600 0.3 1600 0.3} End - Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true} + Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} + Type : classification + Loss : crossentropy } Predictions : LEMMA Strategy diff --git a/UD_any/templates/morpho/machine.rm b/UD_any/templates/morpho/machine.rm index ca27f2b22451aa27709d975f84d7006fa5216575..221261d73c17c845b80e85947f29a2ed2c2fcaaf 100644 --- a/UD_any/templates/morpho/machine.rm +++ b/UD_any/templates/morpho/machine.rm @@ -4,16 +4,17 @@ Classifier : morpho Transitions : {morpho,data/morpho_parts.ts} LossMultiplier : {} Network type : Modular - Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM UPOS} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{ID} NbElem{1} Buffer{-1 0 1 2} Stack{} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{FORM} NbElem{13} Buffer{-1 0 1 2} Stack{} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{FEATS} NbElem{13} Buffer{-2 -1 0} Stack{} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{EOS} NbElem{1} Buffer{-1 0} Stack{} LSTM{1 1 0 1} In{64} Out{64} - History : NbElem{10} LSTM{1 1 0 1} In{64} Out{64} - InputDropout : 0.5 - MLP : {2048 0.3 2048 0.3} + Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2} + Context : Targets{b.-2 b.-1 b.0 b.1 b.2} Columns{EOS ID UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Focused : Column{FEATS} NbElem{13} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} + InputDropout : 0.3 + MLP : {1600 0.3 1600 0.3} End - Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true} + Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} + Type : classification + Loss : crossentropy } Predictions : FEATS Strategy diff --git a/UD_any/templates/parser/machine.rm b/UD_any/templates/parser/machine.rm index a8ac0ac4fb0948f9720fe901cbbcb4213b3edc98..dfe4ec312cdf62bdeedd0b1c21d3004a2497a118 100644 --- a/UD_any/templates/parser/machine.rm +++ b/UD_any/templates/parser/machine.rm @@ -4,7 +4,7 @@ Classifier : parser Transitions : {parser,data/parser_eager_rel_strict.ts} LossMultiplier : {} Network type : Modular - Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} + Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Context : Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{UPOS FEATS EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{} Context : Targets{s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{} History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} @@ -12,6 +12,8 @@ Classifier : parser MLP : {1600 0.3 1600 0.3} End Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} + Type : classification + Loss : crossentropy } Predictions : HEAD DEPREL Strategy diff --git a/UD_any/templates/parserseg/machine.rm b/UD_any/templates/parserseg/machine.rm index b63b81c5c9ccb37291cb5ea118d1011f949d0e39..bde8d1f9f38512105a9dd404788fc699bffebe77 100644 --- a/UD_any/templates/parserseg/machine.rm +++ b/UD_any/templates/parserseg/machine.rm @@ -2,22 +2,19 @@ Name : Parser and Segmenter machine Classifier : parserseg { Transitions : {parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} - LossMultiplier : {segmenter,3.0} + LossMultiplier : {} Network type : Modular - StateName : Out{1024} - Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM} LSTM{1 1 0 1} In{64} Out{64} - Context : Buffer{-3 -2 -1 0 1 2} Stack{1 0} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{ID} NbElem{1} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{FORM} NbElem{13} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{FEATS} NbElem{13} Buffer{-1 0} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{EOS} NbElem{1} Buffer{-2 -1} Stack{} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} - DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} LSTM{1 1 0.0 1} In{64} Out{64} - History : NbElem{10} LSTM{1 1 0 1} In{64} Out{64} - InputDropout : 0.5 - MLP : {2048 0.3 2048 0.3} + Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} + Context : Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{UPOS FEATS EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Context : Targets{s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{} + History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} + InputDropout : 0.3 + MLP : {1600 0.3 1600 0.3} End - Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true} + Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} + Type : classification + Loss : crossentropy + } Predictions : HEAD DEPREL EOS Strategy diff --git a/UD_any/templates/tagger/machine.rm b/UD_any/templates/tagger/machine.rm index 026d380a436d0de16f93a8086312c34b049b8f8f..3f2048e6ebfc9c0b4e9534844edb59de0c651381 100644 --- a/UD_any/templates/tagger/machine.rm +++ b/UD_any/templates/tagger/machine.rm @@ -4,15 +4,17 @@ Classifier : tagger Transitions : {tagger,data/tagger.ts} LossMultiplier : {} Network type : Modular - Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2} + Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2} Context : Targets{b.-2 b.-1 b.0 b.1 b.2} Columns{EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{} - Context : Targets{b.-3 b.-2 b.-1} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{data/UPOS.w2v} - Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} + Context : Targets{b.-3 b.-2 b.-1} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{UPOS,data/UPOS.w2v} + Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} InputDropout : 0.3 MLP : {1600 0.3 1600 0.3} End Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} + Type : classification + Loss : crossentropy } Predictions : UPOS Strategy diff --git a/UD_any/templates/taggerparser_base/machine.rm b/UD_any/templates/taggerparser_base/machine.rm deleted file mode 100644 index a7267fb312361ad3732ac25f4252fcb39e6f4d90..0000000000000000000000000000000000000000 --- a/UD_any/templates/taggerparser_base/machine.rm +++ /dev/null @@ -1,31 +0,0 @@ -Name : Tagger, Parser and Segmenter baseline Machine -Classifier : taggerparser -{ - Transitions : {tagger,data/tagger.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} - LossMultiplier : {segmenter,3.0} - Network type : Modular - StateName : Out{1024} - Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM} LSTM{1 1 0 1} In{64} Out{64} - Context : Buffer{-3 -2 -1 0} Stack{1 0} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{ID} NbElem{1} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{FORM} NbElem{13} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{EOS} NbElem{1} Buffer{-1} Stack{} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} - DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} LSTM{1 1 0.0 1} In{64} Out{64} - History : NbElem{10} LSTM{1 1 0 1} In{64} Out{64} - InputDropout : 0.5 - MLP : {2048 0.3 2048 0.3} - End - Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true} -} -Predictions : UPOS HEAD DEPREL EOS -Strategy -{ - Block : End{cannotMove} - tagger tagger * 1 - Block : End{cannotMove} - parser segmenter eager_SHIFT 0 - parser segmenter eager_RIGHT_rel 0 - parser parser * 0 - segmenter parser * 1 -} diff --git a/UD_any/templates/taggerparser_incr/machine.rm b/UD_any/templates/taggerparser_incr/machine.rm deleted file mode 100644 index 85fe8905f815cd597fa127fc68a0624f5d8b666b..0000000000000000000000000000000000000000 --- a/UD_any/templates/taggerparser_incr/machine.rm +++ /dev/null @@ -1,30 +0,0 @@ -Name : Tagger, Parser and Segmenter incremental Machine -Classifier : taggerparser -{ - Transitions : {tagger,data/tagger.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} - LossMultiplier : {segmenter,3.0} - Network type : Modular - StateName : Out{1024} - Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM} LSTM{1 1 0 1} In{64} Out{64} - Context : Buffer{-3 -2 -1 0} Stack{1 0} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{ID} NbElem{1} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{FORM} NbElem{13} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{EOS} NbElem{1} Buffer{-1} Stack{} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} - DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} LSTM{1 1 0.0 1} In{64} Out{64} - History : NbElem{10} LSTM{1 1 0 1} In{64} Out{64} - InputDropout : 0.5 - MLP : {2048 0.3 2048 0.3} - End - Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true} -} -Predictions : UPOS HEAD DEPREL EOS -Strategy -{ - Block : End{cannotMove} - tagger parser * 0 - parser segmenter eager_SHIFT 0 - parser segmenter eager_RIGHT_rel 0 - parser parser * 0 - segmenter tagger * 1 -} diff --git a/UD_any/templates/taggerparser_incr_one/machine.rm b/UD_any/templates/taggerparser_incr_one/machine.rm deleted file mode 100644 index 54888fcf8d65c46ddd8a80bfa99404aa443ece8e..0000000000000000000000000000000000000000 --- a/UD_any/templates/taggerparser_incr_one/machine.rm +++ /dev/null @@ -1,28 +0,0 @@ -Name : Tagger and Parser incremental Machine with one classifier -Classifier : taggerparser -{ - Transitions : {tagger,data/tagger.ts parser,data/parser_eager_rel_strict.ts} - LossMultiplier : {} - Network type : Modular - Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} - Context : Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{} - Context : Targets{b.-2 b.-1 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{} - Context : Targets{s.0 s.1 s.2 s.0.0 b.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{} - Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} - History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} - InputDropout : 0.3 - MLP : {1600 0.3 1600 0.3} - End - Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} - -} -Predictions : UPOS HEAD DEPREL -Strategy -{ - Block : End{cannotMove} - tagger parser * 0 - parser tagger eager_SHIFT 1 - parser tagger eager_RIGHT_rel 1 - parser parser * 0 -} diff --git a/UD_any/templates/taggerparser_incr_two/machine.rm b/UD_any/templates/taggerparser_incr_two/machine.rm deleted file mode 100644 index 9d2b181920267dae6d5bbbbde5a7c4c80981ac0f..0000000000000000000000000000000000000000 --- a/UD_any/templates/taggerparser_incr_two/machine.rm +++ /dev/null @@ -1,41 +0,0 @@ -Name : Tagger and Parser incremental Machine with two classifiers -Classifier : tagger -{ - Transitions : {tagger,data/tagger.ts} - LossMultiplier : {} - Network type : Modular - Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} - Context : Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{} - Context : Targets{b.-2 b.-1 s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{} - Context : Targets{s.0 s.1 s.2 b.0.0 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{} - Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} - InputDropout : 0.3 - MLP : {1600 0.3 1600 0.3} - End - Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} -} -Classifier : parser -{ - Transitions : {parser,data/parser_eager_rel_strict.ts} - LossMultiplier : {} - Network type : Modular - Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} - Context : Targets{b.-2 b.-1 b.0 b.1 b.2 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{} - Context : Targets{b.-2 b.-1 b.0 s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{} - Context : Targets{s.0 s.1 s.2 s.0.0 s.0.-1 s.1.0 s.1.-1 s.0.1 s.0.-2 s.1.1 s.1.-2} Columns{DEPREL} LSTM{1 1 0 1} In{64} Out{64} w2v{} - History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} - InputDropout : 0.3 - MLP : {1600 0.3 1600 0.3} - End - Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} -} -Predictions : UPOS HEAD DEPREL -Strategy -{ - Block : End{cannotMove} - tagger parser * 0 - parser tagger eager_SHIFT 1 - parser tagger eager_RIGHT_rel 1 - parser parser * 0 -} diff --git a/UD_any/templates/taggerparser_seq/machine.rm b/UD_any/templates/taggerparser_seq/machine.rm deleted file mode 100644 index 9cbbab8898b33439429a0f21f71cc62d2cf9a566..0000000000000000000000000000000000000000 --- a/UD_any/templates/taggerparser_seq/machine.rm +++ /dev/null @@ -1,31 +0,0 @@ -Name : Tagger, Parser and Segmenter sequential Machine -Classifier : taggerparser -{ - Transitions : {tagger,data/tagger.ts parser,data/parser_eager_rel_strict.ts segmenter,data/segmenter.ts} - LossMultiplier : {segmenter,3.0} - Network type : Modular - StateName : Out{1024} - Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM} LSTM{1 1 0 1} In{64} Out{64} - Context : Buffer{-3 -2 -1 0 1 2} Stack{1 0} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{ID} NbElem{1} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{FORM} NbElem{13} Buffer{-1 0 1 2} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{EOS} NbElem{1} Buffer{-1} Stack{} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} LSTM{1 1 0 1} In{64} Out{64} - DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} LSTM{1 1 0.0 1} In{64} Out{64} - History : NbElem{10} LSTM{1 1 0 1} In{64} Out{64} - InputDropout : 0.5 - MLP : {2048 0.3 2048 0.3} - End - Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true} -} -Predictions : UPOS HEAD DEPREL EOS -Strategy -{ - Block : End{cannotMove} - tagger tagger * 1 - Block : End{cannotMove} - parser segmenter eager_SHIFT 0 - parser segmenter eager_RIGHT_rel 0 - parser parser * 0 - segmenter parser * 1 -} diff --git a/UD_any/templates/timer/machine.rm b/UD_any/templates/timer/machine.rm new file mode 100644 index 0000000000000000000000000000000000000000..d29c7fd598ccb595cc3b97acf53d62d815826071 --- /dev/null +++ b/UD_any/templates/timer/machine.rm @@ -0,0 +1,21 @@ +Name : Timer Machine +Classifier : timer +{ + Transitions : {timer,data/writescore_TIME.ts} + LossMultiplier : {} + Network type : Modular + Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2} + Context : Targets{b.-2 b.-1 b.0 b.1 b.2} Columns{EOS ID UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{} + InputDropout : 0.3 + MLP : {1600 0.3 1600 0.3} + End + Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} + Type : regression + Loss : mse +} +Predictions : TIME +Strategy +{ + Block : End{cannotMove} + timer timer * 1 +} diff --git a/UD_any/templates/tokenizer/machine.rm b/UD_any/templates/tokenizer/machine.rm index 59ac28094f095638466d4a5c8f675098a6f286c9..e716929628d6a4e944833f841b4d781432d40e60 100644 --- a/UD_any/templates/tokenizer/machine.rm +++ b/UD_any/templates/tokenizer/machine.rm @@ -4,16 +4,18 @@ Classifier : tokenizer Transitions : {tokenizer,data/tokenizer.ts} LossMultiplier : {} Network type : Modular - Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2} - Context : Targets{b.-2 b.-1 b.0} Columns{ID} LSTM{1 1 0 1} In{64} Out{64} w2v{} - Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} - RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32} + Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2} + Context : Targets{b.-2 b.-1 b.0} Columns{ID} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{} + RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32} w2v{} History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} InputDropout : 0.3 MLP : {1600 0.3 1600 0.3} End Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} + Type : classification + Loss : crossentropy } Splitwords : data/splitwords.ts Predictions : ID FORM EOS diff --git a/scripts/conll18_ud_eval.py b/scripts/conll18_ud_eval.py index d57179a0ccc6ed338701bff3eb8dc8dfef1a13d8..df4bb875945802f70f66f623f42454c5ca3ab142 100755 --- a/scripts/conll18_ud_eval.py +++ b/scripts/conll18_ud_eval.py @@ -111,6 +111,19 @@ metric2colname = { "Lemmas" : "LEMMA", } +defaultColumns = { +"ID", +"FORM", +"UPOS", +"XPOS", +"LEMMA", +"FEATS", +"HEAD", +"DEPREL", +"DEPS", +"MISC", +} + # Content and functional relations CONTENT_DEPRELS = { "nsubj", "obj", "iobj", "csubj", "ccomp", "xcomp", "obl", "vocative", @@ -129,6 +142,16 @@ UNIVERSAL_FEATURES = { "Tense", "Aspect", "Voice", "Evident", "Polarity", "Person", "Polite" } +################################################################################ +def is_float(value) : + if not isinstance(value, str) : + return False + try: + float(value) + return '.' in value + except ValueError: + return False +################################################################################ ################################################################################ def filter_columns(columns) : @@ -400,10 +423,15 @@ def evaluate(gold_ud, system_ud) : errors = [] for words in alignment.matched_words : if filter_fn is None or filter_fn(words.gold_word) : - if key_fn(words.gold_word, gold_aligned_gold) == key_fn(words.system_word, gold_aligned_system) : - correct += 1 + goldItem = key_fn(words.gold_word, gold_aligned_gold) + systemItem = key_fn(words.system_word, gold_aligned_system) + if (not isinstance(systemItem, str) or '.' not in systemItem or not is_float(systemItem)) or (not isinstance(goldItem, str) or '.' not in goldItem or not is_float(goldItem)) : + if goldItem == systemItem : + correct += 1 + else : + errors.append(words) else : - errors.append(words) + correct -= abs(float(goldItem) - float(systemItem)) return [Score(gold, system, correct, aligned), errors] @@ -532,6 +560,10 @@ def evaluate(gold_ud, system_ud) : if "ID" in col2index : result["Sentences"] = spans_score(gold_ud.sentences, system_ud.sentences) + for colName in col2index : + if colName not in defaultColumns and colName != "_" : + result[colName] = alignment_score(alignment, lambda w, _ : w.columns[col2index[colName]]) + return result ################################################################################ diff --git a/scripts/readMCD.py b/scripts/readMCD.py index 2ced082d26b47f86008990e81de9fc1cf25ac020..f569388841e95587286190373ae5def3f570b522 100644 --- a/scripts/readMCD.py +++ b/scripts/readMCD.py @@ -2,8 +2,11 @@ def readMCD(mcd) : col2index = {} index2col = {} + curId = 0 + for col in mcd.split(' ') : - col2index[col] = len(col2index) - index2col[len(index2col)] = col + col2index[col] = curId + index2col[curId] = col + curId += 1 return col2index, index2col