diff --git a/UD_any/data/Makefile b/UD_any/data/Makefile index 6dedd6a3932c8dc2762b296eb2e846e795d99914..dd2c6c9d98e2a2276a028b48fd91c78992426c13 100644 --- a/UD_any/data/Makefile +++ b/UD_any/data/Makefile @@ -26,7 +26,7 @@ tokenizer.ts: all_no_test.conllu echo "SPLIT 5" >> $@ echo "SPLIT 6" >> $@ echo "SPLIT 7" >> $@ - echo "ADDCHARTOWORD" >> $@ + echo "ADDCHARTOWORD 1" >> $@ echo "IGNORECHAR" >> $@ sed -i -e 's/^/<tokenizer> /' $@ sed -i -e 's/^/<tokenizer> /' splitwords.ts diff --git a/UD_any/templates/tokenizer/machine.rm b/UD_any/templates/tokenizer/machine.rm index a2f74060cbd99a8ef55b6c2e679bff40cb975033..8b1305ccfa79bf85a47cc0f6388e33e30335779d 100644 --- a/UD_any/templates/tokenizer/machine.rm +++ b/UD_any/templates/tokenizer/machine.rm @@ -4,17 +4,16 @@ Classifier : tokenizer Transitions : {tokenizer,data/tokenizer.ts} LossMultiplier : {} Network type : Modular - StateName : Out{1024} - Context : Buffer{-3 -2 -1} Stack{} Columns{FORM} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{ID} NbElem{1} Buffer{-3 -2 -1} Stack{} LSTM{1 1 0 1} In{64} Out{64} - Focused : Column{FORM} NbElem{13} Buffer{-1 0} Stack{} LSTM{1 1 0 1} In{64} Out{64} - History : NbElem{10} LSTM{1 1 0 1} In{64} Out{64} - RawInput : Left{5} Right{5} LSTM{1 1 0.0 1} In{32} Out{32} - SplitTrans : LSTM{1 1 0.0 1} In{64} Out{64} - InputDropout : 0.5 - MLP : {2048 0.3 2048 0.3} - End - Optimizer : Adam {0.0003 0.9 0.999 0.00000001 0.00002 true} + Contextual : Window{-10 0} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{data/pretrained.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2} + Context : Targets{b.-2 b.-1 b.0} Columns{ID} LSTM{1 1 0 1} In{64} Out{64} w2v{} + Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} + Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} + RawInput : Left{5} Right{10} LSTM{1 1 0.0 1} In{32} Out{32} + History : NbElem{10} LSTM{1 1 0 1} In{32} Out{32} + InputDropout : 0.3 + MLP : {1600 0.3 1600 0.3} + End + Optimizer : Adagrad {0.01 0.000001 0 0.0000000001} } Splitwords : data/splitwords.ts Predictions : ID FORM EOS