diff --git a/UD_any/data/feature_models/morpho_incremental.fm b/UD_any/data/feature_models/morpho_incremental.fm new file mode 100644 index 0000000000000000000000000000000000000000..2086605208d5ed6b39b1ae4d92c6de0a91430965 --- /dev/null +++ b/UD_any/data/feature_models/morpho_incremental.fm @@ -0,0 +1,28 @@ +# FORM +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 + diff --git a/UD_any/data/feature_models/tagger_incremental.fm b/UD_any/data/feature_models/tagger_incremental.fm new file mode 100644 index 0000000000000000000000000000000000000000..1bd749b37e31eaedb1b3b350cde34d1746239ae4 --- /dev/null +++ b/UD_any/data/feature_models/tagger_incremental.fm @@ -0,0 +1,25 @@ +# FORM +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# POS +b.-3#POS +b.-2#POS +b.-1#POS +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# MORPHO +b.-2#MORPHO +b.-1#MORPHO diff --git a/UD_any/data/feature_models/tokenizer_incremental.fm b/UD_any/data/feature_models/tokenizer_incremental.fm new file mode 100644 index 0000000000000000000000000000000000000000..d10a62e7563f024943d3defb266b2214d4920202 --- /dev/null +++ b/UD_any/data/feature_models/tokenizer_incremental.fm @@ -0,0 +1,42 @@ +# FORM +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# UPPERCASE +b.-1#FORM.U +# LENGTH +b.-1#FORM.LEN +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 
+b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.-5 +raw.-4 +raw.-3 +raw.-2 +raw.-1 +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# EOS +b.-1#EOS +b.-2#EOS diff --git a/UD_any/tokeparser_incremental/lemmatizer_case.cla b/UD_any/tokeparser_incremental/lemmatizer_case.cla new file mode 100644 index 0000000000000000000000000000000000000000..0994d40c10be7705854b52504754c3d6bf9a1a3b --- /dev/null +++ b/UD_any/tokeparser_incremental/lemmatizer_case.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Case +Type : Prediction +Oracle : lemma_case +Feature Model : data/feature_models/lemmatizer_rules.fm +Action Set : data/lemmatizer_case.as +Topology : (100,RELU,0.1) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental/lemmatizer_lookup.cla b/UD_any/tokeparser_incremental/lemmatizer_lookup.cla new file mode 100644 index 0000000000000000000000000000000000000000..41f63b2bdd7c6f77b01de9b737396b6302adf0cb --- /dev/null +++ b/UD_any/tokeparser_incremental/lemmatizer_lookup.cla @@ -0,0 +1,4 @@ +Name : Lemmatizer_Lookup +Type : Information +Oracle : lemma_lookup +Oracle Filename : data/maca_trans_lemmatizer_exceptions.fplm diff --git a/UD_any/tokeparser_incremental/lemmatizer_rules.cla b/UD_any/tokeparser_incremental/lemmatizer_rules.cla new file mode 100644 index 0000000000000000000000000000000000000000..9b59b622f3120d5a6df4bb24a15d1a2d371129ba --- /dev/null +++ b/UD_any/tokeparser_incremental/lemmatizer_rules.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Rules +Type : Prediction +Oracle : lemma_rules +Feature Model : data/feature_models/lemmatizer_rules.fm +Action Set : data/lemmatizer_rules.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental/machine.tm b/UD_any/tokeparser_incremental/machine.tm new file mode 100644 index 0000000000000000000000000000000000000000..0e1b0438deab0947a3624d9ac94a76e648715c4b --- /dev/null +++ b/UD_any/tokeparser_incremental/machine.tm @@ -0,0 
+1,35 @@ +Name : Tokenizer, Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tokeparser.dicts +%CLASSIFIERS +strategy strategy.cla +tokenizer tokenizer.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case lemmatizer_case.cla +parser parser.cla +%STATES +strategy strategy +tokenizer tokenizer +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +%TRANSITIONS +strategy tokenizer MOVE tokenizer +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +tokenizer strategy * +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * diff --git a/UD_any/tokeparser_incremental/morpho.cla b/UD_any/tokeparser_incremental/morpho.cla new file mode 100644 index 0000000000000000000000000000000000000000..63340a0680b19df22e1b79b5b2e03de822e31ee9 --- /dev/null +++ b/UD_any/tokeparser_incremental/morpho.cla @@ -0,0 +1,7 @@ +Name : Morpho +Type : Prediction +Oracle : morpho +Feature Model : data/feature_models/morpho_incremental.fm +Action Set : data/morpho_parts.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental/normal.tm b/UD_any/tokeparser_incremental/normal.tm new file mode 100644 index 0000000000000000000000000000000000000000..77ebff3a44bbe8eab9cff6ccad1442f5be33ad7e --- /dev/null +++ b/UD_any/tokeparser_incremental/normal.tm @@ -0,0 +1,31 @@ +Name : Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tagparser.dicts +%CLASSIFIERS +strategy strategy.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case 
lemmatizer_case.cla +parser parser.cla +%STATES +strategy strategy +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +%TRANSITIONS +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * diff --git a/UD_any/tokeparser_incremental/parser.cla b/UD_any/tokeparser_incremental/parser.cla new file mode 100644 index 0000000000000000000000000000000000000000..77714bee6a35fd391bea92997f34e1f63536a530 --- /dev/null +++ b/UD_any/tokeparser_incremental/parser.cla @@ -0,0 +1,7 @@ +Name : Parser +Type : Prediction +Oracle : parser +Feature Model : data/feature_models/parser_nofuture.fm +Action Set : data/parser.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental/strategy.cla b/UD_any/tokeparser_incremental/strategy.cla new file mode 100644 index 0000000000000000000000000000000000000000..fcf66b5327394529227bad9f0bad3829ec50e052 --- /dev/null +++ b/UD_any/tokeparser_incremental/strategy.cla @@ -0,0 +1,4 @@ +Name : Strategy +Type : Information +Oracle : strategy_tokenizer,tagger,morpho,lemmatizer,parser +Oracle Filename : none diff --git a/UD_any/tokeparser_incremental/tagger.cla b/UD_any/tokeparser_incremental/tagger.cla new file mode 100644 index 0000000000000000000000000000000000000000..9aa35ed9e2fd2ec2fc6b13053c441851cbac4892 --- /dev/null +++ b/UD_any/tokeparser_incremental/tagger.cla @@ -0,0 +1,7 @@ +Name : Tagger +Type : Prediction +Oracle : tagger +Feature Model : data/feature_models/tagger_incremental.fm +Action Set : data/tagger.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental/test.bd 
b/UD_any/tokeparser_incremental/test.bd new file mode 100644 index 0000000000000000000000000000000000000000..b2ef99610702da83ccf3870556882c973892fc30 --- /dev/null +++ b/UD_any/tokeparser_incremental/test.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none Final 1 +1 FORM hyp form Final 1 +3 POS hyp pos Final 1 +4 XPOS hyp pos Final 1 +5 MORPHO hyp morpho Final 1 +2 LEMMA hyp form Final 1 +6 GOV hyp int Final 1 +7 LABEL hyp labels Final 1 +0 EOS hyp eos Final 0 diff --git a/UD_any/tokeparser_incremental/tokenizer.cla b/UD_any/tokeparser_incremental/tokenizer.cla new file mode 100644 index 0000000000000000000000000000000000000000..127a62e59b684afc2f9cce2018f96af5d8dc3b80 --- /dev/null +++ b/UD_any/tokeparser_incremental/tokenizer.cla @@ -0,0 +1,7 @@ +Name : Tokenizer +Type : Prediction +Oracle : tokenizer +Feature Model : data/feature_models/tokenizer_incremental.fm +Action Set : data/tokenizer.as +Topology : (500,RELU,0.3) +Dynamic : no diff --git a/UD_any/tokeparser_incremental/tokeparser.dicts b/UD_any/tokeparser_incremental/tokeparser.dicts new file mode 100644 index 0000000000000000000000000000000000000000..01ff2b92ed209c698db8b5e4868c9b3ea15d405c --- /dev/null +++ b/UD_any/tokeparser_incremental/tokeparser.dicts @@ -0,0 +1,62 @@ +#Name Dimension Mode # +############################ +# TOKENIZER +Tokenizer_bool 02 Embeddings +Tokenizer_int 05 Embeddings +Tokenizer_letters 30 Embeddings +Tokenizer_form 30 Embeddings +Tokenizer_form.f 30 Embeddings +Tokenizer_actions 05 Embeddings +Tokenizer_entropy 05 Embeddings +Tokenizer_pos 18 Embeddings +Tokenizer_morpho 22 Embeddings +Tokenizer_eos 16 Embeddings +# TAGGER +Tagger_actions 18 Embeddings _ +Tagger_bool 16 Embeddings _ +Tagger_int 16 Embeddings _ +Tagger_eos 16 Embeddings _ +Tagger_gov 16 Embeddings _ +Tagger_pos 18 Embeddings _ +Tagger_form 30 Embeddings _ +Tagger_form.f 30 Embeddings _ +Tagger_lemma 30 Embeddings _ 
+Tagger_letters 30 Embeddings _ +Tagger_labels 18 Embeddings _ +Tagger_morpho 22 Embeddings _ +# MORPHO +Morpho_actions 18 Embeddings _ +Morpho_bool 16 Embeddings _ +Morpho_int 16 Embeddings _ +Morpho_eos 16 Embeddings _ +Morpho_gov 16 Embeddings _ +Morpho_pos 18 Embeddings _ +Morpho_form 30 Embeddings _ +Morpho_form.f 30 Embeddings _ +Morpho_lemma 30 Embeddings _ +Morpho_letters 30 Embeddings _ +Morpho_labels 18 Embeddings _ +Morpho_morpho 22 Embeddings _ +# LEMMATIZER RULES +Lemmatizer_Rules_form 30 Embeddings +Lemmatizer_Rules_letters 10 Embeddings +Lemmatizer_Rules_pos 30 Embeddings +Lemmatizer_Rules_morpho 30 Embeddings +# LEMMATIZER CASE +Lemmatizer_Case_form 30 Embeddings +Lemmatizer_Case_letters 10 Embeddings +Lemmatizer_Case_pos 30 Embeddings +Lemmatizer_Case_morpho 30 Embeddings +# PARSER +Parser_actions 18 Embeddings _ +Parser_bool 16 Embeddings _ +Parser_int 16 Embeddings _ +Parser_eos 16 Embeddings _ +Parser_gov 16 Embeddings _ +Parser_pos 18 Embeddings _ +Parser_form 30 Embeddings _ +Parser_form.f 30 Embeddings _ +Parser_lemma 30 Embeddings _ +Parser_letters 30 Embeddings _ +Parser_labels 18 Embeddings _ +Parser_morpho 22 Embeddings _ diff --git a/UD_any/tokeparser_incremental/train.bd b/UD_any/tokeparser_incremental/train.bd new file mode 100644 index 0000000000000000000000000000000000000000..963c311cf239f7a4dfc4e5649efd3a79648a14bc --- /dev/null +++ b/UD_any/tokeparser_incremental/train.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none FromZero 1 +1 FORM hyp form FromZero 1 +3 POS hyp pos FromZero 1 +4 XPOS hyp pos FromZero 1 +5 MORPHO hyp morpho FromZero 1 +2 LEMMA hyp form FromZero 1 +6 GOV hyp int FromZero 1 +7 LABEL hyp labels FromZero 1 +0 EOS hyp eos FromZero 0