diff --git a/UD_any/tokeparser_sequential/lemmatizer_case.cla b/UD_any/tokeparser_sequential/lemmatizer_case.cla
new file mode 100644
index 0000000000000000000000000000000000000000..0994d40c10be7705854b52504754c3d6bf9a1a3b
--- /dev/null
+++ b/UD_any/tokeparser_sequential/lemmatizer_case.cla
@@ -0,0 +1,7 @@
+Name : Lemmatizer_Case
+Type : Prediction
+Oracle : lemma_case
+Feature Model : data/feature_models/lemmatizer_rules.fm
+Action Set : data/lemmatizer_case.as
+Topology : (100,RELU,0.1)
+Dynamic : yes
diff --git a/UD_any/tokeparser_sequential/lemmatizer_lookup.cla b/UD_any/tokeparser_sequential/lemmatizer_lookup.cla
new file mode 100644
index 0000000000000000000000000000000000000000..41f63b2bdd7c6f77b01de9b737396b6302adf0cb
--- /dev/null
+++ b/UD_any/tokeparser_sequential/lemmatizer_lookup.cla
@@ -0,0 +1,4 @@
+Name : Lemmatizer_Lookup
+Type : Information
+Oracle : lemma_lookup
+Oracle Filename : data/maca_trans_lemmatizer_exceptions.fplm
diff --git a/UD_any/tokeparser_sequential/lemmatizer_rules.cla b/UD_any/tokeparser_sequential/lemmatizer_rules.cla
new file mode 100644
index 0000000000000000000000000000000000000000..9b59b622f3120d5a6df4bb24a15d1a2d371129ba
--- /dev/null
+++ b/UD_any/tokeparser_sequential/lemmatizer_rules.cla
@@ -0,0 +1,7 @@
+Name : Lemmatizer_Rules
+Type : Prediction
+Oracle : lemma_rules
+Feature Model : data/feature_models/lemmatizer_rules.fm
+Action Set : data/lemmatizer_rules.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser_sequential/machine.tm b/UD_any/tokeparser_sequential/machine.tm
new file mode 100644
index 0000000000000000000000000000000000000000..f591178c203e6b19d86ed8fc136b8ec7c74a505e
--- /dev/null
+++ b/UD_any/tokeparser_sequential/machine.tm
@@ -0,0 +1,39 @@
+Name : Tokenizer, Tagger, Morpho, Lemmatizer and Parser Machine
+Dicts : tokeparser.dicts
+%CLASSIFIERS
+strategy strategy.cla
+tokenizer tokenizer.cla
+tagger tagger.cla
+morpho morpho.cla
+lemmatizer_lookup lemmatizer_lookup.cla
+lemmatizer_rules lemmatizer_rules.cla
+lemmatizer_case lemmatizer_case.cla
+parser parser.cla
+segmenter segmenter.cla
+%STATES
+strategy strategy
+tokenizer tokenizer
+tagger tagger
+morpho morpho
+lemmatizer_lookup lemmatizer_lookup
+lemmatizer_rules lemmatizer_rules
+lemmatizer_case lemmatizer_case
+parser parser
+segmenter segmenter
+%TRANSITIONS
+strategy tokenizer MOVE tokenizer
+strategy tagger MOVE tagger
+strategy morpho MOVE morpho
+strategy lemmatizer_lookup MOVE lemmatizer_lookup
+strategy lemmatizer_rules MOVE lemmatizer_rules
+strategy lemmatizer_case MOVE lemmatizer_case
+strategy parser MOVE parser
+strategy segmenter MOVE segmenter
+tokenizer strategy *
+tagger strategy *
+morpho strategy *
+lemmatizer_lookup strategy *
+lemmatizer_case strategy *
+lemmatizer_rules strategy *
+parser strategy *
+segmenter strategy *
diff --git a/UD_any/tokeparser_sequential/morpho.cla b/UD_any/tokeparser_sequential/morpho.cla
new file mode 100644
index 0000000000000000000000000000000000000000..024d1e99d8541dff950adbccbcd8ae50362e6dd9
--- /dev/null
+++ b/UD_any/tokeparser_sequential/morpho.cla
@@ -0,0 +1,7 @@
+Name : Morpho
+Type : Prediction
+Oracle : morpho
+Feature Model : data/feature_models/morpho_nofuture.fm
+Action Set : data/morpho_parts.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser_sequential/normal.tm b/UD_any/tokeparser_sequential/normal.tm
new file mode 100644
index 0000000000000000000000000000000000000000..77ebff3a44bbe8eab9cff6ccad1442f5be33ad7e
--- /dev/null
+++ b/UD_any/tokeparser_sequential/normal.tm
@@ -0,0 +1,31 @@
+Name : Tagger, Morpho, Lemmatizer and Parser Machine
+Dicts : tagparser.dicts
+%CLASSIFIERS
+strategy strategy.cla
+tagger tagger.cla
+morpho morpho.cla
+lemmatizer_lookup lemmatizer_lookup.cla
+lemmatizer_rules lemmatizer_rules.cla
+lemmatizer_case lemmatizer_case.cla
+parser parser.cla
+%STATES
+strategy strategy
+tagger tagger
+morpho morpho
+lemmatizer_lookup lemmatizer_lookup
+lemmatizer_rules lemmatizer_rules
+lemmatizer_case lemmatizer_case
+parser parser
+%TRANSITIONS
+strategy tagger MOVE tagger
+strategy morpho MOVE morpho
+strategy lemmatizer_lookup MOVE lemmatizer_lookup
+strategy lemmatizer_rules MOVE lemmatizer_rules
+strategy lemmatizer_case MOVE lemmatizer_case
+strategy parser MOVE parser
+tagger strategy *
+morpho strategy *
+lemmatizer_lookup strategy *
+lemmatizer_case strategy *
+lemmatizer_rules strategy *
+parser strategy *
diff --git a/UD_any/tokeparser_sequential/parser.cla b/UD_any/tokeparser_sequential/parser.cla
new file mode 100644
index 0000000000000000000000000000000000000000..77714bee6a35fd391bea92997f34e1f63536a530
--- /dev/null
+++ b/UD_any/tokeparser_sequential/parser.cla
@@ -0,0 +1,7 @@
+Name : Parser
+Type : Prediction
+Oracle : parser
+Feature Model : data/feature_models/parser_nofuture.fm
+Action Set : data/parser.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser_sequential/segmenter.cla b/UD_any/tokeparser_sequential/segmenter.cla
new file mode 100644
index 0000000000000000000000000000000000000000..6a817d1e216d65c0579bb1c5ae393075dfc499d4
--- /dev/null
+++ b/UD_any/tokeparser_sequential/segmenter.cla
@@ -0,0 +1,7 @@
+Name : Segmenter
+Type : Prediction
+Oracle : segmenter
+Feature Model : data/feature_models/tagger.fm
+Action Set : data/segmenter.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser_sequential/strategy.cla b/UD_any/tokeparser_sequential/strategy.cla
new file mode 100644
index 0000000000000000000000000000000000000000..d21d9a7a923c8fccfc95a0476a966a98bb2cf130
--- /dev/null
+++ b/UD_any/tokeparser_sequential/strategy.cla
@@ -0,0 +1,4 @@
+Name : Strategy
+Type : Information
+Oracle : strategy_tokenizer,tagger,morpho,lemmatizer,parser_sequential
+Oracle Filename : none
diff --git a/UD_any/tokeparser_sequential/tagger.cla b/UD_any/tokeparser_sequential/tagger.cla
new file mode 100644
index 0000000000000000000000000000000000000000..c98cb92ba6c11bf9f893f23e4522b2682bfde843
--- /dev/null
+++ b/UD_any/tokeparser_sequential/tagger.cla
@@ -0,0 +1,7 @@
+Name : Tagger
+Type : Prediction
+Oracle : tagger
+Feature Model : data/feature_models/tagger_nofuture.fm
+Action Set : data/tagger.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser_sequential/test.bd b/UD_any/tokeparser_sequential/test.bd
new file mode 100644
index 0000000000000000000000000000000000000000..b2ef99610702da83ccf3870556882c973892fc30
--- /dev/null
+++ b/UD_any/tokeparser_sequential/test.bd
@@ -0,0 +1,11 @@
+#Index Name ref/hyp dict Policy Must print?#
+###################################################
+0 ID hyp none Final 1
+1 FORM hyp form Final 1
+3 POS hyp pos Final 1
+4 XPOS hyp pos Final 1
+5 MORPHO hyp morpho Final 1
+2 LEMMA hyp form Final 1
+6 GOV hyp int Final 1
+7 LABEL hyp labels Final 1
+0 EOS hyp eos Final 0
diff --git a/UD_any/tokeparser_sequential/tokenizer.cla b/UD_any/tokeparser_sequential/tokenizer.cla
new file mode 100644
index 0000000000000000000000000000000000000000..cdd5a2ef8235dc0fd7e6043a4adda666f1273713
--- /dev/null
+++ b/UD_any/tokeparser_sequential/tokenizer.cla
@@ -0,0 +1,7 @@
+Name : Tokenizer
+Type : Prediction
+Oracle : tokenizer
+Feature Model : data/feature_models/tokenizer.fm
+Action Set : data/tokenizer.as
+Topology : (500,RELU,0.3)
+Dynamic : no
diff --git a/UD_any/tokeparser_sequential/tokeparser.dicts b/UD_any/tokeparser_sequential/tokeparser.dicts
new file mode 100644
index 0000000000000000000000000000000000000000..c9235b91d69bcb166103ad08ad18000edbdc1a25
--- /dev/null
+++ b/UD_any/tokeparser_sequential/tokeparser.dicts
@@ -0,0 +1,102 @@
+#Name Dimension Mode #
+############################
+# TOKENIZER
+Tokenizer_bool 02 Embeddings _ 5
+Tokenizer_int 05 Embeddings _ 200
+Tokenizer_letters 30 Embeddings _ 200000
+Tokenizer_form 30 Embeddings _ 50000
+Tokenizer_form.f 30 Embeddings _ 200000
+Tokenizer_actions 05 Embeddings _ 200
+# TAGGER
+Tagger_actions 18 Embeddings _ 20
+Tagger_bool 16 Embeddings _ 5
+Tagger_int 16 Embeddings _ 200
+Tagger_eos 16 Embeddings _ 5
+Tagger_gov 16 Embeddings _ 100
+Tagger_pos 18 Embeddings _ 21
+Tagger_form 30 Embeddings _ 50000
+Tagger_form.f 30 Embeddings _ 200000
+Tagger_lemma 30 Embeddings _ 50000
+Tagger_letters 30 Embeddings _ 200000
+Tagger_labels 18 Embeddings _ 50
+Tagger_morpho 22 Embeddings _ 50000
+# MORPHO
+Morpho_actions 18 Embeddings _ 1000
+Morpho_bool 16 Embeddings _ 5
+Morpho_int 16 Embeddings _ 200
+Morpho_eos 16 Embeddings _ 5
+Morpho_gov 16 Embeddings _ 100
+Morpho_pos 18 Embeddings _ 21
+Morpho_form 30 Embeddings _ 50000
+Morpho_form.f 30 Embeddings _ 20000
+Morpho_lemma 30 Embeddings _ 50000
+Morpho_letters 30 Embeddings _ 20000
+Morpho_labels 18 Embeddings _ 50
+Morpho_morpho 22 Embeddings _ 50000
+# LEMMATIZER
+Lemmatizer_Rules_form 30 Embeddings _ 50000
+Lemmatizer_Rules_letters 10 Embeddings _ 200000
+Lemmatizer_Rules_pos 30 Embeddings _ 21
+Lemmatizer_Rules_morpho 30 Embeddings _ 50000
+# LEMMATIZER
+Lemmatizer_Case_form 30 Embeddings _ 50000
+Lemmatizer_Case_letters 10 Embeddings _ 200000
+Lemmatizer_Case_pos 30 Embeddings _ 21
+Lemmatizer_Case_morpho 30 Embeddings _ 50000
+# PARSER
+Parser_actions 18 Embeddings _ 200
+Parser_bool 16 Embeddings _ 5
+Parser_int 16 Embeddings _ 200
+Parser_eos 16 Embeddings _ 5
+Parser_gov 16 Embeddings _ 100
+Parser_pos 18 Embeddings _ 21
+Parser_form 30 Embeddings _ 50000
+Parser_form.f 30 Embeddings _ 20000
+Parser_lemma 30 Embeddings _ 50000
+Parser_letters 30 Embeddings _ 20000
+Parser_labels 18 Embeddings _ 50
+Parser_morpho 22 Embeddings _ 50000
+# SEGMENTER
+Segmenter_bool 02 Embeddings _ 5
+Segmenter_int 05 Embeddings _ 200
+Segmenter_letters 30 Embeddings _ 200000
+Segmenter_pos 15 Embeddings _ 21
+Segmenter_form 30 Embeddings _ 50000
+Segmenter_form.f 30 Embeddings _ 200000
+Segmenter_actions 05 Embeddings _ 21
+# ERROR_TAGGER
+Error_Tagger_actions 18 Embeddings _
+Error_Tagger_bool 16 Embeddings _
+Error_Tagger_int 16 Embeddings _
+Error_Tagger_eos 16 Embeddings _
+Error_Tagger_gov 16 Embeddings _
+Error_Tagger_pos 18 Embeddings _
+Error_Tagger_form 30 Embeddings _
+Error_Tagger_lemma 30 Embeddings _
+Error_Tagger_letters 30 Embeddings _
+Error_Tagger_labels 18 Embeddings _
+Error_Tagger_morpho 22 Embeddings _
+# ERROR_MORPHO
+Error_Morpho_actions 18 Embeddings _
+Error_Morpho_bool 16 Embeddings _
+Error_Morpho_int 16 Embeddings _
+Error_Morpho_eos 16 Embeddings _
+Error_Morpho_gov 16 Embeddings _
+Error_Morpho_pos 18 Embeddings _
+Error_Morpho_form 30 Embeddings _
+Error_Morpho_lemma 30 Embeddings _
+Error_Morpho_letters 30 Embeddings _
+Error_Morpho_labels 18 Embeddings _
+Error_Morpho_morpho 22 Embeddings _
+# ERROR_PARSER
+Error_Parser_actions 18 Embeddings _
+Error_Parser_bool 16 Embeddings _
+Error_Parser_int 16 Embeddings _
+Error_Parser_eos 16 Embeddings _
+Error_Parser_gov 16 Embeddings _
+Error_Parser_pos 18 Embeddings _
+Error_Parser_form 30 Embeddings _
+Error_Parser_lemma 30 Embeddings _
+Error_Parser_letters 30 Embeddings _
+Error_Parser_labels 18 Embeddings _
+Error_Parser_morpho 22 Embeddings _
diff --git a/UD_any/tokeparser_sequential/train.bd b/UD_any/tokeparser_sequential/train.bd
new file mode 100644
index 0000000000000000000000000000000000000000..963c311cf239f7a4dfc4e5649efd3a79648a14bc
--- /dev/null
+++ b/UD_any/tokeparser_sequential/train.bd
@@ -0,0 +1,11 @@
+#Index Name ref/hyp dict Policy Must print?#
+###################################################
+0 ID hyp none FromZero 1
+1 FORM hyp form FromZero 1
+3 POS hyp pos FromZero 1
+4 XPOS hyp pos FromZero 1
+5 MORPHO hyp morpho FromZero 1
+2 LEMMA hyp form FromZero 1
+6 GOV hyp int FromZero 1
+7 LABEL hyp labels FromZero 1
+0 EOS hyp eos FromZero 0
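
For readers unfamiliar with the .tm format, the %TRANSITIONS block of machine.tm can be read as a dispatch table: the strategy state MOVEs into each module's state, and every module hands control back to strategy via the wildcard (*) lines. The rough Python sketch below is not part of the patch and the helpers read_transitions and walk_once are hypothetical; it only illustrates that reading, assuming the sequential strategy visits modules in the order their MOVE transitions are declared (actual macaon semantics may differ).

# Rough sketch, not part of the patch: read the %TRANSITIONS block of a .tm
# file as a dispatch table and walk the assumed sequential loop once.
from collections import defaultdict

def read_transitions(tm_text):
    """Collect "<from> <to> ..." pairs from the %TRANSITIONS section only."""
    transitions = defaultdict(list)
    in_transitions = False
    for raw in tm_text.splitlines():
        line = raw.strip()
        if line.startswith("%"):
            in_transitions = (line == "%TRANSITIONS")
        elif in_transitions and line:
            src, dst = line.split()[:2]
            transitions[src].append(dst)
    return transitions

def walk_once(transitions):
    """Visit each module the strategy state MOVEs to, in declaration order."""
    order = []
    for module in transitions["strategy"]:
        order.append(module)                      # strategy -> module
        assert "strategy" in transitions[module]  # module -> strategy (the * lines)
    return order

if __name__ == "__main__":
    with open("UD_any/tokeparser_sequential/machine.tm") as f:
        print(walk_once(read_transitions(f.read())))
    # Under these assumptions the printed order is: tokenizer, tagger, morpho,
    # lemmatizer_lookup, lemmatizer_rules, lemmatizer_case, parser, segmenter.

normal.tm declares the same loop without the tokenizer and segmenter states, so running the sketch on it would yield the shorter tagger-through-parser cycle.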