diff --git a/UD_any/lemmatizer/lemmatizer.dicts b/UD_any/lemmatizer/lemmatizer.dicts index 9275bd1c016e97bba546654a5dad2469c2aad861..1bafa4ca0b1787a06eed0a0c3b8e632414bc6045 100644 --- a/UD_any/lemmatizer/lemmatizer.dicts +++ b/UD_any/lemmatizer/lemmatizer.dicts @@ -1,11 +1,11 @@ #Name Dimension Mode # ############################################# -Lemmatizer_Rules_form 30 Embeddings -Lemmatizer_Rules_letters 10 Embeddings -Lemmatizer_Rules_pos 30 Embeddings -Lemmatizer_Rules_morpho 30 Embeddings +Lemmatizer_Rules_form 30 Embeddings _ 50000 +Lemmatizer_Rules_letters 10 Embeddings _ 200000 +Lemmatizer_Rules_pos 30 Embeddings _ 25 +Lemmatizer_Rules_morpho 30 Embeddings _ 50000 ############################################# -Lemmatizer_Case_form 30 Embeddings -Lemmatizer_Case_letters 10 Embeddings -Lemmatizer_Case_pos 30 Embeddings -Lemmatizer_Case_morpho 30 Embeddings +Lemmatizer_Case_form 30 Embeddings _ 50000 +Lemmatizer_Case_letters 10 Embeddings _ 20000 +Lemmatizer_Case_pos 30 Embeddings _ 25 +Lemmatizer_Case_morpho 30 Embeddings _ 50000 diff --git a/UD_any/morpho_parts/morpho.dicts b/UD_any/morpho_parts/morpho.dicts index db352f3de955ee20afcd2e3959bc77e1a78b898e..71ebeba855cdc62e11cb0bfa6286d374473ecfac 100644 --- a/UD_any/morpho_parts/morpho.dicts +++ b/UD_any/morpho_parts/morpho.dicts @@ -1,13 +1,13 @@ #Name Dimension Mode # ################################### -Morpho_bool 10 Embeddings -Morpho_int 10 Embeddings -Morpho_letters 30 Embeddings -Morpho_pos 30 Embeddings -Morpho_form 100 Embeddings -Morpho_form.f 100 Embeddings -Morpho_morpho 30 Embeddings -Morpho_actions 30 Embeddings +Morpho_bool 10 Embeddings _ 5 +Morpho_int 10 Embeddings _ 200 +Morpho_letters 30 Embeddings _ 200000 +Morpho_pos 30 Embeddings _ 25 +Morpho_form 100 Embeddings _ 50000 +Morpho_form.f 100 Embeddings _ 200000 +Morpho_morpho 30 Embeddings _ 50000 +Morpho_actions 30 Embeddings _ 50 # ERROR_MORPHO Error_Morpho_actions 18 Embeddings _ Error_Morpho_bool 16 Embeddings _ diff --git a/UD_any/morpho_whole/morpho.dicts b/UD_any/morpho_whole/morpho.dicts index db352f3de955ee20afcd2e3959bc77e1a78b898e..512162c7b668546cbab613b3339e7441e1eed358 100644 --- a/UD_any/morpho_whole/morpho.dicts +++ b/UD_any/morpho_whole/morpho.dicts @@ -1,13 +1,13 @@ #Name Dimension Mode # ################################### -Morpho_bool 10 Embeddings -Morpho_int 10 Embeddings -Morpho_letters 30 Embeddings -Morpho_pos 30 Embeddings -Morpho_form 100 Embeddings -Morpho_form.f 100 Embeddings -Morpho_morpho 30 Embeddings -Morpho_actions 30 Embeddings +Morpho_bool 10 Embeddings _ 5 +Morpho_int 10 Embeddings _ 200 +Morpho_letters 30 Embeddings _ 200000 +Morpho_pos 30 Embeddings _ 25 +Morpho_form 100 Embeddings _ 50000 +Morpho_form.f 100 Embeddings _ 200000 +Morpho_morpho 30 Embeddings _ 50000 +Morpho_actions 30 Embeddings _ 50000 # ERROR_MORPHO Error_Morpho_actions 18 Embeddings _ Error_Morpho_bool 16 Embeddings _ diff --git a/UD_any/parser/parser.dicts b/UD_any/parser/parser.dicts index ed7300047e77f3e1b5d849606246c9e082b24d0f..1c5346f92d4057237b89cef3d337b667526ff540 100644 --- a/UD_any/parser/parser.dicts +++ b/UD_any/parser/parser.dicts @@ -1,17 +1,16 @@ #Name Dimension Mode PretrainedFilename # ######################################################## -Parser_actions 18 Embeddings _ -Parser_bool 16 Embeddings _ -Parser_int 16 Embeddings _ -Parser_eos 16 Embeddings _ -Parser_pos 18 Embeddings _ -Parser_form 30 Embeddings _ -Parser_form.f 30 Embeddings _ -Parser_lemma 30 Embeddings _ -Parser_letters 30 Embeddings _ -Parser_labels 18 Embeddings _ -Parser_morpho 22 Embeddings _ -Parser_sgn 20 Embeddings _ +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 5 +Parser_int 16 Embeddings _ 200 +Parser_eos 16 Embeddings _ 5 +Parser_pos 18 Embeddings _ 25 +Parser_form 30 Embeddings _ 50000 +Parser_form.f 30 Embeddings _ 200000 +Parser_lemma 30 Embeddings _ 50000 +Parser_letters 30 Embeddings _ 200000 +Parser_labels 18 Embeddings _ 50 +Parser_morpho 22 Embeddings _ 50000 ######################################################## Error_Parser_actions 18 Embeddings _ Error_Parser_bool 16 Embeddings _ @@ -23,4 +22,3 @@ Error_Parser_lemma 30 Embeddings _ Error_Parser_letters 30 Embeddings _ Error_Parser_labels 18 Embeddings _ Error_Parser_morpho 22 Embeddings _ -Error_Parser_sgn 20 Embeddings _ diff --git a/UD_any/tagger/tagger.dicts b/UD_any/tagger/tagger.dicts index 74b2522768329004111100d7190ee736201be3aa..161ec3d0ef598a0c87318fe1601cbf674939211f 100644 --- a/UD_any/tagger/tagger.dicts +++ b/UD_any/tagger/tagger.dicts @@ -7,16 +7,14 @@ #Tagger_form 30 Embeddings ../../../data/ftb/Tagger_form.dict #Tagger_sgn 10 Embeddings ../../../data/ftb/Tagger_sgn.dict #Tagger_actions 05 Embeddings ../../../data/ftb/Tagger_actions.dict -#Tagger_entropy 05 Embeddings ../../../data/ftb/Tagger_entropy.dict ######################################################################### -Tagger_bool 02 Embeddings -Tagger_int 05 Embeddings -Tagger_letters 30 Embeddings -Tagger_pos 15 Embeddings -Tagger_form 30 Embeddings -Tagger_form.f 30 Embeddings -Tagger_actions 05 Embeddings -Tagger_entropy 05 Embeddings +Tagger_bool 02 Embeddings _ 5 +Tagger_int 05 Embeddings _ 200 +Tagger_letters 30 Embeddings _ 200000 +Tagger_pos 15 Embeddings _ 21 +Tagger_form 30 Embeddings _ 50000 +Tagger_form.f 30 Embeddings _ 200000 +Tagger_actions 05 Embeddings _ 21 ######################################################################### Error_Tagger_bool 02 Embeddings Error_Tagger_int 05 Embeddings @@ -25,4 +23,3 @@ Error_Tagger_pos 15 Embeddings Error_Tagger_form 30 Embeddings Error_Tagger_sgn 10 Embeddings Error_Tagger_actions 05 Embeddings -Error_Tagger_entropy 05 Embeddings diff --git a/UD_any/tagparser/tagparser.dicts b/UD_any/tagparser/tagparser.dicts index 18a0ab620d32c64bf6600516e44d6260b2ce3a36..c328e311736e2046fa2c847b03c0e107c88d96fe 100644 --- a/UD_any/tagparser/tagparser.dicts +++ b/UD_any/tagparser/tagparser.dicts @@ -1,54 +1,54 @@ #Name Dimension Mode # ############################ # TAGGER -Tagger_actions 18 Embeddings _ -Tagger_bool 16 Embeddings _ -Tagger_int 16 Embeddings _ -Tagger_eos 16 Embeddings _ -Tagger_gov 16 Embeddings _ -Tagger_pos 18 Embeddings _ -Tagger_form 30 Embeddings _ -Tagger_form.f 30 Embeddings _ -Tagger_lemma 30 Embeddings _ -Tagger_letters 30 Embeddings _ -Tagger_labels 18 Embeddings _ -Tagger_morpho 22 Embeddings _ +Tagger_actions 18 Embeddings _ 20 +Tagger_bool 16 Embeddings _ 5 +Tagger_int 16 Embeddings _ 200 +Tagger_eos 16 Embeddings _ 5 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 21 +Tagger_form 30 Embeddings _ 50000 +Tagger_form.f 30 Embeddings _ 200000 +Tagger_lemma 30 Embeddings _ 50000 +Tagger_letters 30 Embeddings _ 200000 +Tagger_labels 18 Embeddings _ 50 +Tagger_morpho 22 Embeddings _ 50000 # MORPHO -Morpho_actions 18 Embeddings _ -Morpho_bool 16 Embeddings _ -Morpho_int 16 Embeddings _ -Morpho_eos 16 Embeddings _ -Morpho_gov 16 Embeddings _ -Morpho_pos 18 Embeddings _ -Morpho_form 30 Embeddings _ -Morpho_form.f 30 Embeddings _ -Morpho_lemma 30 Embeddings _ -Morpho_letters 30 Embeddings _ -Morpho_labels 18 Embeddings _ -Morpho_morpho 22 Embeddings _ +Morpho_actions 18 Embeddings _ 1000 +Morpho_bool 16 Embeddings _ 5 +Morpho_int 16 Embeddings _ 200 +Morpho_eos 16 Embeddings _ 5 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 21 +Morpho_form 30 Embeddings _ 50000 +Morpho_form.f 30 Embeddings _ 20000 +Morpho_lemma 30 Embeddings _ 50000 +Morpho_letters 30 Embeddings _ 20000 +Morpho_labels 18 Embeddings _ 50 +Morpho_morpho 22 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Rules_form 30 Embeddings -Lemmatizer_Rules_letters 10 Embeddings -Lemmatizer_Rules_pos 30 Embeddings -Lemmatizer_Rules_morpho 30 Embeddings +Lemmatizer_Rules_form 30 Embeddings _ 50000 +Lemmatizer_Rules_letters 10 Embeddings _ 200000 +Lemmatizer_Rules_pos 30 Embeddings _ 21 +Lemmatizer_Rules_morpho 30 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Case_form 30 Embeddings -Lemmatizer_Case_letters 10 Embeddings -Lemmatizer_Case_pos 30 Embeddings -Lemmatizer_Case_morpho 30 Embeddings +Lemmatizer_Case_form 30 Embeddings _ 50000 +Lemmatizer_Case_letters 10 Embeddings _ 200000 +Lemmatizer_Case_pos 30 Embeddings _ 21 +Lemmatizer_Case_morpho 30 Embeddings _ 50000 # PARSER -Parser_actions 18 Embeddings _ -Parser_bool 16 Embeddings _ -Parser_int 16 Embeddings _ -Parser_eos 16 Embeddings _ -Parser_gov 16 Embeddings _ -Parser_pos 18 Embeddings _ -Parser_form 30 Embeddings _ -Parser_form.f 30 Embeddings _ -Parser_lemma 30 Embeddings _ -Parser_letters 30 Embeddings _ -Parser_labels 18 Embeddings _ -Parser_morpho 22 Embeddings _ +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 5 +Parser_int 16 Embeddings _ 200 +Parser_eos 16 Embeddings _ 5 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 21 +Parser_form 30 Embeddings _ 50000 +Parser_form.f 30 Embeddings _ 20000 +Parser_lemma 30 Embeddings _ 50000 +Parser_letters 30 Embeddings _ 20000 +Parser_labels 18 Embeddings _ 50 +Parser_morpho 22 Embeddings _ 50000 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _ diff --git a/UD_any/tagparser/test.bd b/UD_any/tagparser/test.bd index f840332126dcc0452d7860e00688bccdde7b99f0..ae0bfa67224df2e2496ac77f1ff593b0095e7a50 100644 --- a/UD_any/tagparser/test.bd +++ b/UD_any/tagparser/test.bd @@ -6,6 +6,6 @@ 4 XPOS hyp pos Final 1 5 MORPHO hyp morpho Final 1 2 LEMMA hyp form Final 1 -6 GOV hyp int Final 1 +6 GOV hyp gov Final 1 7 LABEL hyp labels Final 1 0 EOS hyp eos Final 0 diff --git a/UD_any/tagparser/train.bd b/UD_any/tagparser/train.bd index b4073cc23be3d21a9c62446f17a1ca5dc8bc72fa..0d725f677db1f86de4d7e8fd329b109ebc816c87 100644 --- a/UD_any/tagparser/train.bd +++ b/UD_any/tagparser/train.bd @@ -6,6 +6,6 @@ 4 XPOS hyp pos FromZero 1 5 MORPHO hyp morpho FromZero 1 2 LEMMA hyp form FromZero 1 -6 GOV hyp int FromZero 1 +6 GOV hyp gov FromZero 1 7 LABEL hyp labels FromZero 1 0 EOS hyp eos FromZero 0 diff --git a/UD_any/tagparser_sequential/tagparser.dicts b/UD_any/tagparser_sequential/tagparser.dicts index 18a0ab620d32c64bf6600516e44d6260b2ce3a36..c328e311736e2046fa2c847b03c0e107c88d96fe 100644 --- a/UD_any/tagparser_sequential/tagparser.dicts +++ b/UD_any/tagparser_sequential/tagparser.dicts @@ -1,54 +1,54 @@ #Name Dimension Mode # ############################ # TAGGER -Tagger_actions 18 Embeddings _ -Tagger_bool 16 Embeddings _ -Tagger_int 16 Embeddings _ -Tagger_eos 16 Embeddings _ -Tagger_gov 16 Embeddings _ -Tagger_pos 18 Embeddings _ -Tagger_form 30 Embeddings _ -Tagger_form.f 30 Embeddings _ -Tagger_lemma 30 Embeddings _ -Tagger_letters 30 Embeddings _ -Tagger_labels 18 Embeddings _ -Tagger_morpho 22 Embeddings _ +Tagger_actions 18 Embeddings _ 20 +Tagger_bool 16 Embeddings _ 5 +Tagger_int 16 Embeddings _ 200 +Tagger_eos 16 Embeddings _ 5 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 21 +Tagger_form 30 Embeddings _ 50000 +Tagger_form.f 30 Embeddings _ 200000 +Tagger_lemma 30 Embeddings _ 50000 +Tagger_letters 30 Embeddings _ 200000 +Tagger_labels 18 Embeddings _ 50 +Tagger_morpho 22 Embeddings _ 50000 # MORPHO -Morpho_actions 18 Embeddings _ -Morpho_bool 16 Embeddings _ -Morpho_int 16 Embeddings _ -Morpho_eos 16 Embeddings _ -Morpho_gov 16 Embeddings _ -Morpho_pos 18 Embeddings _ -Morpho_form 30 Embeddings _ -Morpho_form.f 30 Embeddings _ -Morpho_lemma 30 Embeddings _ -Morpho_letters 30 Embeddings _ -Morpho_labels 18 Embeddings _ -Morpho_morpho 22 Embeddings _ +Morpho_actions 18 Embeddings _ 1000 +Morpho_bool 16 Embeddings _ 5 +Morpho_int 16 Embeddings _ 200 +Morpho_eos 16 Embeddings _ 5 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 21 +Morpho_form 30 Embeddings _ 50000 +Morpho_form.f 30 Embeddings _ 20000 +Morpho_lemma 30 Embeddings _ 50000 +Morpho_letters 30 Embeddings _ 20000 +Morpho_labels 18 Embeddings _ 50 +Morpho_morpho 22 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Rules_form 30 Embeddings -Lemmatizer_Rules_letters 10 Embeddings -Lemmatizer_Rules_pos 30 Embeddings -Lemmatizer_Rules_morpho 30 Embeddings +Lemmatizer_Rules_form 30 Embeddings _ 50000 +Lemmatizer_Rules_letters 10 Embeddings _ 200000 +Lemmatizer_Rules_pos 30 Embeddings _ 21 +Lemmatizer_Rules_morpho 30 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Case_form 30 Embeddings -Lemmatizer_Case_letters 10 Embeddings -Lemmatizer_Case_pos 30 Embeddings -Lemmatizer_Case_morpho 30 Embeddings +Lemmatizer_Case_form 30 Embeddings _ 50000 +Lemmatizer_Case_letters 10 Embeddings _ 200000 +Lemmatizer_Case_pos 30 Embeddings _ 21 +Lemmatizer_Case_morpho 30 Embeddings _ 50000 # PARSER -Parser_actions 18 Embeddings _ -Parser_bool 16 Embeddings _ -Parser_int 16 Embeddings _ -Parser_eos 16 Embeddings _ -Parser_gov 16 Embeddings _ -Parser_pos 18 Embeddings _ -Parser_form 30 Embeddings _ -Parser_form.f 30 Embeddings _ -Parser_lemma 30 Embeddings _ -Parser_letters 30 Embeddings _ -Parser_labels 18 Embeddings _ -Parser_morpho 22 Embeddings _ +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 5 +Parser_int 16 Embeddings _ 200 +Parser_eos 16 Embeddings _ 5 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 21 +Parser_form 30 Embeddings _ 50000 +Parser_form.f 30 Embeddings _ 20000 +Parser_lemma 30 Embeddings _ 50000 +Parser_letters 30 Embeddings _ 20000 +Parser_labels 18 Embeddings _ 50 +Parser_morpho 22 Embeddings _ 50000 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _ diff --git a/UD_any/tagparser_sequential_strong/tagparser.dicts b/UD_any/tagparser_sequential_strong/tagparser.dicts index 06343655036e31210ba605ca7a2abdd486529b08..c328e311736e2046fa2c847b03c0e107c88d96fe 100644 --- a/UD_any/tagparser_sequential_strong/tagparser.dicts +++ b/UD_any/tagparser_sequential_strong/tagparser.dicts @@ -1,54 +1,54 @@ #Name Dimension Mode # ############################ # TAGGER -Tagger_actions 30 Embeddings _ -Tagger_bool 30 Embeddings _ -Tagger_int 30 Embeddings _ -Tagger_eos 30 Embeddings _ -Tagger_gov 30 Embeddings _ -Tagger_pos 30 Embeddings _ -Tagger_form 100 Embeddings _ -Tagger_form.f 100 Embeddings _ -Tagger_lemma 30 Embeddings _ -Tagger_letters 30 Embeddings _ -Tagger_labels 30 Embeddings _ -Tagger_morpho 30 Embeddings _ +Tagger_actions 18 Embeddings _ 20 +Tagger_bool 16 Embeddings _ 5 +Tagger_int 16 Embeddings _ 200 +Tagger_eos 16 Embeddings _ 5 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 21 +Tagger_form 30 Embeddings _ 50000 +Tagger_form.f 30 Embeddings _ 200000 +Tagger_lemma 30 Embeddings _ 50000 +Tagger_letters 30 Embeddings _ 200000 +Tagger_labels 18 Embeddings _ 50 +Tagger_morpho 22 Embeddings _ 50000 # MORPHO -Morpho_actions 30 Embeddings _ -Morpho_bool 30 Embeddings _ -Morpho_int 30 Embeddings _ -Morpho_eos 30 Embeddings _ -Morpho_gov 30 Embeddings _ -Morpho_pos 30 Embeddings _ -Morpho_form 30 Embeddings _ -Morpho_form.f 100 Embeddings _ -Morpho_lemma 30 Embeddings _ -Morpho_letters 30 Embeddings _ -Morpho_labels 30 Embeddings _ -Morpho_morpho 30 Embeddings _ +Morpho_actions 18 Embeddings _ 1000 +Morpho_bool 16 Embeddings _ 5 +Morpho_int 16 Embeddings _ 200 +Morpho_eos 16 Embeddings _ 5 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 21 +Morpho_form 30 Embeddings _ 50000 +Morpho_form.f 30 Embeddings _ 20000 +Morpho_lemma 30 Embeddings _ 50000 +Morpho_letters 30 Embeddings _ 20000 +Morpho_labels 18 Embeddings _ 50 +Morpho_morpho 22 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Rules_form 100 Embeddings -Lemmatizer_Rules_letters 30 Embeddings -Lemmatizer_Rules_pos 30 Embeddings -Lemmatizer_Rules_morpho 30 Embeddings +Lemmatizer_Rules_form 30 Embeddings _ 50000 +Lemmatizer_Rules_letters 10 Embeddings _ 200000 +Lemmatizer_Rules_pos 30 Embeddings _ 21 +Lemmatizer_Rules_morpho 30 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Case_form 100 Embeddings -Lemmatizer_Case_letters 30 Embeddings -Lemmatizer_Case_pos 30 Embeddings -Lemmatizer_Case_morpho 30 Embeddings +Lemmatizer_Case_form 30 Embeddings _ 50000 +Lemmatizer_Case_letters 10 Embeddings _ 200000 +Lemmatizer_Case_pos 30 Embeddings _ 21 +Lemmatizer_Case_morpho 30 Embeddings _ 50000 # PARSER -Parser_actions 30 Embeddings _ -Parser_bool 30 Embeddings _ -Parser_int 30 Embeddings _ -Parser_eos 30 Embeddings _ -Parser_gov 30 Embeddings _ -Parser_pos 30 Embeddings _ -Parser_form 100 Embeddings _ -Parser_form.f 100 Embeddings _ -Parser_lemma 100 Embeddings _ -Parser_letters 30 Embeddings _ -Parser_labels 30 Embeddings _ -Parser_morpho 30 Embeddings _ +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 5 +Parser_int 16 Embeddings _ 200 +Parser_eos 16 Embeddings _ 5 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 21 +Parser_form 30 Embeddings _ 50000 +Parser_form.f 30 Embeddings _ 20000 +Parser_lemma 30 Embeddings _ 50000 +Parser_letters 30 Embeddings _ 20000 +Parser_labels 18 Embeddings _ 50 +Parser_morpho 22 Embeddings _ 50000 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _ diff --git a/UD_any/tokenizer/tokenizer.dicts b/UD_any/tokenizer/tokenizer.dicts index 0902ec88e90d2a819eb98f35f03792e26b395e69..89f79a68702559bb56ea106d13c223cd75909424 100644 --- a/UD_any/tokenizer/tokenizer.dicts +++ b/UD_any/tokenizer/tokenizer.dicts @@ -1,8 +1,7 @@ ######################################################################### -Tokenizer_bool 02 Embeddings -Tokenizer_int 05 Embeddings -Tokenizer_letters 30 Embeddings -Tokenizer_form 30 Embeddings -Tokenizer_form.f 30 Embeddings -Tokenizer_actions 05 Embeddings -Tokenizer_entropy 05 Embeddings +Tokenizer_bool 02 Embeddings _ 5 +Tokenizer_int 05 Embeddings _ 200 +Tokenizer_letters 30 Embeddings _ 200000 +Tokenizer_form 30 Embeddings _ 50000 +Tokenizer_form.f 30 Embeddings _ 200000 +Tokenizer_actions 05 Embeddings _ 200 diff --git a/UD_any/tokeparser/tokeparser.dicts b/UD_any/tokeparser/tokeparser.dicts index afc2edf9803300492d257ae9c9fa0b0ef17625e0..4534960f51830c6ceca5c08338ad7eb604b34e55 100644 --- a/UD_any/tokeparser/tokeparser.dicts +++ b/UD_any/tokeparser/tokeparser.dicts @@ -1,59 +1,94 @@ #Name Dimension Mode # ############################ # TOKENIZER -Tokenizer_bool 02 Embeddings -Tokenizer_int 05 Embeddings -Tokenizer_letters 30 Embeddings -Tokenizer_form 30 Embeddings -Tokenizer_form.f 30 Embeddings -Tokenizer_actions 05 Embeddings -Tokenizer_entropy 05 Embeddings +Tokenizer_bool 02 Embeddings _ 5 +Tokenizer_int 05 Embeddings _ 200 +Tokenizer_letters 30 Embeddings _ 200000 +Tokenizer_form 30 Embeddings _ 50000 +Tokenizer_form.f 30 Embeddings _ 200000 +Tokenizer_actions 05 Embeddings _ 200 # TAGGER -Tagger_actions 18 Embeddings _ -Tagger_bool 16 Embeddings _ -Tagger_int 16 Embeddings _ -Tagger_eos 16 Embeddings _ -Tagger_gov 16 Embeddings _ -Tagger_pos 18 Embeddings _ -Tagger_form 30 Embeddings _ -Tagger_form.f 30 Embeddings _ -Tagger_lemma 30 Embeddings _ -Tagger_letters 30 Embeddings _ -Tagger_labels 18 Embeddings _ -Tagger_morpho 22 Embeddings _ +Tagger_actions 18 Embeddings _ 20 +Tagger_bool 16 Embeddings _ 5 +Tagger_int 16 Embeddings _ 200 +Tagger_eos 16 Embeddings _ 5 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 21 +Tagger_form 30 Embeddings _ 50000 +Tagger_form.f 30 Embeddings _ 200000 +Tagger_lemma 30 Embeddings _ 50000 +Tagger_letters 30 Embeddings _ 200000 +Tagger_labels 18 Embeddings _ 50 +Tagger_morpho 22 Embeddings _ 50000 # MORPHO -Morpho_actions 18 Embeddings _ -Morpho_bool 16 Embeddings _ -Morpho_int 16 Embeddings _ -Morpho_eos 16 Embeddings _ -Morpho_gov 16 Embeddings _ -Morpho_pos 18 Embeddings _ -Morpho_form 30 Embeddings _ -Morpho_form.f 30 Embeddings _ -Morpho_lemma 30 Embeddings _ -Morpho_letters 30 Embeddings _ -Morpho_labels 18 Embeddings _ -Morpho_morpho 22 Embeddings _ +Morpho_actions 18 Embeddings _ 1000 +Morpho_bool 16 Embeddings _ 5 +Morpho_int 16 Embeddings _ 200 +Morpho_eos 16 Embeddings _ 5 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 21 +Morpho_form 30 Embeddings _ 50000 +Morpho_form.f 30 Embeddings _ 20000 +Morpho_lemma 30 Embeddings _ 50000 +Morpho_letters 30 Embeddings _ 20000 +Morpho_labels 18 Embeddings _ 50 +Morpho_morpho 22 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Rules_form 30 Embeddings -Lemmatizer_Rules_letters 10 Embeddings -Lemmatizer_Rules_pos 30 Embeddings -Lemmatizer_Rules_morpho 30 Embeddings +Lemmatizer_Rules_form 30 Embeddings _ 50000 +Lemmatizer_Rules_letters 10 Embeddings _ 200000 +Lemmatizer_Rules_pos 30 Embeddings _ 21 +Lemmatizer_Rules_morpho 30 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Case_form 30 Embeddings -Lemmatizer_Case_letters 10 Embeddings -Lemmatizer_Case_pos 30 Embeddings -Lemmatizer_Case_morpho 30 Embeddings +Lemmatizer_Case_form 30 Embeddings _ 50000 +Lemmatizer_Case_letters 10 Embeddings _ 200000 +Lemmatizer_Case_pos 30 Embeddings _ 21 +Lemmatizer_Case_morpho 30 Embeddings _ 50000 # PARSER -Parser_actions 18 Embeddings _ -Parser_bool 16 Embeddings _ -Parser_int 16 Embeddings _ -Parser_eos 16 Embeddings _ -Parser_gov 16 Embeddings _ -Parser_pos 18 Embeddings _ -Parser_form 30 Embeddings _ -Parser_form.f 30 Embeddings _ -Parser_lemma 30 Embeddings _ -Parser_letters 30 Embeddings _ -Parser_labels 18 Embeddings _ -Parser_morpho 22 Embeddings _ +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 5 +Parser_int 16 Embeddings _ 200 +Parser_eos 16 Embeddings _ 5 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 21 +Parser_form 30 Embeddings _ 50000 +Parser_form.f 30 Embeddings _ 20000 +Parser_lemma 30 Embeddings _ 50000 +Parser_letters 30 Embeddings _ 20000 +Parser_labels 18 Embeddings _ 50 +Parser_morpho 22 Embeddings _ 50000 +# ERROR_TAGGER +Error_Tagger_actions 18 Embeddings _ +Error_Tagger_bool 16 Embeddings _ +Error_Tagger_int 16 Embeddings _ +Error_Tagger_eos 16 Embeddings _ +Error_Tagger_gov 16 Embeddings _ +Error_Tagger_pos 18 Embeddings _ +Error_Tagger_form 30 Embeddings _ +Error_Tagger_lemma 30 Embeddings _ +Error_Tagger_letters 30 Embeddings _ +Error_Tagger_labels 18 Embeddings _ +Error_Tagger_morpho 22 Embeddings _ +# ERROR_MORPHO +Error_Morpho_actions 18 Embeddings _ +Error_Morpho_bool 16 Embeddings _ +Error_Morpho_int 16 Embeddings _ +Error_Morpho_eos 16 Embeddings _ +Error_Morpho_gov 16 Embeddings _ +Error_Morpho_pos 18 Embeddings _ +Error_Morpho_form 30 Embeddings _ +Error_Morpho_lemma 30 Embeddings _ +Error_Morpho_letters 30 Embeddings _ +Error_Morpho_labels 18 Embeddings _ +Error_Morpho_morpho 22 Embeddings _ +# ERROR_PARSER +Error_Parser_actions 18 Embeddings _ +Error_Parser_bool 16 Embeddings _ +Error_Parser_int 16 Embeddings _ +Error_Parser_eos 16 Embeddings _ +Error_Parser_gov 16 Embeddings _ +Error_Parser_pos 18 Embeddings _ +Error_Parser_form 30 Embeddings _ +Error_Parser_lemma 30 Embeddings _ +Error_Parser_letters 30 Embeddings _ +Error_Parser_labels 18 Embeddings _ +Error_Parser_morpho 22 Embeddings _ diff --git a/UD_any/tokeparser_incremental/tokeparser.dicts b/UD_any/tokeparser_incremental/tokeparser.dicts index 01ff2b92ed209c698db8b5e4868c9b3ea15d405c..7b7fd2b27ca28cfcc0532444450b73e6bd27a6b1 100644 --- a/UD_any/tokeparser_incremental/tokeparser.dicts +++ b/UD_any/tokeparser_incremental/tokeparser.dicts @@ -1,62 +1,97 @@ #Name Dimension Mode # ############################ # TOKENIZER -Tokenizer_bool 02 Embeddings -Tokenizer_int 05 Embeddings -Tokenizer_letters 30 Embeddings -Tokenizer_form 30 Embeddings -Tokenizer_form.f 30 Embeddings -Tokenizer_actions 05 Embeddings -Tokenizer_entropy 05 Embeddings -Tokenizer_pos 18 Embeddings -Tokenizer_morpho 22 Embeddings -Tokenizer_eos 16 Embeddings +Tokenizer_bool 02 Embeddings _ 5 +Tokenizer_int 05 Embeddings _ 200 +Tokenizer_letters 30 Embeddings _ 200000 +Tokenizer_form 30 Embeddings _ 50000 +Tokenizer_form.f 30 Embeddings _ 200000 +Tokenizer_actions 05 Embeddings _ 200 +Tokenizer_pos 05 Embeddings _ 25 +Tokenizer_morpho 05 Embeddings _ 50000 +Tokenizer_eos 05 Embeddings _ 5 # TAGGER -Tagger_actions 18 Embeddings _ -Tagger_bool 16 Embeddings _ -Tagger_int 16 Embeddings _ -Tagger_eos 16 Embeddings _ -Tagger_gov 16 Embeddings _ -Tagger_pos 18 Embeddings _ -Tagger_form 30 Embeddings _ -Tagger_form.f 30 Embeddings _ -Tagger_lemma 30 Embeddings _ -Tagger_letters 30 Embeddings _ -Tagger_labels 18 Embeddings _ -Tagger_morpho 22 Embeddings _ +Tagger_actions 18 Embeddings _ 20 +Tagger_bool 16 Embeddings _ 5 +Tagger_int 16 Embeddings _ 200 +Tagger_eos 16 Embeddings _ 5 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 21 +Tagger_form 30 Embeddings _ 50000 +Tagger_form.f 30 Embeddings _ 200000 +Tagger_lemma 30 Embeddings _ 50000 +Tagger_letters 30 Embeddings _ 200000 +Tagger_labels 18 Embeddings _ 50 +Tagger_morpho 22 Embeddings _ 50000 # MORPHO -Morpho_actions 18 Embeddings _ -Morpho_bool 16 Embeddings _ -Morpho_int 16 Embeddings _ -Morpho_eos 16 Embeddings _ -Morpho_gov 16 Embeddings _ -Morpho_pos 18 Embeddings _ -Morpho_form 30 Embeddings _ -Morpho_form.f 30 Embeddings _ -Morpho_lemma 30 Embeddings _ -Morpho_letters 30 Embeddings _ -Morpho_labels 18 Embeddings _ -Morpho_morpho 22 Embeddings _ +Morpho_actions 18 Embeddings _ 1000 +Morpho_bool 16 Embeddings _ 5 +Morpho_int 16 Embeddings _ 200 +Morpho_eos 16 Embeddings _ 5 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 21 +Morpho_form 30 Embeddings _ 50000 +Morpho_form.f 30 Embeddings _ 20000 +Morpho_lemma 30 Embeddings _ 50000 +Morpho_letters 30 Embeddings _ 20000 +Morpho_labels 18 Embeddings _ 50 +Morpho_morpho 22 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Rules_form 30 Embeddings -Lemmatizer_Rules_letters 10 Embeddings -Lemmatizer_Rules_pos 30 Embeddings -Lemmatizer_Rules_morpho 30 Embeddings +Lemmatizer_Rules_form 30 Embeddings _ 50000 +Lemmatizer_Rules_letters 10 Embeddings _ 200000 +Lemmatizer_Rules_pos 30 Embeddings _ 21 +Lemmatizer_Rules_morpho 30 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Case_form 30 Embeddings -Lemmatizer_Case_letters 10 Embeddings -Lemmatizer_Case_pos 30 Embeddings -Lemmatizer_Case_morpho 30 Embeddings +Lemmatizer_Case_form 30 Embeddings _ 50000 +Lemmatizer_Case_letters 10 Embeddings _ 200000 +Lemmatizer_Case_pos 30 Embeddings _ 21 +Lemmatizer_Case_morpho 30 Embeddings _ 50000 # PARSER -Parser_actions 18 Embeddings _ -Parser_bool 16 Embeddings _ -Parser_int 16 Embeddings _ -Parser_eos 16 Embeddings _ -Parser_gov 16 Embeddings _ -Parser_pos 18 Embeddings _ -Parser_form 30 Embeddings _ -Parser_form.f 30 Embeddings _ -Parser_lemma 30 Embeddings _ -Parser_letters 30 Embeddings _ -Parser_labels 18 Embeddings _ -Parser_morpho 22 Embeddings _ +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 5 +Parser_int 16 Embeddings _ 200 +Parser_eos 16 Embeddings _ 5 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 21 +Parser_form 30 Embeddings _ 50000 +Parser_form.f 30 Embeddings _ 20000 +Parser_lemma 30 Embeddings _ 50000 +Parser_letters 30 Embeddings _ 20000 +Parser_labels 18 Embeddings _ 50 +Parser_morpho 22 Embeddings _ 50000 +# ERROR_TAGGER +Error_Tagger_actions 18 Embeddings _ +Error_Tagger_bool 16 Embeddings _ +Error_Tagger_int 16 Embeddings _ +Error_Tagger_eos 16 Embeddings _ +Error_Tagger_gov 16 Embeddings _ +Error_Tagger_pos 18 Embeddings _ +Error_Tagger_form 30 Embeddings _ +Error_Tagger_lemma 30 Embeddings _ +Error_Tagger_letters 30 Embeddings _ +Error_Tagger_labels 18 Embeddings _ +Error_Tagger_morpho 22 Embeddings _ +# ERROR_MORPHO +Error_Morpho_actions 18 Embeddings _ +Error_Morpho_bool 16 Embeddings _ +Error_Morpho_int 16 Embeddings _ +Error_Morpho_eos 16 Embeddings _ +Error_Morpho_gov 16 Embeddings _ +Error_Morpho_pos 18 Embeddings _ +Error_Morpho_form 30 Embeddings _ +Error_Morpho_lemma 30 Embeddings _ +Error_Morpho_letters 30 Embeddings _ +Error_Morpho_labels 18 Embeddings _ +Error_Morpho_morpho 22 Embeddings _ +# ERROR_PARSER +Error_Parser_actions 18 Embeddings _ +Error_Parser_bool 16 Embeddings _ +Error_Parser_int 16 Embeddings _ +Error_Parser_eos 16 Embeddings _ +Error_Parser_gov 16 Embeddings _ +Error_Parser_pos 18 Embeddings _ +Error_Parser_form 30 Embeddings _ +Error_Parser_lemma 30 Embeddings _ +Error_Parser_letters 30 Embeddings _ +Error_Parser_labels 18 Embeddings _ +Error_Parser_morpho 22 Embeddings _