From 064bab33daa80d01fc83c4f42a79b3d0b514126a Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Tue, 19 Nov 2019 16:05:23 +0100 Subject: [PATCH] For each dict of UD_any, added its capacity --- UD_any/lemmatizer/lemmatizer.dicts | 16 +- UD_any/morpho_parts/morpho.dicts | 16 +- UD_any/morpho_whole/morpho.dicts | 16 +- UD_any/parser/parser.dicts | 24 ++- UD_any/tagger/tagger.dicts | 17 +-- UD_any/tagparser/tagparser.dicts | 88 +++++------ UD_any/tagparser/test.bd | 2 +- UD_any/tagparser/train.bd | 2 +- UD_any/tagparser_sequential/tagparser.dicts | 88 +++++------ .../tagparser.dicts | 88 +++++------ UD_any/tokenizer/tokenizer.dicts | 13 +- UD_any/tokeparser/tokeparser.dicts | 137 ++++++++++------- .../tokeparser_incremental/tokeparser.dicts | 143 +++++++++++------- 13 files changed, 357 insertions(+), 293 deletions(-) diff --git a/UD_any/lemmatizer/lemmatizer.dicts b/UD_any/lemmatizer/lemmatizer.dicts index 9275bd1..1bafa4c 100644 --- a/UD_any/lemmatizer/lemmatizer.dicts +++ b/UD_any/lemmatizer/lemmatizer.dicts @@ -1,11 +1,11 @@ #Name Dimension Mode # ############################################# -Lemmatizer_Rules_form 30 Embeddings -Lemmatizer_Rules_letters 10 Embeddings -Lemmatizer_Rules_pos 30 Embeddings -Lemmatizer_Rules_morpho 30 Embeddings +Lemmatizer_Rules_form 30 Embeddings _ 50000 +Lemmatizer_Rules_letters 10 Embeddings _ 200000 +Lemmatizer_Rules_pos 30 Embeddings _ 25 +Lemmatizer_Rules_morpho 30 Embeddings _ 50000 ############################################# -Lemmatizer_Case_form 30 Embeddings -Lemmatizer_Case_letters 10 Embeddings -Lemmatizer_Case_pos 30 Embeddings -Lemmatizer_Case_morpho 30 Embeddings +Lemmatizer_Case_form 30 Embeddings _ 50000 +Lemmatizer_Case_letters 10 Embeddings _ 20000 +Lemmatizer_Case_pos 30 Embeddings _ 25 +Lemmatizer_Case_morpho 30 Embeddings _ 50000 diff --git a/UD_any/morpho_parts/morpho.dicts b/UD_any/morpho_parts/morpho.dicts index db352f3..71ebeba 100644 --- a/UD_any/morpho_parts/morpho.dicts +++ b/UD_any/morpho_parts/morpho.dicts @@ -1,13 +1,13 @@ #Name Dimension Mode # ################################### -Morpho_bool 10 Embeddings -Morpho_int 10 Embeddings -Morpho_letters 30 Embeddings -Morpho_pos 30 Embeddings -Morpho_form 100 Embeddings -Morpho_form.f 100 Embeddings -Morpho_morpho 30 Embeddings -Morpho_actions 30 Embeddings +Morpho_bool 10 Embeddings _ 5 +Morpho_int 10 Embeddings _ 200 +Morpho_letters 30 Embeddings _ 200000 +Morpho_pos 30 Embeddings _ 25 +Morpho_form 100 Embeddings _ 50000 +Morpho_form.f 100 Embeddings _ 200000 +Morpho_morpho 30 Embeddings _ 50000 +Morpho_actions 30 Embeddings _ 50 # ERROR_MORPHO Error_Morpho_actions 18 Embeddings _ Error_Morpho_bool 16 Embeddings _ diff --git a/UD_any/morpho_whole/morpho.dicts b/UD_any/morpho_whole/morpho.dicts index db352f3..512162c 100644 --- a/UD_any/morpho_whole/morpho.dicts +++ b/UD_any/morpho_whole/morpho.dicts @@ -1,13 +1,13 @@ #Name Dimension Mode # ################################### -Morpho_bool 10 Embeddings -Morpho_int 10 Embeddings -Morpho_letters 30 Embeddings -Morpho_pos 30 Embeddings -Morpho_form 100 Embeddings -Morpho_form.f 100 Embeddings -Morpho_morpho 30 Embeddings -Morpho_actions 30 Embeddings +Morpho_bool 10 Embeddings _ 5 +Morpho_int 10 Embeddings _ 200 +Morpho_letters 30 Embeddings _ 200000 +Morpho_pos 30 Embeddings _ 25 +Morpho_form 100 Embeddings _ 50000 +Morpho_form.f 100 Embeddings _ 200000 +Morpho_morpho 30 Embeddings _ 50000 +Morpho_actions 30 Embeddings _ 50000 # ERROR_MORPHO Error_Morpho_actions 18 Embeddings _ Error_Morpho_bool 16 Embeddings _ diff --git a/UD_any/parser/parser.dicts b/UD_any/parser/parser.dicts index ed73000..1c5346f 100644 --- a/UD_any/parser/parser.dicts +++ b/UD_any/parser/parser.dicts @@ -1,17 +1,16 @@ #Name Dimension Mode PretrainedFilename # ######################################################## -Parser_actions 18 Embeddings _ -Parser_bool 16 Embeddings _ -Parser_int 16 Embeddings _ -Parser_eos 16 Embeddings _ -Parser_pos 18 Embeddings _ -Parser_form 30 Embeddings _ -Parser_form.f 30 Embeddings _ -Parser_lemma 30 Embeddings _ -Parser_letters 30 Embeddings _ -Parser_labels 18 Embeddings _ -Parser_morpho 22 Embeddings _ -Parser_sgn 20 Embeddings _ +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 5 +Parser_int 16 Embeddings _ 200 +Parser_eos 16 Embeddings _ 5 +Parser_pos 18 Embeddings _ 25 +Parser_form 30 Embeddings _ 50000 +Parser_form.f 30 Embeddings _ 200000 +Parser_lemma 30 Embeddings _ 50000 +Parser_letters 30 Embeddings _ 200000 +Parser_labels 18 Embeddings _ 50 +Parser_morpho 22 Embeddings _ 50000 ######################################################## Error_Parser_actions 18 Embeddings _ Error_Parser_bool 16 Embeddings _ @@ -23,4 +22,3 @@ Error_Parser_lemma 30 Embeddings _ Error_Parser_letters 30 Embeddings _ Error_Parser_labels 18 Embeddings _ Error_Parser_morpho 22 Embeddings _ -Error_Parser_sgn 20 Embeddings _ diff --git a/UD_any/tagger/tagger.dicts b/UD_any/tagger/tagger.dicts index 74b2522..161ec3d 100644 --- a/UD_any/tagger/tagger.dicts +++ b/UD_any/tagger/tagger.dicts @@ -7,16 +7,14 @@ #Tagger_form 30 Embeddings ../../../data/ftb/Tagger_form.dict #Tagger_sgn 10 Embeddings ../../../data/ftb/Tagger_sgn.dict #Tagger_actions 05 Embeddings ../../../data/ftb/Tagger_actions.dict -#Tagger_entropy 05 Embeddings ../../../data/ftb/Tagger_entropy.dict ######################################################################### -Tagger_bool 02 Embeddings -Tagger_int 05 Embeddings -Tagger_letters 30 Embeddings -Tagger_pos 15 Embeddings -Tagger_form 30 Embeddings -Tagger_form.f 30 Embeddings -Tagger_actions 05 Embeddings -Tagger_entropy 05 Embeddings +Tagger_bool 02 Embeddings _ 5 +Tagger_int 05 Embeddings _ 200 +Tagger_letters 30 Embeddings _ 200000 +Tagger_pos 15 Embeddings _ 21 +Tagger_form 30 Embeddings _ 50000 +Tagger_form.f 30 Embeddings _ 200000 +Tagger_actions 05 Embeddings _ 21 ######################################################################### Error_Tagger_bool 02 Embeddings Error_Tagger_int 05 Embeddings @@ -25,4 +23,3 @@ Error_Tagger_pos 15 Embeddings Error_Tagger_form 30 Embeddings Error_Tagger_sgn 10 Embeddings Error_Tagger_actions 05 Embeddings -Error_Tagger_entropy 05 Embeddings diff --git a/UD_any/tagparser/tagparser.dicts b/UD_any/tagparser/tagparser.dicts index 18a0ab6..c328e31 100644 --- a/UD_any/tagparser/tagparser.dicts +++ b/UD_any/tagparser/tagparser.dicts @@ -1,54 +1,54 @@ #Name Dimension Mode # ############################ # TAGGER -Tagger_actions 18 Embeddings _ -Tagger_bool 16 Embeddings _ -Tagger_int 16 Embeddings _ -Tagger_eos 16 Embeddings _ -Tagger_gov 16 Embeddings _ -Tagger_pos 18 Embeddings _ -Tagger_form 30 Embeddings _ -Tagger_form.f 30 Embeddings _ -Tagger_lemma 30 Embeddings _ -Tagger_letters 30 Embeddings _ -Tagger_labels 18 Embeddings _ -Tagger_morpho 22 Embeddings _ +Tagger_actions 18 Embeddings _ 20 +Tagger_bool 16 Embeddings _ 5 +Tagger_int 16 Embeddings _ 200 +Tagger_eos 16 Embeddings _ 5 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 21 +Tagger_form 30 Embeddings _ 50000 +Tagger_form.f 30 Embeddings _ 200000 +Tagger_lemma 30 Embeddings _ 50000 +Tagger_letters 30 Embeddings _ 200000 +Tagger_labels 18 Embeddings _ 50 +Tagger_morpho 22 Embeddings _ 50000 # MORPHO -Morpho_actions 18 Embeddings _ -Morpho_bool 16 Embeddings _ -Morpho_int 16 Embeddings _ -Morpho_eos 16 Embeddings _ -Morpho_gov 16 Embeddings _ -Morpho_pos 18 Embeddings _ -Morpho_form 30 Embeddings _ -Morpho_form.f 30 Embeddings _ -Morpho_lemma 30 Embeddings _ -Morpho_letters 30 Embeddings _ -Morpho_labels 18 Embeddings _ -Morpho_morpho 22 Embeddings _ +Morpho_actions 18 Embeddings _ 1000 +Morpho_bool 16 Embeddings _ 5 +Morpho_int 16 Embeddings _ 200 +Morpho_eos 16 Embeddings _ 5 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 21 +Morpho_form 30 Embeddings _ 50000 +Morpho_form.f 30 Embeddings _ 20000 +Morpho_lemma 30 Embeddings _ 50000 +Morpho_letters 30 Embeddings _ 20000 +Morpho_labels 18 Embeddings _ 50 +Morpho_morpho 22 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Rules_form 30 Embeddings -Lemmatizer_Rules_letters 10 Embeddings -Lemmatizer_Rules_pos 30 Embeddings -Lemmatizer_Rules_morpho 30 Embeddings +Lemmatizer_Rules_form 30 Embeddings _ 50000 +Lemmatizer_Rules_letters 10 Embeddings _ 200000 +Lemmatizer_Rules_pos 30 Embeddings _ 21 +Lemmatizer_Rules_morpho 30 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Case_form 30 Embeddings -Lemmatizer_Case_letters 10 Embeddings -Lemmatizer_Case_pos 30 Embeddings -Lemmatizer_Case_morpho 30 Embeddings +Lemmatizer_Case_form 30 Embeddings _ 50000 +Lemmatizer_Case_letters 10 Embeddings _ 200000 +Lemmatizer_Case_pos 30 Embeddings _ 21 +Lemmatizer_Case_morpho 30 Embeddings _ 50000 # PARSER -Parser_actions 18 Embeddings _ -Parser_bool 16 Embeddings _ -Parser_int 16 Embeddings _ -Parser_eos 16 Embeddings _ -Parser_gov 16 Embeddings _ -Parser_pos 18 Embeddings _ -Parser_form 30 Embeddings _ -Parser_form.f 30 Embeddings _ -Parser_lemma 30 Embeddings _ -Parser_letters 30 Embeddings _ -Parser_labels 18 Embeddings _ -Parser_morpho 22 Embeddings _ +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 5 +Parser_int 16 Embeddings _ 200 +Parser_eos 16 Embeddings _ 5 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 21 +Parser_form 30 Embeddings _ 50000 +Parser_form.f 30 Embeddings _ 20000 +Parser_lemma 30 Embeddings _ 50000 +Parser_letters 30 Embeddings _ 20000 +Parser_labels 18 Embeddings _ 50 +Parser_morpho 22 Embeddings _ 50000 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _ diff --git a/UD_any/tagparser/test.bd b/UD_any/tagparser/test.bd index f840332..ae0bfa6 100644 --- a/UD_any/tagparser/test.bd +++ b/UD_any/tagparser/test.bd @@ -6,6 +6,6 @@ 4 XPOS hyp pos Final 1 5 MORPHO hyp morpho Final 1 2 LEMMA hyp form Final 1 -6 GOV hyp int Final 1 +6 GOV hyp gov Final 1 7 LABEL hyp labels Final 1 0 EOS hyp eos Final 0 diff --git a/UD_any/tagparser/train.bd b/UD_any/tagparser/train.bd index b4073cc..0d725f6 100644 --- a/UD_any/tagparser/train.bd +++ b/UD_any/tagparser/train.bd @@ -6,6 +6,6 @@ 4 XPOS hyp pos FromZero 1 5 MORPHO hyp morpho FromZero 1 2 LEMMA hyp form FromZero 1 -6 GOV hyp int FromZero 1 +6 GOV hyp gov FromZero 1 7 LABEL hyp labels FromZero 1 0 EOS hyp eos FromZero 0 diff --git a/UD_any/tagparser_sequential/tagparser.dicts b/UD_any/tagparser_sequential/tagparser.dicts index 18a0ab6..c328e31 100644 --- a/UD_any/tagparser_sequential/tagparser.dicts +++ b/UD_any/tagparser_sequential/tagparser.dicts @@ -1,54 +1,54 @@ #Name Dimension Mode # ############################ # TAGGER -Tagger_actions 18 Embeddings _ -Tagger_bool 16 Embeddings _ -Tagger_int 16 Embeddings _ -Tagger_eos 16 Embeddings _ -Tagger_gov 16 Embeddings _ -Tagger_pos 18 Embeddings _ -Tagger_form 30 Embeddings _ -Tagger_form.f 30 Embeddings _ -Tagger_lemma 30 Embeddings _ -Tagger_letters 30 Embeddings _ -Tagger_labels 18 Embeddings _ -Tagger_morpho 22 Embeddings _ +Tagger_actions 18 Embeddings _ 20 +Tagger_bool 16 Embeddings _ 5 +Tagger_int 16 Embeddings _ 200 +Tagger_eos 16 Embeddings _ 5 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 21 +Tagger_form 30 Embeddings _ 50000 +Tagger_form.f 30 Embeddings _ 200000 +Tagger_lemma 30 Embeddings _ 50000 +Tagger_letters 30 Embeddings _ 200000 +Tagger_labels 18 Embeddings _ 50 +Tagger_morpho 22 Embeddings _ 50000 # MORPHO -Morpho_actions 18 Embeddings _ -Morpho_bool 16 Embeddings _ -Morpho_int 16 Embeddings _ -Morpho_eos 16 Embeddings _ -Morpho_gov 16 Embeddings _ -Morpho_pos 18 Embeddings _ -Morpho_form 30 Embeddings _ -Morpho_form.f 30 Embeddings _ -Morpho_lemma 30 Embeddings _ -Morpho_letters 30 Embeddings _ -Morpho_labels 18 Embeddings _ -Morpho_morpho 22 Embeddings _ +Morpho_actions 18 Embeddings _ 1000 +Morpho_bool 16 Embeddings _ 5 +Morpho_int 16 Embeddings _ 200 +Morpho_eos 16 Embeddings _ 5 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 21 +Morpho_form 30 Embeddings _ 50000 +Morpho_form.f 30 Embeddings _ 20000 +Morpho_lemma 30 Embeddings _ 50000 +Morpho_letters 30 Embeddings _ 20000 +Morpho_labels 18 Embeddings _ 50 +Morpho_morpho 22 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Rules_form 30 Embeddings -Lemmatizer_Rules_letters 10 Embeddings -Lemmatizer_Rules_pos 30 Embeddings -Lemmatizer_Rules_morpho 30 Embeddings +Lemmatizer_Rules_form 30 Embeddings _ 50000 +Lemmatizer_Rules_letters 10 Embeddings _ 200000 +Lemmatizer_Rules_pos 30 Embeddings _ 21 +Lemmatizer_Rules_morpho 30 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Case_form 30 Embeddings -Lemmatizer_Case_letters 10 Embeddings -Lemmatizer_Case_pos 30 Embeddings -Lemmatizer_Case_morpho 30 Embeddings +Lemmatizer_Case_form 30 Embeddings _ 50000 +Lemmatizer_Case_letters 10 Embeddings _ 200000 +Lemmatizer_Case_pos 30 Embeddings _ 21 +Lemmatizer_Case_morpho 30 Embeddings _ 50000 # PARSER -Parser_actions 18 Embeddings _ -Parser_bool 16 Embeddings _ -Parser_int 16 Embeddings _ -Parser_eos 16 Embeddings _ -Parser_gov 16 Embeddings _ -Parser_pos 18 Embeddings _ -Parser_form 30 Embeddings _ -Parser_form.f 30 Embeddings _ -Parser_lemma 30 Embeddings _ -Parser_letters 30 Embeddings _ -Parser_labels 18 Embeddings _ -Parser_morpho 22 Embeddings _ +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 5 +Parser_int 16 Embeddings _ 200 +Parser_eos 16 Embeddings _ 5 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 21 +Parser_form 30 Embeddings _ 50000 +Parser_form.f 30 Embeddings _ 20000 +Parser_lemma 30 Embeddings _ 50000 +Parser_letters 30 Embeddings _ 20000 +Parser_labels 18 Embeddings _ 50 +Parser_morpho 22 Embeddings _ 50000 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _ diff --git a/UD_any/tagparser_sequential_strong/tagparser.dicts b/UD_any/tagparser_sequential_strong/tagparser.dicts index 0634365..c328e31 100644 --- a/UD_any/tagparser_sequential_strong/tagparser.dicts +++ b/UD_any/tagparser_sequential_strong/tagparser.dicts @@ -1,54 +1,54 @@ #Name Dimension Mode # ############################ # TAGGER -Tagger_actions 30 Embeddings _ -Tagger_bool 30 Embeddings _ -Tagger_int 30 Embeddings _ -Tagger_eos 30 Embeddings _ -Tagger_gov 30 Embeddings _ -Tagger_pos 30 Embeddings _ -Tagger_form 100 Embeddings _ -Tagger_form.f 100 Embeddings _ -Tagger_lemma 30 Embeddings _ -Tagger_letters 30 Embeddings _ -Tagger_labels 30 Embeddings _ -Tagger_morpho 30 Embeddings _ +Tagger_actions 18 Embeddings _ 20 +Tagger_bool 16 Embeddings _ 5 +Tagger_int 16 Embeddings _ 200 +Tagger_eos 16 Embeddings _ 5 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 21 +Tagger_form 30 Embeddings _ 50000 +Tagger_form.f 30 Embeddings _ 200000 +Tagger_lemma 30 Embeddings _ 50000 +Tagger_letters 30 Embeddings _ 200000 +Tagger_labels 18 Embeddings _ 50 +Tagger_morpho 22 Embeddings _ 50000 # MORPHO -Morpho_actions 30 Embeddings _ -Morpho_bool 30 Embeddings _ -Morpho_int 30 Embeddings _ -Morpho_eos 30 Embeddings _ -Morpho_gov 30 Embeddings _ -Morpho_pos 30 Embeddings _ -Morpho_form 30 Embeddings _ -Morpho_form.f 100 Embeddings _ -Morpho_lemma 30 Embeddings _ -Morpho_letters 30 Embeddings _ -Morpho_labels 30 Embeddings _ -Morpho_morpho 30 Embeddings _ +Morpho_actions 18 Embeddings _ 1000 +Morpho_bool 16 Embeddings _ 5 +Morpho_int 16 Embeddings _ 200 +Morpho_eos 16 Embeddings _ 5 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 21 +Morpho_form 30 Embeddings _ 50000 +Morpho_form.f 30 Embeddings _ 20000 +Morpho_lemma 30 Embeddings _ 50000 +Morpho_letters 30 Embeddings _ 20000 +Morpho_labels 18 Embeddings _ 50 +Morpho_morpho 22 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Rules_form 100 Embeddings -Lemmatizer_Rules_letters 30 Embeddings -Lemmatizer_Rules_pos 30 Embeddings -Lemmatizer_Rules_morpho 30 Embeddings +Lemmatizer_Rules_form 30 Embeddings _ 50000 +Lemmatizer_Rules_letters 10 Embeddings _ 200000 +Lemmatizer_Rules_pos 30 Embeddings _ 21 +Lemmatizer_Rules_morpho 30 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Case_form 100 Embeddings -Lemmatizer_Case_letters 30 Embeddings -Lemmatizer_Case_pos 30 Embeddings -Lemmatizer_Case_morpho 30 Embeddings +Lemmatizer_Case_form 30 Embeddings _ 50000 +Lemmatizer_Case_letters 10 Embeddings _ 200000 +Lemmatizer_Case_pos 30 Embeddings _ 21 +Lemmatizer_Case_morpho 30 Embeddings _ 50000 # PARSER -Parser_actions 30 Embeddings _ -Parser_bool 30 Embeddings _ -Parser_int 30 Embeddings _ -Parser_eos 30 Embeddings _ -Parser_gov 30 Embeddings _ -Parser_pos 30 Embeddings _ -Parser_form 100 Embeddings _ -Parser_form.f 100 Embeddings _ -Parser_lemma 100 Embeddings _ -Parser_letters 30 Embeddings _ -Parser_labels 30 Embeddings _ -Parser_morpho 30 Embeddings _ +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 5 +Parser_int 16 Embeddings _ 200 +Parser_eos 16 Embeddings _ 5 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 21 +Parser_form 30 Embeddings _ 50000 +Parser_form.f 30 Embeddings _ 20000 +Parser_lemma 30 Embeddings _ 50000 +Parser_letters 30 Embeddings _ 20000 +Parser_labels 18 Embeddings _ 50 +Parser_morpho 22 Embeddings _ 50000 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _ diff --git a/UD_any/tokenizer/tokenizer.dicts b/UD_any/tokenizer/tokenizer.dicts index 0902ec8..89f79a6 100644 --- a/UD_any/tokenizer/tokenizer.dicts +++ b/UD_any/tokenizer/tokenizer.dicts @@ -1,8 +1,7 @@ ######################################################################### -Tokenizer_bool 02 Embeddings -Tokenizer_int 05 Embeddings -Tokenizer_letters 30 Embeddings -Tokenizer_form 30 Embeddings -Tokenizer_form.f 30 Embeddings -Tokenizer_actions 05 Embeddings -Tokenizer_entropy 05 Embeddings +Tokenizer_bool 02 Embeddings _ 5 +Tokenizer_int 05 Embeddings _ 200 +Tokenizer_letters 30 Embeddings _ 200000 +Tokenizer_form 30 Embeddings _ 50000 +Tokenizer_form.f 30 Embeddings _ 200000 +Tokenizer_actions 05 Embeddings _ 200 diff --git a/UD_any/tokeparser/tokeparser.dicts b/UD_any/tokeparser/tokeparser.dicts index afc2edf..4534960 100644 --- a/UD_any/tokeparser/tokeparser.dicts +++ b/UD_any/tokeparser/tokeparser.dicts @@ -1,59 +1,94 @@ #Name Dimension Mode # ############################ # TOKENIZER -Tokenizer_bool 02 Embeddings -Tokenizer_int 05 Embeddings -Tokenizer_letters 30 Embeddings -Tokenizer_form 30 Embeddings -Tokenizer_form.f 30 Embeddings -Tokenizer_actions 05 Embeddings -Tokenizer_entropy 05 Embeddings +Tokenizer_bool 02 Embeddings _ 5 +Tokenizer_int 05 Embeddings _ 200 +Tokenizer_letters 30 Embeddings _ 200000 +Tokenizer_form 30 Embeddings _ 50000 +Tokenizer_form.f 30 Embeddings _ 200000 +Tokenizer_actions 05 Embeddings _ 200 # TAGGER -Tagger_actions 18 Embeddings _ -Tagger_bool 16 Embeddings _ -Tagger_int 16 Embeddings _ -Tagger_eos 16 Embeddings _ -Tagger_gov 16 Embeddings _ -Tagger_pos 18 Embeddings _ -Tagger_form 30 Embeddings _ -Tagger_form.f 30 Embeddings _ -Tagger_lemma 30 Embeddings _ -Tagger_letters 30 Embeddings _ -Tagger_labels 18 Embeddings _ -Tagger_morpho 22 Embeddings _ +Tagger_actions 18 Embeddings _ 20 +Tagger_bool 16 Embeddings _ 5 +Tagger_int 16 Embeddings _ 200 +Tagger_eos 16 Embeddings _ 5 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 21 +Tagger_form 30 Embeddings _ 50000 +Tagger_form.f 30 Embeddings _ 200000 +Tagger_lemma 30 Embeddings _ 50000 +Tagger_letters 30 Embeddings _ 200000 +Tagger_labels 18 Embeddings _ 50 +Tagger_morpho 22 Embeddings _ 50000 # MORPHO -Morpho_actions 18 Embeddings _ -Morpho_bool 16 Embeddings _ -Morpho_int 16 Embeddings _ -Morpho_eos 16 Embeddings _ -Morpho_gov 16 Embeddings _ -Morpho_pos 18 Embeddings _ -Morpho_form 30 Embeddings _ -Morpho_form.f 30 Embeddings _ -Morpho_lemma 30 Embeddings _ -Morpho_letters 30 Embeddings _ -Morpho_labels 18 Embeddings _ -Morpho_morpho 22 Embeddings _ +Morpho_actions 18 Embeddings _ 1000 +Morpho_bool 16 Embeddings _ 5 +Morpho_int 16 Embeddings _ 200 +Morpho_eos 16 Embeddings _ 5 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 21 +Morpho_form 30 Embeddings _ 50000 +Morpho_form.f 30 Embeddings _ 20000 +Morpho_lemma 30 Embeddings _ 50000 +Morpho_letters 30 Embeddings _ 20000 +Morpho_labels 18 Embeddings _ 50 +Morpho_morpho 22 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Rules_form 30 Embeddings -Lemmatizer_Rules_letters 10 Embeddings -Lemmatizer_Rules_pos 30 Embeddings -Lemmatizer_Rules_morpho 30 Embeddings +Lemmatizer_Rules_form 30 Embeddings _ 50000 +Lemmatizer_Rules_letters 10 Embeddings _ 200000 +Lemmatizer_Rules_pos 30 Embeddings _ 21 +Lemmatizer_Rules_morpho 30 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Case_form 30 Embeddings -Lemmatizer_Case_letters 10 Embeddings -Lemmatizer_Case_pos 30 Embeddings -Lemmatizer_Case_morpho 30 Embeddings +Lemmatizer_Case_form 30 Embeddings _ 50000 +Lemmatizer_Case_letters 10 Embeddings _ 200000 +Lemmatizer_Case_pos 30 Embeddings _ 21 +Lemmatizer_Case_morpho 30 Embeddings _ 50000 # PARSER -Parser_actions 18 Embeddings _ -Parser_bool 16 Embeddings _ -Parser_int 16 Embeddings _ -Parser_eos 16 Embeddings _ -Parser_gov 16 Embeddings _ -Parser_pos 18 Embeddings _ -Parser_form 30 Embeddings _ -Parser_form.f 30 Embeddings _ -Parser_lemma 30 Embeddings _ -Parser_letters 30 Embeddings _ -Parser_labels 18 Embeddings _ -Parser_morpho 22 Embeddings _ +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 5 +Parser_int 16 Embeddings _ 200 +Parser_eos 16 Embeddings _ 5 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 21 +Parser_form 30 Embeddings _ 50000 +Parser_form.f 30 Embeddings _ 20000 +Parser_lemma 30 Embeddings _ 50000 +Parser_letters 30 Embeddings _ 20000 +Parser_labels 18 Embeddings _ 50 +Parser_morpho 22 Embeddings _ 50000 +# ERROR_TAGGER +Error_Tagger_actions 18 Embeddings _ +Error_Tagger_bool 16 Embeddings _ +Error_Tagger_int 16 Embeddings _ +Error_Tagger_eos 16 Embeddings _ +Error_Tagger_gov 16 Embeddings _ +Error_Tagger_pos 18 Embeddings _ +Error_Tagger_form 30 Embeddings _ +Error_Tagger_lemma 30 Embeddings _ +Error_Tagger_letters 30 Embeddings _ +Error_Tagger_labels 18 Embeddings _ +Error_Tagger_morpho 22 Embeddings _ +# ERROR_MORPHO +Error_Morpho_actions 18 Embeddings _ +Error_Morpho_bool 16 Embeddings _ +Error_Morpho_int 16 Embeddings _ +Error_Morpho_eos 16 Embeddings _ +Error_Morpho_gov 16 Embeddings _ +Error_Morpho_pos 18 Embeddings _ +Error_Morpho_form 30 Embeddings _ +Error_Morpho_lemma 30 Embeddings _ +Error_Morpho_letters 30 Embeddings _ +Error_Morpho_labels 18 Embeddings _ +Error_Morpho_morpho 22 Embeddings _ +# ERROR_PARSER +Error_Parser_actions 18 Embeddings _ +Error_Parser_bool 16 Embeddings _ +Error_Parser_int 16 Embeddings _ +Error_Parser_eos 16 Embeddings _ +Error_Parser_gov 16 Embeddings _ +Error_Parser_pos 18 Embeddings _ +Error_Parser_form 30 Embeddings _ +Error_Parser_lemma 30 Embeddings _ +Error_Parser_letters 30 Embeddings _ +Error_Parser_labels 18 Embeddings _ +Error_Parser_morpho 22 Embeddings _ diff --git a/UD_any/tokeparser_incremental/tokeparser.dicts b/UD_any/tokeparser_incremental/tokeparser.dicts index 01ff2b9..7b7fd2b 100644 --- a/UD_any/tokeparser_incremental/tokeparser.dicts +++ b/UD_any/tokeparser_incremental/tokeparser.dicts @@ -1,62 +1,97 @@ #Name Dimension Mode # ############################ # TOKENIZER -Tokenizer_bool 02 Embeddings -Tokenizer_int 05 Embeddings -Tokenizer_letters 30 Embeddings -Tokenizer_form 30 Embeddings -Tokenizer_form.f 30 Embeddings -Tokenizer_actions 05 Embeddings -Tokenizer_entropy 05 Embeddings -Tokenizer_pos 18 Embeddings -Tokenizer_morpho 22 Embeddings -Tokenizer_eos 16 Embeddings +Tokenizer_bool 02 Embeddings _ 5 +Tokenizer_int 05 Embeddings _ 200 +Tokenizer_letters 30 Embeddings _ 200000 +Tokenizer_form 30 Embeddings _ 50000 +Tokenizer_form.f 30 Embeddings _ 200000 +Tokenizer_actions 05 Embeddings _ 200 +Tokenizer_pos 05 Embeddings _ 25 +Tokenizer_morpho 05 Embeddings _ 50000 +Tokenizer_eos 05 Embeddings _ 5 # TAGGER -Tagger_actions 18 Embeddings _ -Tagger_bool 16 Embeddings _ -Tagger_int 16 Embeddings _ -Tagger_eos 16 Embeddings _ -Tagger_gov 16 Embeddings _ -Tagger_pos 18 Embeddings _ -Tagger_form 30 Embeddings _ -Tagger_form.f 30 Embeddings _ -Tagger_lemma 30 Embeddings _ -Tagger_letters 30 Embeddings _ -Tagger_labels 18 Embeddings _ -Tagger_morpho 22 Embeddings _ +Tagger_actions 18 Embeddings _ 20 +Tagger_bool 16 Embeddings _ 5 +Tagger_int 16 Embeddings _ 200 +Tagger_eos 16 Embeddings _ 5 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 21 +Tagger_form 30 Embeddings _ 50000 +Tagger_form.f 30 Embeddings _ 200000 +Tagger_lemma 30 Embeddings _ 50000 +Tagger_letters 30 Embeddings _ 200000 +Tagger_labels 18 Embeddings _ 50 +Tagger_morpho 22 Embeddings _ 50000 # MORPHO -Morpho_actions 18 Embeddings _ -Morpho_bool 16 Embeddings _ -Morpho_int 16 Embeddings _ -Morpho_eos 16 Embeddings _ -Morpho_gov 16 Embeddings _ -Morpho_pos 18 Embeddings _ -Morpho_form 30 Embeddings _ -Morpho_form.f 30 Embeddings _ -Morpho_lemma 30 Embeddings _ -Morpho_letters 30 Embeddings _ -Morpho_labels 18 Embeddings _ -Morpho_morpho 22 Embeddings _ +Morpho_actions 18 Embeddings _ 1000 +Morpho_bool 16 Embeddings _ 5 +Morpho_int 16 Embeddings _ 200 +Morpho_eos 16 Embeddings _ 5 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 21 +Morpho_form 30 Embeddings _ 50000 +Morpho_form.f 30 Embeddings _ 20000 +Morpho_lemma 30 Embeddings _ 50000 +Morpho_letters 30 Embeddings _ 20000 +Morpho_labels 18 Embeddings _ 50 +Morpho_morpho 22 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Rules_form 30 Embeddings -Lemmatizer_Rules_letters 10 Embeddings -Lemmatizer_Rules_pos 30 Embeddings -Lemmatizer_Rules_morpho 30 Embeddings +Lemmatizer_Rules_form 30 Embeddings _ 50000 +Lemmatizer_Rules_letters 10 Embeddings _ 200000 +Lemmatizer_Rules_pos 30 Embeddings _ 21 +Lemmatizer_Rules_morpho 30 Embeddings _ 50000 # LEMMATIZER -Lemmatizer_Case_form 30 Embeddings -Lemmatizer_Case_letters 10 Embeddings -Lemmatizer_Case_pos 30 Embeddings -Lemmatizer_Case_morpho 30 Embeddings +Lemmatizer_Case_form 30 Embeddings _ 50000 +Lemmatizer_Case_letters 10 Embeddings _ 200000 +Lemmatizer_Case_pos 30 Embeddings _ 21 +Lemmatizer_Case_morpho 30 Embeddings _ 50000 # PARSER -Parser_actions 18 Embeddings _ -Parser_bool 16 Embeddings _ -Parser_int 16 Embeddings _ -Parser_eos 16 Embeddings _ -Parser_gov 16 Embeddings _ -Parser_pos 18 Embeddings _ -Parser_form 30 Embeddings _ -Parser_form.f 30 Embeddings _ -Parser_lemma 30 Embeddings _ -Parser_letters 30 Embeddings _ -Parser_labels 18 Embeddings _ -Parser_morpho 22 Embeddings _ +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 5 +Parser_int 16 Embeddings _ 200 +Parser_eos 16 Embeddings _ 5 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 21 +Parser_form 30 Embeddings _ 50000 +Parser_form.f 30 Embeddings _ 20000 +Parser_lemma 30 Embeddings _ 50000 +Parser_letters 30 Embeddings _ 20000 +Parser_labels 18 Embeddings _ 50 +Parser_morpho 22 Embeddings _ 50000 +# ERROR_TAGGER +Error_Tagger_actions 18 Embeddings _ +Error_Tagger_bool 16 Embeddings _ +Error_Tagger_int 16 Embeddings _ +Error_Tagger_eos 16 Embeddings _ +Error_Tagger_gov 16 Embeddings _ +Error_Tagger_pos 18 Embeddings _ +Error_Tagger_form 30 Embeddings _ +Error_Tagger_lemma 30 Embeddings _ +Error_Tagger_letters 30 Embeddings _ +Error_Tagger_labels 18 Embeddings _ +Error_Tagger_morpho 22 Embeddings _ +# ERROR_MORPHO +Error_Morpho_actions 18 Embeddings _ +Error_Morpho_bool 16 Embeddings _ +Error_Morpho_int 16 Embeddings _ +Error_Morpho_eos 16 Embeddings _ +Error_Morpho_gov 16 Embeddings _ +Error_Morpho_pos 18 Embeddings _ +Error_Morpho_form 30 Embeddings _ +Error_Morpho_lemma 30 Embeddings _ +Error_Morpho_letters 30 Embeddings _ +Error_Morpho_labels 18 Embeddings _ +Error_Morpho_morpho 22 Embeddings _ +# ERROR_PARSER +Error_Parser_actions 18 Embeddings _ +Error_Parser_bool 16 Embeddings _ +Error_Parser_int 16 Embeddings _ +Error_Parser_eos 16 Embeddings _ +Error_Parser_gov 16 Embeddings _ +Error_Parser_pos 18 Embeddings _ +Error_Parser_form 30 Embeddings _ +Error_Parser_lemma 30 Embeddings _ +Error_Parser_letters 30 Embeddings _ +Error_Parser_labels 18 Embeddings _ +Error_Parser_morpho 22 Embeddings _ -- GitLab