diff --git a/UD_any/data/Makefile b/UD_any/data/Makefile index f424c1001d41ebf4ffff833d1177d8b182a6a5d0..86c5e4ac5b7adbb34772a6f0dc66a76f719b08db 100644 --- a/UD_any/data/Makefile +++ b/UD_any/data/Makefile @@ -15,7 +15,7 @@ FP_FILENAME=fP RULES_FILENAME=lemmatizer_rules.as EXCEPTIONS_FPLM_FILENAME=maca_trans_lemmatizer_exceptions.fplm -all: tokenizer.as texts all_no_test.conllu columns $(FPLM_FILENAME) $(FP_FILENAME) $(RULES_FILENAME) +all: tokenizer.as segmenter.as texts all_no_test.conllu columns $(FPLM_FILENAME) $(FP_FILENAME) $(RULES_FILENAME) rm col_*\.txt rm all_no_test.conllu @@ -28,6 +28,10 @@ tokenizer.as: all_no_test.conllu $(MCD) $(TOOLS)/conllu2splits.py $< $(MCD) >> $@ 2> ambiguities.txt echo "ENDWORD" >> $@ echo "ADDCHARTOWORD" >> $@ + +segmenter.as: + echo "EOS b.0" > $@ + echo "REWRITE b.0 EOS _" >> $@ columns: all_no_test.conllu $(MCD) for number in 1 2 3 4 5 6 7 8 9 10 ; do \ diff --git a/UD_any/data/getActionSets.py b/UD_any/data/getActionSets.py index 1e1b3971d1b58dd85792ad4287f687682c67b5a9..98981917a567f3aea892b4672a79fae253ff639a 100755 --- a/UD_any/data/getActionSets.py +++ b/UD_any/data/getActionSets.py @@ -81,6 +81,25 @@ if __name__ == "__main__" : output.close() elif nameCol == "LABEL" : + output = open("parser_legacy.as", 'w', encoding='utf-8') + print("REDUCE", file=output) + labels = set() + labelsList = [] + for line in open(colFile, "r", encoding='utf-8') : + striped = line.strip() + if len(striped) == 0 or striped == "root" or striped == "_" : + continue + label = striped.split(':')[0] + if label not in labels : + labels.add(striped) + labelsList.append(striped) + labelsList.sort() + for label in labelsList : + print("LEFT " + label, file=output) + print("RIGHT " + label, file=output) + print("EOS s.0", file=output) + print("Default : SHIFT", file=output) + output.close() output = open("parser.as", 'w', encoding='utf-8') print("REDUCE", file=output) labels = set() @@ -97,7 +116,6 @@ if __name__ == "__main__" : for label in labelsList : print("LEFT " + label, file=output) print("RIGHT " + label, file=output) - print("EOS", file=output) print("Default : SHIFT", file=output) output.close() diff --git a/UD_any/parser/errorCorrection.tm b/UD_any/parser/errorCorrection.tm deleted file mode 100644 index c251e16c3dbfd7f444f80a1b499af7ac34740196..0000000000000000000000000000000000000000 --- a/UD_any/parser/errorCorrection.tm +++ /dev/null @@ -1,19 +0,0 @@ -Name : Parser Machine with predicted backtracking -Dicts : parser.dicts -%CLASSIFIERS -strategy strategy.cla -signature signature.cla -parser parser.cla -error_parser error_parser.cla -%STATES -strategy strategy -signature signature -parser parser -error_parser error_parser -%TRANSITIONS -strategy signature MOVE signature -strategy parser MOVE parser -parser error_parser * -error_parser parser BACK -error_parser strategy * -signature strategy * diff --git a/UD_any/parser/error_parser.as b/UD_any/parser/error_parser.as deleted file mode 100644 index e9182f3bc4fd349a75722301d1ee825d7de8fe70..0000000000000000000000000000000000000000 --- a/UD_any/parser/error_parser.as +++ /dev/null @@ -1,5 +0,0 @@ -EPSILON -BACK 1 -BACK 2 -BACK 3 -BACK 4 diff --git a/UD_any/parser/error_parser.cla b/UD_any/parser/error_parser.cla deleted file mode 100644 index a8665563f086c0bb4c3fb29717faae9656791560..0000000000000000000000000000000000000000 --- a/UD_any/parser/error_parser.cla +++ /dev/null @@ -1,6 +0,0 @@ -Name : Error_Parser -Type : Prediction -Oracle : none -Feature Model : error_parser.fm -Action Set : error_parser.as -Topology : M(200,RELU,0.3) diff --git a/UD_any/parser/error_parser.fm b/UD_any/parser/error_parser.fm deleted file mode 100644 index dbaeb620bcc9fe6538f61da98e792060b55a0552..0000000000000000000000000000000000000000 --- a/UD_any/parser/error_parser.fm +++ /dev/null @@ -1,125 +0,0 @@ -# Features classiques -# FORM -s.0#LEMMA.fasttext -s.1#LEMMA.fasttext -s.2#LEMMA.fasttext -s.0.ldep#LEMMA.fasttext -s.1.ldep#LEMMA.fasttext -s.0.rdep#LEMMA.fasttext -s.1.rdep#LEMMA.fasttext -s.0.ldep.ldep#LEMMA.fasttext -s.1.ldep.ldep#LEMMA.fasttext -s.0.rdep.rdep#LEMMA.fasttext -s.1.rdep.rdep#LEMMA.fasttext -s.0.l2dep#LEMMA.fasttext -s.1.l2dep#LEMMA.fasttext -s.0.r2dep#LEMMA.fasttext -s.1.r2dep#LEMMA.fasttext -b.0#LEMMA.fasttext -b.1#LEMMA.fasttext -b.2#LEMMA.fasttext -b.-1#LEMMA.fasttext -b.-2#LEMMA.fasttext -b.0.ldep#LEMMA.fasttext -# POS -s.0#POS -s.1#POS -s.2#POS -s.0.ldep#POS -s.1.ldep#POS -s.0.rdep#POS -s.1.rdep#POS -s.0.ldep.ldep#POS -s.1.ldep.ldep#POS -s.0.rdep.rdep#POS -s.1.rdep.rdep#POS -s.0.l2dep#POS -s.1.l2dep#POS -s.0.r2dep#POS -s.1.r2dep#POS -b.0#POS -b.-1#POS -b.-2#POS -b.0.ldep#POS -# MORPHO -s.0#MORPHO -s.1#MORPHO -s.2#MORPHO -s.0.ldep#MORPHO -s.1.ldep#MORPHO -s.0.rdep#MORPHO -s.1.rdep#MORPHO -s.0.ldep.ldep#MORPHO -s.1.ldep.ldep#MORPHO -s.0.rdep.rdep#MORPHO -s.1.rdep.rdep#MORPHO -s.0.l2dep#MORPHO -s.1.l2dep#MORPHO -s.0.r2dep#MORPHO -s.1.r2dep#MORPHO -b.0#MORPHO -b.-1#MORPHO -b.-2#MORPHO -b.0.ldep#MORPHO -# LABELS -s.0.ldep#LABEL -s.1.ldep#LABEL -s.0.rdep#LABEL -s.1.rdep#LABEL -s.0.ldep.ldep#LABEL -s.1.ldep.ldep#LABEL -s.0.rdep.rdep#LABEL -s.1.rdep.rdep#LABEL -s.0.l2dep#LABEL -s.1.l2dep#LABEL -s.0.r2dep#LABEL -s.1.r2dep#LABEL -b.0.ldep#LABEL -# DISTANCE -s.0#DIST.s.1 -b.0#DIST.s.0 -# VALENCY -s.0#nbr -s.1#nbr -s.0#nbl -s.1#nbl -# SIGNATURES -b.1#SGN -b.2#SGN -# UPPERCASE -b.0#FORM.U -b.1#FORM.U -# EOS -b.-1#EOS -# HISTORY -tc.0 -tc.1 -tc.2 -tc.3 -tc.4 -# ERROR CORRECTION -#b.1.ldep#LEMMA.fasttext -#b.1.ldep#POS -#b.1.ldep#MORPHO -#b.1.ldep#LABEL -#b.1#LABEL -#b.1.gov#POS -#b.1.gov#MORPHO -# More feats -b.1#POS -b.1#MORPHO -b.2#POS -b.2#MORPHO -# More feats -b.-2#LABEL -b.-1#LABEL -b.0#LABEL -s.0#LABEL -s.1#LABEL -# More feats -b.-1.gov#POS -b.-1.gov#MORPHO -s.0.gov#POS -s.0.gov#MORPHO -s.1.gov#POS -s.1.gov#MORPHO diff --git a/UD_any/parser/machine.tm b/UD_any/parser/machine.tm index 66276c355e7afd860b8f1bd61b34d076d95dc1bb..648aa41cd3070c2a796ebd403b23c675d269c91d 100644 --- a/UD_any/parser/machine.tm +++ b/UD_any/parser/machine.tm @@ -2,14 +2,14 @@ Name : Parser Machine Dicts : parser.dicts %CLASSIFIERS strategy strategy.cla -signature signature.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy -signature signature parser parser +segmenter segmenter %TRANSITIONS -strategy signature MOVE signature strategy parser MOVE parser +strategy segmenter MOVE segmenter parser strategy * -signature strategy * +segmenter strategy * diff --git a/UD_any/parser/normal.tm b/UD_any/parser/normal.tm index 66276c355e7afd860b8f1bd61b34d076d95dc1bb..648aa41cd3070c2a796ebd403b23c675d269c91d 100644 --- a/UD_any/parser/normal.tm +++ b/UD_any/parser/normal.tm @@ -2,14 +2,14 @@ Name : Parser Machine Dicts : parser.dicts %CLASSIFIERS strategy strategy.cla -signature signature.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy -signature signature parser parser +segmenter segmenter %TRANSITIONS -strategy signature MOVE signature strategy parser MOVE parser +strategy segmenter MOVE segmenter parser strategy * -signature strategy * +segmenter strategy * diff --git a/UD_any/parser/parser.dicts b/UD_any/parser/parser.dicts index 1c5346f92d4057237b89cef3d337b667526ff540..b39dbe48f7cacf510141a1623b811e8f6fb43678 100644 --- a/UD_any/parser/parser.dicts +++ b/UD_any/parser/parser.dicts @@ -12,6 +12,14 @@ Parser_letters 30 Embeddings _ 200000 Parser_labels 18 Embeddings _ 50 Parser_morpho 22 Embeddings _ 50000 ######################################################## +Segmenter_bool 02 Embeddings _ 5 +Segmenter_int 05 Embeddings _ 200 +Segmenter_letters 30 Embeddings _ 200000 +Segmenter_pos 15 Embeddings _ 21 +Segmenter_form 30 Embeddings _ 50000 +Segmenter_form.f 30 Embeddings _ 200000 +Segmenter_actions 05 Embeddings _ 21 +######################################################## Error_Parser_actions 18 Embeddings _ Error_Parser_bool 16 Embeddings _ Error_Parser_int 16 Embeddings _ diff --git a/UD_any/parser/segmenter.cla b/UD_any/parser/segmenter.cla new file mode 100644 index 0000000000000000000000000000000000000000..6a817d1e216d65c0579bb1c5ae393075dfc499d4 --- /dev/null +++ b/UD_any/parser/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/tagger.fm +Action Set : data/segmenter.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/parser/signature.cla b/UD_any/parser/signature.cla deleted file mode 100644 index e979899ecb7c9d25093b7f8298505f7ba54682d1..0000000000000000000000000000000000000000 --- a/UD_any/parser/signature.cla +++ /dev/null @@ -1,4 +0,0 @@ -Name : Signature -Type : Information -Oracle : signature -Oracle Filename : data/fP diff --git a/UD_any/parser_basic/parser.cla b/UD_any/parser_basic/parser.cla index b3b31dd3d0dfe0cefa1964d7cf4486af997d6817..504a9d3213d1c6ea01cbabc7e1b49e4f755dc03e 100644 --- a/UD_any/parser_basic/parser.cla +++ b/UD_any/parser_basic/parser.cla @@ -2,6 +2,6 @@ Name : Parser Type : Prediction Oracle : parser Feature Model : parser.fm -Action Set : data/parser.as +Action Set : data/parser_legacy.as Topology : (500,RELU,0.3) Dynamic : yes diff --git a/UD_any/parser_basic/strategy.cla b/UD_any/parser_basic/strategy.cla index 29dc2513655274a0dffbddcee2586b72075e10a5..b9ace885d01f76980ddbd891eef8341108184242 100644 --- a/UD_any/parser_basic/strategy.cla +++ b/UD_any/parser_basic/strategy.cla @@ -1,4 +1,4 @@ Name : Strategy Type : Information -Oracle : strategy_parser +Oracle : strategy_parser_legacy Oracle Filename : none diff --git a/UD_any/segmenter/machine.tm b/UD_any/segmenter/machine.tm new file mode 100644 index 0000000000000000000000000000000000000000..7bf5e1963e20f3ca12f2e9ef49c6e8c33242454f --- /dev/null +++ b/UD_any/segmenter/machine.tm @@ -0,0 +1,11 @@ +Name : Sentence Segmenter Machine +Dicts : segmenter.dicts +%CLASSIFIERS +strategy strategy.cla +segmenter segmenter.cla +%STATES +strategy strategy +segmenter segmenter +%TRANSITIONS +strategy segmenter * +segmenter strategy * diff --git a/UD_any/segmenter/segmenter.cla b/UD_any/segmenter/segmenter.cla new file mode 100644 index 0000000000000000000000000000000000000000..6a817d1e216d65c0579bb1c5ae393075dfc499d4 --- /dev/null +++ b/UD_any/segmenter/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/tagger.fm +Action Set : data/segmenter.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/segmenter/segmenter.dicts b/UD_any/segmenter/segmenter.dicts new file mode 100644 index 0000000000000000000000000000000000000000..86a037762083a350f4c1e9985694adb3c63781c9 --- /dev/null +++ b/UD_any/segmenter/segmenter.dicts @@ -0,0 +1,9 @@ +#Name Dimension Mode file # +######################################################################### +Segmenter_bool 02 Embeddings _ 5 +Segmenter_int 05 Embeddings _ 200 +Segmenter_letters 30 Embeddings _ 200000 +Segmenter_pos 15 Embeddings _ 21 +Segmenter_form 30 Embeddings _ 50000 +Segmenter_form.f 30 Embeddings _ 200000 +Segmenter_actions 05 Embeddings _ 21 diff --git a/UD_any/segmenter/strategy.cla b/UD_any/segmenter/strategy.cla new file mode 100644 index 0000000000000000000000000000000000000000..6a2c5ae241b52868aa5a969bde321a3a39c050d9 --- /dev/null +++ b/UD_any/segmenter/strategy.cla @@ -0,0 +1,4 @@ +Name : Strategy +Type : Information +Oracle : strategy_segmenter +Oracle Filename : none diff --git a/UD_any/segmenter/test.bd b/UD_any/segmenter/test.bd new file mode 100644 index 0000000000000000000000000000000000000000..03a0d7023457b4bf7a36afbbb17d0a9ff88d5bf1 --- /dev/null +++ b/UD_any/segmenter/test.bd @@ -0,0 +1,6 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################## +0 ID hyp none Final 1 +1 FORM ref form Final 1 +3 POS ref pos Final 1 +0 EOS hyp int Final 0 diff --git a/UD_any/segmenter/train.bd b/UD_any/segmenter/train.bd new file mode 100644 index 0000000000000000000000000000000000000000..0fea5ba405afcc18e2247a5533ba52bdd38b0b56 --- /dev/null +++ b/UD_any/segmenter/train.bd @@ -0,0 +1,6 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################## +0 ID hyp none FromZero 1 +1 FORM ref form FromZero 1 +3 POS ref pos FromZero 1 +0 EOS hyp int FromZero 0 diff --git a/UD_any/tagparser/machine.tm b/UD_any/tagparser/machine.tm index 77ebff3a44bbe8eab9cff6ccad1442f5be33ad7e..e0858d0b5e732772359c0aa4f6839affedb9cbf5 100644 --- a/UD_any/tagparser/machine.tm +++ b/UD_any/tagparser/machine.tm @@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tagger tagger @@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tagger MOVE tagger strategy morpho MOVE morpho @@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tagger strategy * morpho strategy * lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tagparser/normal.tm b/UD_any/tagparser/normal.tm index 77ebff3a44bbe8eab9cff6ccad1442f5be33ad7e..e0858d0b5e732772359c0aa4f6839affedb9cbf5 100644 --- a/UD_any/tagparser/normal.tm +++ b/UD_any/tagparser/normal.tm @@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tagger tagger @@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tagger MOVE tagger strategy morpho MOVE morpho @@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tagger strategy * morpho strategy * lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tagparser/segmenter.cla b/UD_any/tagparser/segmenter.cla new file mode 100644 index 0000000000000000000000000000000000000000..6a817d1e216d65c0579bb1c5ae393075dfc499d4 --- /dev/null +++ b/UD_any/tagparser/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/tagger.fm +Action Set : data/segmenter.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tagparser/tagparser.dicts b/UD_any/tagparser/tagparser.dicts index c328e311736e2046fa2c847b03c0e107c88d96fe..9672e0026197a43a5aa77de98b15a79a90af3e69 100644 --- a/UD_any/tagparser/tagparser.dicts +++ b/UD_any/tagparser/tagparser.dicts @@ -49,6 +49,14 @@ Parser_lemma 30 Embeddings _ 50000 Parser_letters 30 Embeddings _ 20000 Parser_labels 18 Embeddings _ 50 Parser_morpho 22 Embeddings _ 50000 +# SEGMENTER +Segmenter_bool 02 Embeddings _ 5 +Segmenter_int 05 Embeddings _ 200 +Segmenter_letters 30 Embeddings _ 200000 +Segmenter_pos 15 Embeddings _ 21 +Segmenter_form 30 Embeddings _ 50000 +Segmenter_form.f 30 Embeddings _ 200000 +Segmenter_actions 05 Embeddings _ 21 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _ diff --git a/UD_any/tagparser_sequential/machine.tm b/UD_any/tagparser_sequential/machine.tm index f1c0b0e1433625567abc923c9d41251539119962..28176893c5003a3642cb39a15d44043461ded74d 100644 --- a/UD_any/tagparser_sequential/machine.tm +++ b/UD_any/tagparser_sequential/machine.tm @@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tagger tagger @@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tagger MOVE tagger strategy morpho MOVE morpho @@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tagger strategy * morpho strategy * lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tagparser_sequential/normal.tm b/UD_any/tagparser_sequential/normal.tm index f1c0b0e1433625567abc923c9d41251539119962..28176893c5003a3642cb39a15d44043461ded74d 100644 --- a/UD_any/tagparser_sequential/normal.tm +++ b/UD_any/tagparser_sequential/normal.tm @@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tagger tagger @@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tagger MOVE tagger strategy morpho MOVE morpho @@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tagger strategy * morpho strategy * lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tagparser_sequential/segmenter.cla b/UD_any/tagparser_sequential/segmenter.cla new file mode 100644 index 0000000000000000000000000000000000000000..6a817d1e216d65c0579bb1c5ae393075dfc499d4 --- /dev/null +++ b/UD_any/tagparser_sequential/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/tagger.fm +Action Set : data/segmenter.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tagparser_sequential/tagparser.dicts b/UD_any/tagparser_sequential/tagparser.dicts index c328e311736e2046fa2c847b03c0e107c88d96fe..9672e0026197a43a5aa77de98b15a79a90af3e69 100644 --- a/UD_any/tagparser_sequential/tagparser.dicts +++ b/UD_any/tagparser_sequential/tagparser.dicts @@ -49,6 +49,14 @@ Parser_lemma 30 Embeddings _ 50000 Parser_letters 30 Embeddings _ 20000 Parser_labels 18 Embeddings _ 50 Parser_morpho 22 Embeddings _ 50000 +# SEGMENTER +Segmenter_bool 02 Embeddings _ 5 +Segmenter_int 05 Embeddings _ 200 +Segmenter_letters 30 Embeddings _ 200000 +Segmenter_pos 15 Embeddings _ 21 +Segmenter_form 30 Embeddings _ 50000 +Segmenter_form.f 30 Embeddings _ 200000 +Segmenter_actions 05 Embeddings _ 21 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _ diff --git a/UD_any/tagparser_sequential_strong/machine.tm b/UD_any/tagparser_sequential_strong/machine.tm index f1c0b0e1433625567abc923c9d41251539119962..e0858d0b5e732772359c0aa4f6839affedb9cbf5 100644 --- a/UD_any/tagparser_sequential_strong/machine.tm +++ b/UD_any/tagparser_sequential_strong/machine.tm @@ -1,4 +1,4 @@ -Name : Tagger, Morpho, Lemmatizer and Parser sequential Machine +Name : Tagger, Morpho, Lemmatizer and Parser Machine Dicts : tagparser.dicts %CLASSIFIERS strategy strategy.cla @@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tagger tagger @@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tagger MOVE tagger strategy morpho MOVE morpho @@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tagger strategy * morpho strategy * lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tagparser_sequential_strong/normal.tm b/UD_any/tagparser_sequential_strong/normal.tm index f1c0b0e1433625567abc923c9d41251539119962..e0858d0b5e732772359c0aa4f6839affedb9cbf5 100644 --- a/UD_any/tagparser_sequential_strong/normal.tm +++ b/UD_any/tagparser_sequential_strong/normal.tm @@ -1,4 +1,4 @@ -Name : Tagger, Morpho, Lemmatizer and Parser sequential Machine +Name : Tagger, Morpho, Lemmatizer and Parser Machine Dicts : tagparser.dicts %CLASSIFIERS strategy strategy.cla @@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tagger tagger @@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tagger MOVE tagger strategy morpho MOVE morpho @@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tagger strategy * morpho strategy * lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tagparser_sequential_strong/segmenter.cla b/UD_any/tagparser_sequential_strong/segmenter.cla new file mode 100644 index 0000000000000000000000000000000000000000..6a817d1e216d65c0579bb1c5ae393075dfc499d4 --- /dev/null +++ b/UD_any/tagparser_sequential_strong/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/tagger.fm +Action Set : data/segmenter.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tagparser_sequential_strong/tagparser.dicts b/UD_any/tagparser_sequential_strong/tagparser.dicts index c328e311736e2046fa2c847b03c0e107c88d96fe..9672e0026197a43a5aa77de98b15a79a90af3e69 100644 --- a/UD_any/tagparser_sequential_strong/tagparser.dicts +++ b/UD_any/tagparser_sequential_strong/tagparser.dicts @@ -49,6 +49,14 @@ Parser_lemma 30 Embeddings _ 50000 Parser_letters 30 Embeddings _ 20000 Parser_labels 18 Embeddings _ 50 Parser_morpho 22 Embeddings _ 50000 +# SEGMENTER +Segmenter_bool 02 Embeddings _ 5 +Segmenter_int 05 Embeddings _ 200 +Segmenter_letters 30 Embeddings _ 200000 +Segmenter_pos 15 Embeddings _ 21 +Segmenter_form 30 Embeddings _ 50000 +Segmenter_form.f 30 Embeddings _ 200000 +Segmenter_actions 05 Embeddings _ 21 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _ diff --git a/UD_any/tokeparser/machine.tm b/UD_any/tokeparser/machine.tm index 0e1b0438deab0947a3624d9ac94a76e648715c4b..f591178c203e6b19d86ed8fc136b8ec7c74a505e 100644 --- a/UD_any/tokeparser/machine.tm +++ b/UD_any/tokeparser/machine.tm @@ -9,6 +9,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tokenizer tokenizer @@ -18,6 +19,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tokenizer MOVE tokenizer strategy tagger MOVE tagger @@ -26,6 +28,7 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tokenizer strategy * tagger strategy * morpho strategy * @@ -33,3 +36,4 @@ lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tokeparser/segmenter.cla b/UD_any/tokeparser/segmenter.cla new file mode 100644 index 0000000000000000000000000000000000000000..6a817d1e216d65c0579bb1c5ae393075dfc499d4 --- /dev/null +++ b/UD_any/tokeparser/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/tagger.fm +Action Set : data/segmenter.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser/tokeparser.dicts b/UD_any/tokeparser/tokeparser.dicts index 4534960f51830c6ceca5c08338ad7eb604b34e55..c9235b91d69bcb166103ad08ad18000edbdc1a25 100644 --- a/UD_any/tokeparser/tokeparser.dicts +++ b/UD_any/tokeparser/tokeparser.dicts @@ -56,6 +56,14 @@ Parser_lemma 30 Embeddings _ 50000 Parser_letters 30 Embeddings _ 20000 Parser_labels 18 Embeddings _ 50 Parser_morpho 22 Embeddings _ 50000 +# SEGMENTER +Segmenter_bool 02 Embeddings _ 5 +Segmenter_int 05 Embeddings _ 200 +Segmenter_letters 30 Embeddings _ 200000 +Segmenter_pos 15 Embeddings _ 21 +Segmenter_form 30 Embeddings _ 50000 +Segmenter_form.f 30 Embeddings _ 200000 +Segmenter_actions 05 Embeddings _ 21 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _ diff --git a/UD_any/tokeparser_incremental/machine.tm b/UD_any/tokeparser_incremental/machine.tm index 0e1b0438deab0947a3624d9ac94a76e648715c4b..f591178c203e6b19d86ed8fc136b8ec7c74a505e 100644 --- a/UD_any/tokeparser_incremental/machine.tm +++ b/UD_any/tokeparser_incremental/machine.tm @@ -9,6 +9,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tokenizer tokenizer @@ -18,6 +19,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tokenizer MOVE tokenizer strategy tagger MOVE tagger @@ -26,6 +28,7 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tokenizer strategy * tagger strategy * morpho strategy * @@ -33,3 +36,4 @@ lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tokeparser_incremental/segmenter.cla b/UD_any/tokeparser_incremental/segmenter.cla new file mode 100644 index 0000000000000000000000000000000000000000..6a817d1e216d65c0579bb1c5ae393075dfc499d4 --- /dev/null +++ b/UD_any/tokeparser_incremental/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/tagger.fm +Action Set : data/segmenter.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental/tokeparser.dicts b/UD_any/tokeparser_incremental/tokeparser.dicts index 7b7fd2b27ca28cfcc0532444450b73e6bd27a6b1..fd785028d6d6217cf440b8b45d426fe66102d40b 100644 --- a/UD_any/tokeparser_incremental/tokeparser.dicts +++ b/UD_any/tokeparser_incremental/tokeparser.dicts @@ -59,6 +59,14 @@ Parser_lemma 30 Embeddings _ 50000 Parser_letters 30 Embeddings _ 20000 Parser_labels 18 Embeddings _ 50 Parser_morpho 22 Embeddings _ 50000 +# SEGMENTER +Segmenter_bool 02 Embeddings _ 5 +Segmenter_int 05 Embeddings _ 200 +Segmenter_letters 30 Embeddings _ 200000 +Segmenter_pos 15 Embeddings _ 21 +Segmenter_form 30 Embeddings _ 50000 +Segmenter_form.f 30 Embeddings _ 200000 +Segmenter_actions 05 Embeddings _ 21 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _