From 7f4221855d58db770c5467dac5ee2b07421f925d Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Mon, 25 Nov 2019 01:14:56 +0100 Subject: [PATCH] EOS is now predicted by a new classifier called segmenter --- UD_any/data/Makefile | 6 +- UD_any/data/getActionSets.py | 20 ++- UD_any/parser/errorCorrection.tm | 19 --- UD_any/parser/error_parser.as | 5 - UD_any/parser/error_parser.cla | 6 - UD_any/parser/error_parser.fm | 125 ------------------ UD_any/parser/machine.tm | 8 +- UD_any/parser/normal.tm | 8 +- UD_any/parser/parser.dicts | 8 ++ UD_any/parser/segmenter.cla | 7 + UD_any/parser/signature.cla | 4 - UD_any/parser_basic/parser.cla | 2 +- UD_any/parser_basic/strategy.cla | 2 +- UD_any/segmenter/machine.tm | 11 ++ UD_any/segmenter/segmenter.cla | 7 + UD_any/segmenter/segmenter.dicts | 9 ++ UD_any/segmenter/strategy.cla | 4 + UD_any/segmenter/test.bd | 6 + UD_any/segmenter/train.bd | 6 + UD_any/tagparser/machine.tm | 4 + UD_any/tagparser/normal.tm | 4 + UD_any/tagparser/segmenter.cla | 7 + UD_any/tagparser/tagparser.dicts | 8 ++ UD_any/tagparser_sequential/machine.tm | 4 + UD_any/tagparser_sequential/normal.tm | 4 + UD_any/tagparser_sequential/segmenter.cla | 7 + UD_any/tagparser_sequential/tagparser.dicts | 8 ++ UD_any/tagparser_sequential_strong/machine.tm | 6 +- UD_any/tagparser_sequential_strong/normal.tm | 6 +- .../tagparser_sequential_strong/segmenter.cla | 7 + .../tagparser.dicts | 8 ++ UD_any/tokeparser/machine.tm | 4 + UD_any/tokeparser/segmenter.cla | 7 + UD_any/tokeparser/tokeparser.dicts | 8 ++ UD_any/tokeparser_incremental/machine.tm | 4 + UD_any/tokeparser_incremental/segmenter.cla | 7 + .../tokeparser_incremental/tokeparser.dicts | 8 ++ 37 files changed, 201 insertions(+), 173 deletions(-) delete mode 100644 UD_any/parser/errorCorrection.tm delete mode 100644 UD_any/parser/error_parser.as delete mode 100644 UD_any/parser/error_parser.cla delete mode 100644 UD_any/parser/error_parser.fm create mode 100644 UD_any/parser/segmenter.cla delete mode 100644 UD_any/parser/signature.cla create mode 100644 UD_any/segmenter/machine.tm create mode 100644 UD_any/segmenter/segmenter.cla create mode 100644 UD_any/segmenter/segmenter.dicts create mode 100644 UD_any/segmenter/strategy.cla create mode 100644 UD_any/segmenter/test.bd create mode 100644 UD_any/segmenter/train.bd create mode 100644 UD_any/tagparser/segmenter.cla create mode 100644 UD_any/tagparser_sequential/segmenter.cla create mode 100644 UD_any/tagparser_sequential_strong/segmenter.cla create mode 100644 UD_any/tokeparser/segmenter.cla create mode 100644 UD_any/tokeparser_incremental/segmenter.cla diff --git a/UD_any/data/Makefile b/UD_any/data/Makefile index f424c10..86c5e4a 100644 --- a/UD_any/data/Makefile +++ b/UD_any/data/Makefile @@ -15,7 +15,7 @@ FP_FILENAME=fP RULES_FILENAME=lemmatizer_rules.as EXCEPTIONS_FPLM_FILENAME=maca_trans_lemmatizer_exceptions.fplm -all: tokenizer.as texts all_no_test.conllu columns $(FPLM_FILENAME) $(FP_FILENAME) $(RULES_FILENAME) +all: tokenizer.as segmenter.as texts all_no_test.conllu columns $(FPLM_FILENAME) $(FP_FILENAME) $(RULES_FILENAME) rm col_*\.txt rm all_no_test.conllu @@ -28,6 +28,10 @@ tokenizer.as: all_no_test.conllu $(MCD) $(TOOLS)/conllu2splits.py $< $(MCD) >> $@ 2> ambiguities.txt echo "ENDWORD" >> $@ echo "ADDCHARTOWORD" >> $@ + +segmenter.as: + echo "EOS b.0" > $@ + echo "REWRITE b.0 EOS _" >> $@ columns: all_no_test.conllu $(MCD) for number in 1 2 3 4 5 6 7 8 9 10 ; do \ diff --git a/UD_any/data/getActionSets.py b/UD_any/data/getActionSets.py index 1e1b397..9898191 100755 --- a/UD_any/data/getActionSets.py +++ b/UD_any/data/getActionSets.py @@ -81,6 +81,25 @@ if __name__ == "__main__" : output.close() elif nameCol == "LABEL" : + output = open("parser_legacy.as", 'w', encoding='utf-8') + print("REDUCE", file=output) + labels = set() + labelsList = [] + for line in open(colFile, "r", encoding='utf-8') : + striped = line.strip() + if len(striped) == 0 or striped == "root" or striped == "_" : + continue + label = striped.split(':')[0] + if label not in labels : + labels.add(striped) + labelsList.append(striped) + labelsList.sort() + for label in labelsList : + print("LEFT " + label, file=output) + print("RIGHT " + label, file=output) + print("EOS s.0", file=output) + print("Default : SHIFT", file=output) + output.close() output = open("parser.as", 'w', encoding='utf-8') print("REDUCE", file=output) labels = set() @@ -97,7 +116,6 @@ if __name__ == "__main__" : for label in labelsList : print("LEFT " + label, file=output) print("RIGHT " + label, file=output) - print("EOS", file=output) print("Default : SHIFT", file=output) output.close() diff --git a/UD_any/parser/errorCorrection.tm b/UD_any/parser/errorCorrection.tm deleted file mode 100644 index c251e16..0000000 --- a/UD_any/parser/errorCorrection.tm +++ /dev/null @@ -1,19 +0,0 @@ -Name : Parser Machine with predicted backtracking -Dicts : parser.dicts -%CLASSIFIERS -strategy strategy.cla -signature signature.cla -parser parser.cla -error_parser error_parser.cla -%STATES -strategy strategy -signature signature -parser parser -error_parser error_parser -%TRANSITIONS -strategy signature MOVE signature -strategy parser MOVE parser -parser error_parser * -error_parser parser BACK -error_parser strategy * -signature strategy * diff --git a/UD_any/parser/error_parser.as b/UD_any/parser/error_parser.as deleted file mode 100644 index e9182f3..0000000 --- a/UD_any/parser/error_parser.as +++ /dev/null @@ -1,5 +0,0 @@ -EPSILON -BACK 1 -BACK 2 -BACK 3 -BACK 4 diff --git a/UD_any/parser/error_parser.cla b/UD_any/parser/error_parser.cla deleted file mode 100644 index a866556..0000000 --- a/UD_any/parser/error_parser.cla +++ /dev/null @@ -1,6 +0,0 @@ -Name : Error_Parser -Type : Prediction -Oracle : none -Feature Model : error_parser.fm -Action Set : error_parser.as -Topology : M(200,RELU,0.3) diff --git a/UD_any/parser/error_parser.fm b/UD_any/parser/error_parser.fm deleted file mode 100644 index dbaeb62..0000000 --- a/UD_any/parser/error_parser.fm +++ /dev/null @@ -1,125 +0,0 @@ -# Features classiques -# FORM -s.0#LEMMA.fasttext -s.1#LEMMA.fasttext -s.2#LEMMA.fasttext -s.0.ldep#LEMMA.fasttext -s.1.ldep#LEMMA.fasttext -s.0.rdep#LEMMA.fasttext -s.1.rdep#LEMMA.fasttext -s.0.ldep.ldep#LEMMA.fasttext -s.1.ldep.ldep#LEMMA.fasttext -s.0.rdep.rdep#LEMMA.fasttext -s.1.rdep.rdep#LEMMA.fasttext -s.0.l2dep#LEMMA.fasttext -s.1.l2dep#LEMMA.fasttext -s.0.r2dep#LEMMA.fasttext -s.1.r2dep#LEMMA.fasttext -b.0#LEMMA.fasttext -b.1#LEMMA.fasttext -b.2#LEMMA.fasttext -b.-1#LEMMA.fasttext -b.-2#LEMMA.fasttext -b.0.ldep#LEMMA.fasttext -# POS -s.0#POS -s.1#POS -s.2#POS -s.0.ldep#POS -s.1.ldep#POS -s.0.rdep#POS -s.1.rdep#POS -s.0.ldep.ldep#POS -s.1.ldep.ldep#POS -s.0.rdep.rdep#POS -s.1.rdep.rdep#POS -s.0.l2dep#POS -s.1.l2dep#POS -s.0.r2dep#POS -s.1.r2dep#POS -b.0#POS -b.-1#POS -b.-2#POS -b.0.ldep#POS -# MORPHO -s.0#MORPHO -s.1#MORPHO -s.2#MORPHO -s.0.ldep#MORPHO -s.1.ldep#MORPHO -s.0.rdep#MORPHO -s.1.rdep#MORPHO -s.0.ldep.ldep#MORPHO -s.1.ldep.ldep#MORPHO -s.0.rdep.rdep#MORPHO -s.1.rdep.rdep#MORPHO -s.0.l2dep#MORPHO -s.1.l2dep#MORPHO -s.0.r2dep#MORPHO -s.1.r2dep#MORPHO -b.0#MORPHO -b.-1#MORPHO -b.-2#MORPHO -b.0.ldep#MORPHO -# LABELS -s.0.ldep#LABEL -s.1.ldep#LABEL -s.0.rdep#LABEL -s.1.rdep#LABEL -s.0.ldep.ldep#LABEL -s.1.ldep.ldep#LABEL -s.0.rdep.rdep#LABEL -s.1.rdep.rdep#LABEL -s.0.l2dep#LABEL -s.1.l2dep#LABEL -s.0.r2dep#LABEL -s.1.r2dep#LABEL -b.0.ldep#LABEL -# DISTANCE -s.0#DIST.s.1 -b.0#DIST.s.0 -# VALENCY -s.0#nbr -s.1#nbr -s.0#nbl -s.1#nbl -# SIGNATURES -b.1#SGN -b.2#SGN -# UPPERCASE -b.0#FORM.U -b.1#FORM.U -# EOS -b.-1#EOS -# HISTORY -tc.0 -tc.1 -tc.2 -tc.3 -tc.4 -# ERROR CORRECTION -#b.1.ldep#LEMMA.fasttext -#b.1.ldep#POS -#b.1.ldep#MORPHO -#b.1.ldep#LABEL -#b.1#LABEL -#b.1.gov#POS -#b.1.gov#MORPHO -# More feats -b.1#POS -b.1#MORPHO -b.2#POS -b.2#MORPHO -# More feats -b.-2#LABEL -b.-1#LABEL -b.0#LABEL -s.0#LABEL -s.1#LABEL -# More feats -b.-1.gov#POS -b.-1.gov#MORPHO -s.0.gov#POS -s.0.gov#MORPHO -s.1.gov#POS -s.1.gov#MORPHO diff --git a/UD_any/parser/machine.tm b/UD_any/parser/machine.tm index 66276c3..648aa41 100644 --- a/UD_any/parser/machine.tm +++ b/UD_any/parser/machine.tm @@ -2,14 +2,14 @@ Name : Parser Machine Dicts : parser.dicts %CLASSIFIERS strategy strategy.cla -signature signature.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy -signature signature parser parser +segmenter segmenter %TRANSITIONS -strategy signature MOVE signature strategy parser MOVE parser +strategy segmenter MOVE segmenter parser strategy * -signature strategy * +segmenter strategy * diff --git a/UD_any/parser/normal.tm b/UD_any/parser/normal.tm index 66276c3..648aa41 100644 --- a/UD_any/parser/normal.tm +++ b/UD_any/parser/normal.tm @@ -2,14 +2,14 @@ Name : Parser Machine Dicts : parser.dicts %CLASSIFIERS strategy strategy.cla -signature signature.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy -signature signature parser parser +segmenter segmenter %TRANSITIONS -strategy signature MOVE signature strategy parser MOVE parser +strategy segmenter MOVE segmenter parser strategy * -signature strategy * +segmenter strategy * diff --git a/UD_any/parser/parser.dicts b/UD_any/parser/parser.dicts index 1c5346f..b39dbe4 100644 --- a/UD_any/parser/parser.dicts +++ b/UD_any/parser/parser.dicts @@ -12,6 +12,14 @@ Parser_letters 30 Embeddings _ 200000 Parser_labels 18 Embeddings _ 50 Parser_morpho 22 Embeddings _ 50000 ######################################################## +Segmenter_bool 02 Embeddings _ 5 +Segmenter_int 05 Embeddings _ 200 +Segmenter_letters 30 Embeddings _ 200000 +Segmenter_pos 15 Embeddings _ 21 +Segmenter_form 30 Embeddings _ 50000 +Segmenter_form.f 30 Embeddings _ 200000 +Segmenter_actions 05 Embeddings _ 21 +######################################################## Error_Parser_actions 18 Embeddings _ Error_Parser_bool 16 Embeddings _ Error_Parser_int 16 Embeddings _ diff --git a/UD_any/parser/segmenter.cla b/UD_any/parser/segmenter.cla new file mode 100644 index 0000000..6a817d1 --- /dev/null +++ b/UD_any/parser/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/tagger.fm +Action Set : data/segmenter.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/parser/signature.cla b/UD_any/parser/signature.cla deleted file mode 100644 index e979899..0000000 --- a/UD_any/parser/signature.cla +++ /dev/null @@ -1,4 +0,0 @@ -Name : Signature -Type : Information -Oracle : signature -Oracle Filename : data/fP diff --git a/UD_any/parser_basic/parser.cla b/UD_any/parser_basic/parser.cla index b3b31dd..504a9d3 100644 --- a/UD_any/parser_basic/parser.cla +++ b/UD_any/parser_basic/parser.cla @@ -2,6 +2,6 @@ Name : Parser Type : Prediction Oracle : parser Feature Model : parser.fm -Action Set : data/parser.as +Action Set : data/parser_legacy.as Topology : (500,RELU,0.3) Dynamic : yes diff --git a/UD_any/parser_basic/strategy.cla b/UD_any/parser_basic/strategy.cla index 29dc251..b9ace88 100644 --- a/UD_any/parser_basic/strategy.cla +++ b/UD_any/parser_basic/strategy.cla @@ -1,4 +1,4 @@ Name : Strategy Type : Information -Oracle : strategy_parser +Oracle : strategy_parser_legacy Oracle Filename : none diff --git a/UD_any/segmenter/machine.tm b/UD_any/segmenter/machine.tm new file mode 100644 index 0000000..7bf5e19 --- /dev/null +++ b/UD_any/segmenter/machine.tm @@ -0,0 +1,11 @@ +Name : Sentence Segmenter Machine +Dicts : segmenter.dicts +%CLASSIFIERS +strategy strategy.cla +segmenter segmenter.cla +%STATES +strategy strategy +segmenter segmenter +%TRANSITIONS +strategy segmenter * +segmenter strategy * diff --git a/UD_any/segmenter/segmenter.cla b/UD_any/segmenter/segmenter.cla new file mode 100644 index 0000000..6a817d1 --- /dev/null +++ b/UD_any/segmenter/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/tagger.fm +Action Set : data/segmenter.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/segmenter/segmenter.dicts b/UD_any/segmenter/segmenter.dicts new file mode 100644 index 0000000..86a0377 --- /dev/null +++ b/UD_any/segmenter/segmenter.dicts @@ -0,0 +1,9 @@ +#Name Dimension Mode file # +######################################################################### +Segmenter_bool 02 Embeddings _ 5 +Segmenter_int 05 Embeddings _ 200 +Segmenter_letters 30 Embeddings _ 200000 +Segmenter_pos 15 Embeddings _ 21 +Segmenter_form 30 Embeddings _ 50000 +Segmenter_form.f 30 Embeddings _ 200000 +Segmenter_actions 05 Embeddings _ 21 diff --git a/UD_any/segmenter/strategy.cla b/UD_any/segmenter/strategy.cla new file mode 100644 index 0000000..6a2c5ae --- /dev/null +++ b/UD_any/segmenter/strategy.cla @@ -0,0 +1,4 @@ +Name : Strategy +Type : Information +Oracle : strategy_segmenter +Oracle Filename : none diff --git a/UD_any/segmenter/test.bd b/UD_any/segmenter/test.bd new file mode 100644 index 0000000..03a0d70 --- /dev/null +++ b/UD_any/segmenter/test.bd @@ -0,0 +1,6 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################## +0 ID hyp none Final 1 +1 FORM ref form Final 1 +3 POS ref pos Final 1 +0 EOS hyp int Final 0 diff --git a/UD_any/segmenter/train.bd b/UD_any/segmenter/train.bd new file mode 100644 index 0000000..0fea5ba --- /dev/null +++ b/UD_any/segmenter/train.bd @@ -0,0 +1,6 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################## +0 ID hyp none FromZero 1 +1 FORM ref form FromZero 1 +3 POS ref pos FromZero 1 +0 EOS hyp int FromZero 0 diff --git a/UD_any/tagparser/machine.tm b/UD_any/tagparser/machine.tm index 77ebff3..e0858d0 100644 --- a/UD_any/tagparser/machine.tm +++ b/UD_any/tagparser/machine.tm @@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tagger tagger @@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tagger MOVE tagger strategy morpho MOVE morpho @@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tagger strategy * morpho strategy * lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tagparser/normal.tm b/UD_any/tagparser/normal.tm index 77ebff3..e0858d0 100644 --- a/UD_any/tagparser/normal.tm +++ b/UD_any/tagparser/normal.tm @@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tagger tagger @@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tagger MOVE tagger strategy morpho MOVE morpho @@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tagger strategy * morpho strategy * lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tagparser/segmenter.cla b/UD_any/tagparser/segmenter.cla new file mode 100644 index 0000000..6a817d1 --- /dev/null +++ b/UD_any/tagparser/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/tagger.fm +Action Set : data/segmenter.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tagparser/tagparser.dicts b/UD_any/tagparser/tagparser.dicts index c328e31..9672e00 100644 --- a/UD_any/tagparser/tagparser.dicts +++ b/UD_any/tagparser/tagparser.dicts @@ -49,6 +49,14 @@ Parser_lemma 30 Embeddings _ 50000 Parser_letters 30 Embeddings _ 20000 Parser_labels 18 Embeddings _ 50 Parser_morpho 22 Embeddings _ 50000 +# SEGMENTER +Segmenter_bool 02 Embeddings _ 5 +Segmenter_int 05 Embeddings _ 200 +Segmenter_letters 30 Embeddings _ 200000 +Segmenter_pos 15 Embeddings _ 21 +Segmenter_form 30 Embeddings _ 50000 +Segmenter_form.f 30 Embeddings _ 200000 +Segmenter_actions 05 Embeddings _ 21 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _ diff --git a/UD_any/tagparser_sequential/machine.tm b/UD_any/tagparser_sequential/machine.tm index f1c0b0e..2817689 100644 --- a/UD_any/tagparser_sequential/machine.tm +++ b/UD_any/tagparser_sequential/machine.tm @@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tagger tagger @@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tagger MOVE tagger strategy morpho MOVE morpho @@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tagger strategy * morpho strategy * lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tagparser_sequential/normal.tm b/UD_any/tagparser_sequential/normal.tm index f1c0b0e..2817689 100644 --- a/UD_any/tagparser_sequential/normal.tm +++ b/UD_any/tagparser_sequential/normal.tm @@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tagger tagger @@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tagger MOVE tagger strategy morpho MOVE morpho @@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tagger strategy * morpho strategy * lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tagparser_sequential/segmenter.cla b/UD_any/tagparser_sequential/segmenter.cla new file mode 100644 index 0000000..6a817d1 --- /dev/null +++ b/UD_any/tagparser_sequential/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/tagger.fm +Action Set : data/segmenter.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tagparser_sequential/tagparser.dicts b/UD_any/tagparser_sequential/tagparser.dicts index c328e31..9672e00 100644 --- a/UD_any/tagparser_sequential/tagparser.dicts +++ b/UD_any/tagparser_sequential/tagparser.dicts @@ -49,6 +49,14 @@ Parser_lemma 30 Embeddings _ 50000 Parser_letters 30 Embeddings _ 20000 Parser_labels 18 Embeddings _ 50 Parser_morpho 22 Embeddings _ 50000 +# SEGMENTER +Segmenter_bool 02 Embeddings _ 5 +Segmenter_int 05 Embeddings _ 200 +Segmenter_letters 30 Embeddings _ 200000 +Segmenter_pos 15 Embeddings _ 21 +Segmenter_form 30 Embeddings _ 50000 +Segmenter_form.f 30 Embeddings _ 200000 +Segmenter_actions 05 Embeddings _ 21 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _ diff --git a/UD_any/tagparser_sequential_strong/machine.tm b/UD_any/tagparser_sequential_strong/machine.tm index f1c0b0e..e0858d0 100644 --- a/UD_any/tagparser_sequential_strong/machine.tm +++ b/UD_any/tagparser_sequential_strong/machine.tm @@ -1,4 +1,4 @@ -Name : Tagger, Morpho, Lemmatizer and Parser sequential Machine +Name : Tagger, Morpho, Lemmatizer and Parser Machine Dicts : tagparser.dicts %CLASSIFIERS strategy strategy.cla @@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tagger tagger @@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tagger MOVE tagger strategy morpho MOVE morpho @@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tagger strategy * morpho strategy * lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tagparser_sequential_strong/normal.tm b/UD_any/tagparser_sequential_strong/normal.tm index f1c0b0e..e0858d0 100644 --- a/UD_any/tagparser_sequential_strong/normal.tm +++ b/UD_any/tagparser_sequential_strong/normal.tm @@ -1,4 +1,4 @@ -Name : Tagger, Morpho, Lemmatizer and Parser sequential Machine +Name : Tagger, Morpho, Lemmatizer and Parser Machine Dicts : tagparser.dicts %CLASSIFIERS strategy strategy.cla @@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tagger tagger @@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tagger MOVE tagger strategy morpho MOVE morpho @@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tagger strategy * morpho strategy * lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tagparser_sequential_strong/segmenter.cla b/UD_any/tagparser_sequential_strong/segmenter.cla new file mode 100644 index 0000000..6a817d1 --- /dev/null +++ b/UD_any/tagparser_sequential_strong/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/tagger.fm +Action Set : data/segmenter.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tagparser_sequential_strong/tagparser.dicts b/UD_any/tagparser_sequential_strong/tagparser.dicts index c328e31..9672e00 100644 --- a/UD_any/tagparser_sequential_strong/tagparser.dicts +++ b/UD_any/tagparser_sequential_strong/tagparser.dicts @@ -49,6 +49,14 @@ Parser_lemma 30 Embeddings _ 50000 Parser_letters 30 Embeddings _ 20000 Parser_labels 18 Embeddings _ 50 Parser_morpho 22 Embeddings _ 50000 +# SEGMENTER +Segmenter_bool 02 Embeddings _ 5 +Segmenter_int 05 Embeddings _ 200 +Segmenter_letters 30 Embeddings _ 200000 +Segmenter_pos 15 Embeddings _ 21 +Segmenter_form 30 Embeddings _ 50000 +Segmenter_form.f 30 Embeddings _ 200000 +Segmenter_actions 05 Embeddings _ 21 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _ diff --git a/UD_any/tokeparser/machine.tm b/UD_any/tokeparser/machine.tm index 0e1b043..f591178 100644 --- a/UD_any/tokeparser/machine.tm +++ b/UD_any/tokeparser/machine.tm @@ -9,6 +9,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tokenizer tokenizer @@ -18,6 +19,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tokenizer MOVE tokenizer strategy tagger MOVE tagger @@ -26,6 +28,7 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tokenizer strategy * tagger strategy * morpho strategy * @@ -33,3 +36,4 @@ lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tokeparser/segmenter.cla b/UD_any/tokeparser/segmenter.cla new file mode 100644 index 0000000..6a817d1 --- /dev/null +++ b/UD_any/tokeparser/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/tagger.fm +Action Set : data/segmenter.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser/tokeparser.dicts b/UD_any/tokeparser/tokeparser.dicts index 4534960..c9235b9 100644 --- a/UD_any/tokeparser/tokeparser.dicts +++ b/UD_any/tokeparser/tokeparser.dicts @@ -56,6 +56,14 @@ Parser_lemma 30 Embeddings _ 50000 Parser_letters 30 Embeddings _ 20000 Parser_labels 18 Embeddings _ 50 Parser_morpho 22 Embeddings _ 50000 +# SEGMENTER +Segmenter_bool 02 Embeddings _ 5 +Segmenter_int 05 Embeddings _ 200 +Segmenter_letters 30 Embeddings _ 200000 +Segmenter_pos 15 Embeddings _ 21 +Segmenter_form 30 Embeddings _ 50000 +Segmenter_form.f 30 Embeddings _ 200000 +Segmenter_actions 05 Embeddings _ 21 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _ diff --git a/UD_any/tokeparser_incremental/machine.tm b/UD_any/tokeparser_incremental/machine.tm index 0e1b043..f591178 100644 --- a/UD_any/tokeparser_incremental/machine.tm +++ b/UD_any/tokeparser_incremental/machine.tm @@ -9,6 +9,7 @@ lemmatizer_lookup lemmatizer_lookup.cla lemmatizer_rules lemmatizer_rules.cla lemmatizer_case lemmatizer_case.cla parser parser.cla +segmenter segmenter.cla %STATES strategy strategy tokenizer tokenizer @@ -18,6 +19,7 @@ lemmatizer_lookup lemmatizer_lookup lemmatizer_rules lemmatizer_rules lemmatizer_case lemmatizer_case parser parser +segmenter segmenter %TRANSITIONS strategy tokenizer MOVE tokenizer strategy tagger MOVE tagger @@ -26,6 +28,7 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup strategy lemmatizer_rules MOVE lemmatizer_rules strategy lemmatizer_case MOVE lemmatizer_case strategy parser MOVE parser +strategy segmenter MOVE segmenter tokenizer strategy * tagger strategy * morpho strategy * @@ -33,3 +36,4 @@ lemmatizer_lookup strategy * lemmatizer_case strategy * lemmatizer_rules strategy * parser strategy * +segmenter strategy * diff --git a/UD_any/tokeparser_incremental/segmenter.cla b/UD_any/tokeparser_incremental/segmenter.cla new file mode 100644 index 0000000..6a817d1 --- /dev/null +++ b/UD_any/tokeparser_incremental/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/tagger.fm +Action Set : data/segmenter.as +Topology : (500,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental/tokeparser.dicts b/UD_any/tokeparser_incremental/tokeparser.dicts index 7b7fd2b..fd78502 100644 --- a/UD_any/tokeparser_incremental/tokeparser.dicts +++ b/UD_any/tokeparser_incremental/tokeparser.dicts @@ -59,6 +59,14 @@ Parser_lemma 30 Embeddings _ 50000 Parser_letters 30 Embeddings _ 20000 Parser_labels 18 Embeddings _ 50 Parser_morpho 22 Embeddings _ 50000 +# SEGMENTER +Segmenter_bool 02 Embeddings _ 5 +Segmenter_int 05 Embeddings _ 200 +Segmenter_letters 30 Embeddings _ 200000 +Segmenter_pos 15 Embeddings _ 21 +Segmenter_form 30 Embeddings _ 50000 +Segmenter_form.f 30 Embeddings _ 200000 +Segmenter_actions 05 Embeddings _ 21 # ERROR_TAGGER Error_Tagger_actions 18 Embeddings _ Error_Tagger_bool 16 Embeddings _ -- GitLab