From 6607f37acade5f87fc90e6367095670b03d146e6 Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Sun, 3 May 2020 18:59:32 +0200 Subject: [PATCH] updated tagparser_seq for new strategy --- UD_any/data/Makefile | 4 ++-- UD_any/data/getTransitionSets.py | 20 -------------------- UD_any/tagparser_seq/machine.rm | 28 +++++++++++++++++----------- 3 files changed, 19 insertions(+), 33 deletions(-) diff --git a/UD_any/data/Makefile b/UD_any/data/Makefile index 49aea81..926906c 100644 --- a/UD_any/data/Makefile +++ b/UD_any/data/Makefile @@ -37,14 +37,14 @@ tokenizer.ts: all_no_test.conllu $(MCD) segmenter.ts: echo "EOS b.0" > $@ - echo "REWRITE b.0 EOS _" >> $@ + echo "NOTHING" >> $@ + sed -i -e 's/^/<segmenter> /' $@ columns: all_no_test.conllu $(MCD) for number in 1 2 3 4 5 6 7 8 9 10 ; do \ cat all_no_test.conllu | sed '/^#/ d' | cut -f$$number | sort --unique > col_$$number.txt ; \ done ./getTransitionSets.py $(MCD) col_*\.txt - cat tagger.ts parser.ts > taggerparser.ts texts: ./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILES) $(TEST_FILES) diff --git a/UD_any/data/getTransitionSets.py b/UD_any/data/getTransitionSets.py index 34b9551..dd5ad30 100755 --- a/UD_any/data/getTransitionSets.py +++ b/UD_any/data/getTransitionSets.py @@ -70,25 +70,6 @@ if __name__ == "__main__" : output.close() elif nameCol == "DEPREL" : - output = open("parser_legacy.ts", 'w', encoding='utf-8') - print("REDUCE", file=output) - labels = set() - labelsList = [] - for line in open(colFile, "r", encoding='utf-8') : - striped = line.strip() - if len(striped) == 0 or striped == "root" or striped == "_" : - continue - label = striped.split(':')[0] - if label not in labels : - labels.add(striped) - labelsList.append(striped) - labelsList.sort() - for label in labelsList : - print("LEFT " + label, file=output) - print("RIGHT " + label, file=output) - print("EOS s.0", file=output) - print("SHIFT", file=output) - output.close() output = open("parser.ts", 'w', encoding='utf-8') print("<parser> REDUCE", file=output) labels = set() @@ -105,7 +86,6 @@ if __name__ == "__main__" : for label in labelsList : print("<parser> LEFT " + label, file=output) print("<parser> RIGHT " + label, file=output) - print("<parser> EOS", file=output) print("<parser> SHIFT", file=output) output.close() diff --git a/UD_any/tagparser_seq/machine.rm b/UD_any/tagparser_seq/machine.rm index a6ef1f1..9efe192 100644 --- a/UD_any/tagparser_seq/machine.rm +++ b/UD_any/tagparser_seq/machine.rm @@ -1,9 +1,9 @@ -Name : Tagger, Feats and Parser Machine -Classifier : tagmorpho +Name : Tagger, Feats , Parser and Segmenter sequential Machine +Classifier : tagparser { - Transitions : {tagger,data/tagger.ts morpho,data/morpho_parts.ts parser,data/parser.ts} + Transitions : {tagger,data/tagger.ts morpho,data/morpho_parts.ts parser,data/parser.ts segmenter,data/segmenter.ts} Network type : Modular - StateName : States{tagger morpho parser} Out{128} + StateName : States{tagger morpho parser segmenter} Out{128} Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM UPOS} GRU{1 1 0 1} In{64} Out{128} Focused : Column{ID} NbElem{1} Buffer{-1 0 1 2} Stack{2 1 0} GRU{1 1 0 1} In{64} Out{128} Focused : Column{FORM} NbElem{10} Buffer{-1 0 1 2} Stack{2 1 0} GRU{1 1 0 1} In{64} Out{128} @@ -17,10 +17,16 @@ Classifier : tagmorpho Optimizer : Adam {0.0002 0.9 0.999 0.00000001 0.00001 true} } Predictions : UPOS FEATS HEAD DEPREL EOS -Strategy : sequential - tagger morpho 1 - morpho parser NOTHING 1 - morpho parser 0 - parser tagger SHIFT 1 - parser tagger RIGHT 1 - parser tagger 0 +Strategy +{ + Block : End{cannotMove} + tagger tagger * 1 + Block : End{cannotMove} + morpho morpho NOTHING 1 + morpho morpho * 0 + Block : End{cannotMove} + parser segmenter SHIFT 0 + parser segmenter RIGHT 0 + parser parser * 0 + segmenter parser * 1 +} -- GitLab