diff --git a/UD_any/data/Makefile b/UD_any/data/Makefile index 49aea81e1f9fc5fbc29803d21d1b6d40a8d22e77..926906cd7530888c29ec89a9203eb50533433b1f 100644 --- a/UD_any/data/Makefile +++ b/UD_any/data/Makefile @@ -37,14 +37,14 @@ tokenizer.ts: all_no_test.conllu $(MCD) segmenter.ts: echo "EOS b.0" > $@ - echo "REWRITE b.0 EOS _" >> $@ + echo "NOTHING" >> $@ + sed -i -e 's/^/<segmenter> /' $@ columns: all_no_test.conllu $(MCD) for number in 1 2 3 4 5 6 7 8 9 10 ; do \ cat all_no_test.conllu | sed '/^#/ d' | cut -f$$number | sort --unique > col_$$number.txt ; \ done ./getTransitionSets.py $(MCD) col_*\.txt - cat tagger.ts parser.ts > taggerparser.ts texts: ./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILES) $(TEST_FILES) diff --git a/UD_any/data/getTransitionSets.py b/UD_any/data/getTransitionSets.py index 34b9551dabe61cdce20ea77dccd6d1c2fccd04fe..dd5ad305b9e2cd3638271b72915f196ee01d2e5e 100755 --- a/UD_any/data/getTransitionSets.py +++ b/UD_any/data/getTransitionSets.py @@ -70,25 +70,6 @@ if __name__ == "__main__" : output.close() elif nameCol == "DEPREL" : - output = open("parser_legacy.ts", 'w', encoding='utf-8') - print("REDUCE", file=output) - labels = set() - labelsList = [] - for line in open(colFile, "r", encoding='utf-8') : - striped = line.strip() - if len(striped) == 0 or striped == "root" or striped == "_" : - continue - label = striped.split(':')[0] - if label not in labels : - labels.add(striped) - labelsList.append(striped) - labelsList.sort() - for label in labelsList : - print("LEFT " + label, file=output) - print("RIGHT " + label, file=output) - print("EOS s.0", file=output) - print("SHIFT", file=output) - output.close() output = open("parser.ts", 'w', encoding='utf-8') print("<parser> REDUCE", file=output) labels = set() @@ -105,7 +86,6 @@ if __name__ == "__main__" : for label in labelsList : print("<parser> LEFT " + label, file=output) print("<parser> RIGHT " + label, file=output) - print("<parser> EOS", file=output) print("<parser> SHIFT", file=output) output.close() diff --git a/UD_any/launchBatches.py b/UD_any/launchBatches.py index 8355460b0a947a0e25e3e03c9ebe866ef775114d..40ef2439b2a267ddd4dad796c32309ad04ab6de9 100755 --- a/UD_any/launchBatches.py +++ b/UD_any/launchBatches.py @@ -182,7 +182,13 @@ def getOarNbUsedGpuPerNode() : cores = 1 gpunum = 1 if "core=" in ressources : - cores = int(ressources.split("core=")[-1].split('/')[0]) + coresStr="" + coresStrBase = ressources.split("core=")[-1] + for symbol in coresStrBase : + if symbol < '0' or symbol > '9' : + break + coresStr = coresStr + symbol + cores = int(coresStr) if "gpunum=" in ressources : gpunum = int(ressources.split("gpunum=")[-1].split(',')[0]) diff --git a/UD_any/tagparser_incr/machine.rm b/UD_any/tagparser_incr/machine.rm new file mode 100644 index 0000000000000000000000000000000000000000..04a60dbcd5de48961d7d2a0f1d2d867bd396996e --- /dev/null +++ b/UD_any/tagparser_incr/machine.rm @@ -0,0 +1,30 @@ +Name : Tagger, Feats , Parser and Segmenter incremental Machine +Classifier : tagparser +{ + Transitions : {tagger,data/tagger.ts morpho,data/morpho_parts.ts parser,data/parser.ts segmenter,data/segmenter.ts} + Network type : Modular + StateName : States{tagger morpho parser segmenter} Out{64} + Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM UPOS} GRU{1 1 0 1} In{64} Out{64} + Focused : Column{ID} NbElem{1} Buffer{-1 0 1 2} Stack{2 1 0} GRU{1 1 0 1} In{64} Out{64} + Focused : Column{FORM} NbElem{10} Buffer{-1 0 1 2} Stack{2 1 0} GRU{1 1 0 1} In{64} Out{64} + Focused : Column{FEATS} NbElem{10} Buffer{-1 0} Stack{2 1 0} GRU{1 1 0 1} In{64} Out{64} + Focused : Column{EOS} NbElem{1} Buffer{-1} Stack{} GRU{1 1 0 1} In{64} Out{64} + Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} GRU{1 1 0 1} In{64} Out{64} + DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} GRU{1 1 0.0 1} In{64} Out{64} + InputDropout : 0.3 + MLP : {2048 0.3} + End + Optimizer : Adam {0.0002 0.9 0.999 0.00000001 0.00001 true} +} +Predictions : UPOS FEATS HEAD DEPREL EOS +Strategy +{ + Block : End{cannotMove} + tagger morpho * 0 + morpho parser NOTHING 0 + morpho morpho * 0 + parser segmenter SHIFT 0 + parser segmenter RIGHT 0 + parser parser * 0 + segmenter tagger * 1 +} diff --git a/UD_any/tagparser_seq/machine.rm b/UD_any/tagparser_seq/machine.rm index a6ef1f16cbe12b31655587496c7452411a0e32d6..7364977b36f05e1965750ffd8b1beed70f7bcfc9 100644 --- a/UD_any/tagparser_seq/machine.rm +++ b/UD_any/tagparser_seq/machine.rm @@ -1,26 +1,32 @@ -Name : Tagger, Feats and Parser Machine -Classifier : tagmorpho +Name : Tagger, Feats , Parser and Segmenter sequential Machine +Classifier : tagparser { - Transitions : {tagger,data/tagger.ts morpho,data/morpho_parts.ts parser,data/parser.ts} + Transitions : {tagger,data/tagger.ts morpho,data/morpho_parts.ts parser,data/parser.ts segmenter,data/segmenter.ts} Network type : Modular - StateName : States{tagger morpho parser} Out{128} - Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM UPOS} GRU{1 1 0 1} In{64} Out{128} - Focused : Column{ID} NbElem{1} Buffer{-1 0 1 2} Stack{2 1 0} GRU{1 1 0 1} In{64} Out{128} - Focused : Column{FORM} NbElem{10} Buffer{-1 0 1 2} Stack{2 1 0} GRU{1 1 0 1} In{64} Out{128} - Focused : Column{FEATS} NbElem{10} Buffer{-1 0 1 2} Stack{2 1 0} GRU{1 1 0 1} In{64} Out{128} - Focused : Column{EOS} NbElem{1} Buffer{-1} Stack{2 1 0} GRU{1 1 0 1} In{64} Out{128} - Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} GRU{1 1 0 1} In{64} Out{128} - DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} GRU{1 1 0.0 1} In{64} Out{128} + StateName : States{tagger morpho parser segmenter} Out{64} + Context : Buffer{-3 -2 -1 0 1 2} Stack{} Columns{FORM UPOS} GRU{1 1 0 1} In{64} Out{64} + Focused : Column{ID} NbElem{1} Buffer{-1 0 1 2} Stack{2 1 0} GRU{1 1 0 1} In{64} Out{64} + Focused : Column{FORM} NbElem{10} Buffer{-1 0 1 2} Stack{2 1 0} GRU{1 1 0 1} In{64} Out{64} + Focused : Column{FEATS} NbElem{10} Buffer{-1 0 1 2} Stack{2 1 0} GRU{1 1 0 1} In{64} Out{64} + Focused : Column{EOS} NbElem{1} Buffer{-1} Stack{} GRU{1 1 0 1} In{64} Out{64} + Focused : Column{DEPREL} NbElem{1} Buffer{} Stack{2 1 0} GRU{1 1 0 1} In{64} Out{64} + DepthLayerTree : Columns{DEPREL} Buffer{} Stack{2 1 0} LayerSizes{3} GRU{1 1 0.0 1} In{64} Out{64} InputDropout : 0.3 MLP : {2048 0.3} End Optimizer : Adam {0.0002 0.9 0.999 0.00000001 0.00001 true} } Predictions : UPOS FEATS HEAD DEPREL EOS -Strategy : sequential - tagger morpho 1 - morpho parser NOTHING 1 - morpho parser 0 - parser tagger SHIFT 1 - parser tagger RIGHT 1 - parser tagger 0 +Strategy +{ + Block : End{cannotMove} + tagger tagger * 1 + Block : End{cannotMove} + morpho morpho NOTHING 1 + morpho morpho * 0 + Block : End{cannotMove} + parser segmenter SHIFT 0 + parser segmenter RIGHT 0 + parser parser * 0 + segmenter parser * 1 +}