diff --git a/UD_any/data/Makefile b/UD_any/data/Makefile index 9fb308c20ecb21594e8ecc8abf870b82d9cfc4b2..0216349fa428f09def44962c68c0e23c6a6508ec 100644 --- a/UD_any/data/Makefile +++ b/UD_any/data/Makefile @@ -36,6 +36,7 @@ columns: all_no_test.conllu $(MCD) cat all_no_test.conllu | sed '/^#/ d' | cut -f$$number | sort --unique > col_$$number.txt ; \ done ./getTransitionSets.py $(MCD) col_*\.txt + cat tagger.ts parser.ts > taggerparser.ts texts: ./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILEs) $(TEST_FILES) diff --git a/UD_any/data/getTransitionSets.py b/UD_any/data/getTransitionSets.py index 1ee4d745048ff45f9bb25894a04f6aeb7cec81d6..7a98d7cf0b37b91639085db646c29dd2b41be5fc 100755 --- a/UD_any/data/getTransitionSets.py +++ b/UD_any/data/getTransitionSets.py @@ -31,7 +31,7 @@ if __name__ == "__main__" : striped = line.strip() if len(striped) == 0 : continue - print("WRITE b.0 UPOS " + striped, file=output) + print("<tagger> WRITE b.0 UPOS " + striped, file=output) output.close() elif nameCol == "XPOS" : @@ -90,7 +90,7 @@ if __name__ == "__main__" : print("SHIFT", file=output) output.close() output = open("parser.ts", 'w', encoding='utf-8') - print("REDUCE", file=output) + print("<parser> REDUCE", file=output) labels = set() labelsList = [] for line in open(colFile, "r", encoding='utf-8') : @@ -103,8 +103,9 @@ if __name__ == "__main__" : labelsList.append(striped) labelsList.sort() for label in labelsList : - print("LEFT " + label, file=output) - print("RIGHT " + label, file=output) - print("SHIFT", file=output) + print("<parser> LEFT " + label, file=output) + print("<parser> RIGHT " + label, file=output) + print("<parser> EOS", file=output) + print("<parser> SHIFT", file=output) output.close()