#! /usr/bin/env python3
"""Build the job list for the multilingual experiments and submit it.

For every (treebank, transition mode, training variant) combination this
script appends one job name and one ``./main.py`` shell command to the
module-level ``names`` / ``commands`` lists, then hands both lists to
``launchSlurmArray``.

* When ``isEval`` is true, one ``decode`` command per model is generated,
  writing its output under ``output``.
* When ``isEval`` is false, one ``train`` command is generated for every
  model that ``finished`` does not report as complete.
"""

from launchslurmarray import launchSlurmArray
import os
import re

names = []
commands = []

# Directory (with trailing slash) holding one sub-directory per model.
prefix = "multiling/"
# Extra training argument; ``%s`` is filled with the language code.
pretrained = "--pretrained FORM,embeddings/cc.%s.300.vec"

# True: generate decode jobs.  False: generate the missing training jobs.
isEval = True
# Directory (with trailing slash) receiving the decode outputs and traces.
output = "outputMultiling/"

# Matches an "<int>/<int>" token in a log line; compiled once because it is
# applied to every line of every log.
_PROGRESS_RE = re.compile(r"\d+/\d+")

# (treebank name used in data/UD_<name>, language code of the embeddings file)
_LANGUAGES = [
    ("English-GUM", "en"),
    ("German-HDT", "de"),
    ("Russian-SynTagRus", "ru"),
    ("Arabic-PADT", "ar"),
    ("Romanian-RRT", "ro"),
    ("Chinese-GSD", "zh"),
    ("French-GSD", "fr"),
]

_MODES = ["tagger", "eager", "tagparser"]

# One row per training variant, in submission order:
# (model-name suffix, `train` sub-mode, suffix appended to the transition
#  set name, value of -n, extra trailing arguments)
_VARIANTS = [
    ("rlb", "rl", "bt", 300, ""),
    ("rl", "rl", "", 300, ""),
    ("sul", "oracle", "", 200, " --bootstrap 2"),
]


def finished(name):
    """Return True if the training log of model *name* reports completion.

    The log is ``<prefix><name>/train.log``.  The last ``<int>/<int>`` token
    found in the file is taken (presumably the epoch counter -- confirm
    against the trainer's log format); the model counts as finished when
    both sides of that token are the same string.

    Returns False when the log file does not exist or contains no such
    token.
    """
    log_path = "%s/train.log" % (prefix + name)
    if not os.path.isfile(log_path):
        return False

    fromto = None
    # `with` guarantees the handle is closed; the script inspects one log
    # per model and previously left every one of them open.
    with open(log_path, "r") as log:
        for line in log:
            progress = _PROGRESS_RE.search(line)
            if progress is not None:
                # Keep overwriting so that only the last match survives.
                fromto = progress.group(0).split("/")
    return fromto is not None and fromto[0] == fromto[1]


def _train_command(treebank, lang_code, name, trainer, transitions, iterations, extra_args):
    """Return the shell command that trains model *name*."""
    return (
        './main.py train %s data/UD_%s/*train*.conllu %s%s'
        ' --dev data/UD_%s/*dev*.conllu --transitions %s -n %d'
        ' --silent --incr %s%s'
        % (trainer, treebank, prefix, name, treebank, transitions,
           iterations, pretrained % lang_code, extra_args)
    )


def _decode_command(treebank, name):
    """Return the shell command that decodes the test set with model *name*."""
    return (
        './main.py decode model data/UD_%s/*test*.conllu %s%s --silent -d'
        ' > %s%s.conllu 2> %s%s.trace'
        % (treebank, prefix, name, output, name, output, name)
    )


for treebank, lang_code in _LANGUAGES:
    for mode in _MODES:
        for suffix, trainer, transitions_suffix, iterations, extra_args in _VARIANTS:
            name = "%s_%s_incr_%s" % (treebank, mode, suffix)
            if isEval:
                names.append("eval_" + name)
                commands.append(_decode_command(treebank, name))
            elif not finished(name):
                # The log is only read when a training job may actually be
                # scheduled; in eval mode its content is irrelevant.
                names.append(name)
                commands.append(
                    _train_command(
                        treebank, lang_code, name, trainer,
                        mode + transitions_suffix, iterations, extra_args,
                    )
                )

# NOTE(review): the meaning of the trailing "gpu", 100, 9, 2 arguments is
# defined by launchSlurmArray, which is not visible here -- confirm there.
launchSlurmArray(names, commands, "%stacl_rl_multiling" % ("eval_" if isEval else ""), "gpu", 100, 9, 2)