diff --git a/scripts/createKFolds.py b/scripts/createKFolds.py index 8ef39a3e08a0cab418a9e4baed8c279e1b705af2..fe2fe4acc30ae2226390fc0b26f5ccbc45878da5 100755 --- a/scripts/createKFolds.py +++ b/scripts/createKFolds.py @@ -17,6 +17,11 @@ if __name__ == "__main__" : random.seed(100) + corpusName = sys.argv[1] + while corpusName[-1] == '/' : + corpusName = corpusName[:-1] + corpusName = corpusName.split('/')[-1] + inputFiles = [sys.argv[1]+"/"+filename for filename in os.listdir(sys.argv[1]) if ".conllu" in filename] sentences = [] @@ -56,10 +61,11 @@ if __name__ == "__main__" : train = trainDev[:-testSize] dev = trainDev[-testSize:] - outDir = sys.argv[2]+"/"+sys.argv[1] + outDir = sys.argv[2]+"/"+corpusName while outDir[-1] == '/' : outDir = outDir[:-1] outDir = outDir + "_" + str(k) + print("Creating '%s'"%outDir, file=sys.stderr) os.makedirs(outDir, exist_ok=True) for sents, name in [(train, "train"), (dev, "dev"), (test, "test")] : with open(outDir + "/" + "%s.conllu"%name, "w") as outFile :