#! /usr/bin/env python3
"""Concatenate per-index prediction and gold files found under a directory.

The script walks ``bin/`` looking for files whose name contains
``predicted_eval`` (predictions) or ``test.conllu`` (gold data). The model
name and the numeric index are derived from the path of the directory that
contains each file, which is expected to look like
``<model>_<index>.<suffix>``. For every model, the prediction files are
concatenated in index order into ``predOutputDir/<model>/corpus.conllu``.
The gold files of the first model encountered are concatenated in index
order into ``goldOutputDir/corpus.conllu``.
"""

import os
import sys

# Substrings identifying the two kinds of files, in the order of the slots
# of each [predicted, gold] pair stored per index.
FILE_MARKERS = ("predicted_eval", "test.conllu")

################################################################################
def printUsageAndExit():
    """Print the command-line usage on stderr and exit with status 1."""
    print("USAGE : %s bin/ predOutputDir/ goldOutputDir/"%sys.argv[0], file=sys.stderr)
    # sys.exit is always available, unlike the `exit` helper injected by `site`.
    sys.exit(1)
################################################################################

################################################################################
def getModelModelIndexFilename(d, f):
    """Derive the model name, model index and file path of a walked file.

    Parameters:
        d: an entry yielded by ``os.walk``; only ``d[0]`` (the directory
           path) is used.
        f: name of a file located in that directory.

    Returns:
        A ``(model, modelIndex, filename)`` tuple. The directory path is cut
        at its last ``'.'``, and the last ``'/'``-separated component of
        what remains is split at its last ``'_'`` into ``model`` and the
        integer ``modelIndex``. ``filename`` is ``d[0] + "/" + f``.

    Raises:
        ValueError: if the part following the last ``'_'`` is not an integer
        (including when the directory path contains no ``'.'``).
    """
    dirPath = d[0]
    # Everything before the last '.', reduced to its last path component.
    stem = dirPath.rpartition('.')[0].split('/')[-1]
    model, _, indexText = stem.rpartition('_')
    try:
        modelIndex = int(indexText)
    except ValueError:
        raise ValueError(
            "cannot extract a model index from directory '%s' "
            "(expected a name like <model>_<index>.<suffix>)" % dirPath
        ) from None
    filename = dirPath+"/"+f
    return model, modelIndex, filename
################################################################################

################################################################################
def collectFilesByModel(binDir):
    """Map each model name to its list of ``[predicted, gold]`` file paths.

    The list is indexed by model index; a slot for which no file was found
    keeps the empty string.
    """
    filesByModel = {}
    for d in os.walk(binDir):
        for f in d[2]:
            # A file name may contain both markers, so every marker is tested.
            for slot, marker in enumerate(FILE_MARKERS):
                if marker not in f:
                    continue
                model, index, filename = getModelModelIndexFilename(d, f)
                pairs = filesByModel.setdefault(model, [])
                # Grow the list so that `index` is a valid position.
                while len(pairs) <= index:
                    pairs.append(["", ""])
                pairs[index][slot] = filename
    return filesByModel
################################################################################

################################################################################
def concatenateFiles(paths, outPath):
    """Write the content of every file of `paths`, in order, into `outPath`."""
    # CoNLL-U files are UTF-8; do not depend on the locale's default encoding.
    with open(outPath, "w", encoding="utf-8") as out:
        for path in paths:
            with open(path, "r", encoding="utf-8") as src:
                out.writelines(src)
################################################################################

################################################################################
def main(argv):
    """Run the script with `argv` laid out like ``sys.argv``."""
    if len(argv) != 4:
        printUsageAndExit()

    binDir, outputDir, goldOutputDir = argv[1:4]

    filesByModel = collectFilesByModel(binDir)
    if not filesByModel:
        print("ERROR : no '%s' or '%s' file found in %s"
              % (FILE_MARKERS[0], FILE_MARKERS[1], binDir), file=sys.stderr)
        sys.exit(1)

    # The gold corpus is built from the files of the first model encountered.
    referenceModel = next(iter(filesByModel))
    goldFiles = [pair[1] for pair in filesByModel[referenceModel]]

    # Check that every needed file exists before any output is written.
    missing = [
        "%s for model '%s', index %d" % (FILE_MARKERS[1], referenceModel, index)
        for index, path in enumerate(goldFiles) if not path
    ]
    for model, pairs in filesByModel.items():
        missing.extend(
            "%s for model '%s', index %d" % (FILE_MARKERS[0], model, index)
            for index, pair in enumerate(pairs) if not pair[0]
        )
    if missing:
        for description in missing:
            print("ERROR : missing file : %s" % description, file=sys.stderr)
        sys.exit(1)

    os.makedirs(goldOutputDir, exist_ok=True)
    concatenateFiles(goldFiles, goldOutputDir+"/corpus.conllu")

    for model, pairs in filesByModel.items():
        outDir = outputDir+"/"+model
        os.makedirs(outDir, exist_ok=True)
        concatenateFiles([pair[0] for pair in pairs], outDir+"/corpus.conllu")
################################################################################

################################################################################
if __name__ == "__main__":
    main(sys.argv)
################################################################################