#!/usr/bin/env python3

import argparse
import copy
import glob
import math
import sys

################################################################################
if __name__ == "__main__" :
    parser = argparse.ArgumentParser()
    parser.add_argument("dir", type=str, default="",
        help="Directory containing the .stdout (scores) files.")
    parser.add_argument("--score", "-s", type=str, default="F1",
        help="Name of the score to report (F1, L1, L2, R2).")
    parser.add_argument("--metrics", type=str,
        default="Sentences,LAS,UAS,Lemmas,UFeats,UPOS,Words,Tokens",
        help="Comma separated list of metrics.")
    parser.add_argument("--mean", "-m", default=False, action="store_true",
        help="Compute the mean of the metrics.")
    args = parser.parse_args()

    # For each score type: [column header, unit suffix, index of the value in a
    # score line (counted from the right), printf-style format].
    scoreTypes = {
        "F1" : ["F1.score", "%", -1, "%.2f"],
        "R2" : ["R²", "", -3, "%.4f"],
        "L1" : ["L1", "", -1, "%.2f"],
        "L2" : ["L2", "", -2, "%.2f"],
    }
    scoreType = scoreTypes[args.score.upper()]
    metrics = args.metrics.split(',')
    usedMetrics = []
    mean = args.mean

    output = []
    outputByModelScore = dict()
    prefix = (args.dir + "/") if args.dir else ""
    filenamesErr = glob.iglob(prefix + '*stderr')
    filenamesOut = glob.iglob(prefix + '*stdout')

    # Report any error found in the .stderr files.
    for pathToFile in filenamesErr :
        with open(pathToFile, "r") as f :
            for line in f :
                if "Error" in line or "ERROR" in line or "error" in line :
                    print(pathToFile, ":", file=sys.stderr)
                    print("\t" + line, end="", file=sys.stderr)

    # Collect scores; filenames are expected to follow <model>.<corpus>.<index>.stdout.
    for pathToFile in filenamesOut :
        parts = pathToFile.split('/')[-1].split('.')
        model = ".".join(parts[:-3])
        corpus = parts[-3]
        index = parts[-2]  # run index, unused here
        if corpus not in outputByModelScore :
            outputByModelScore[corpus] = dict()
        with open(pathToFile, "r") as f :
            for line in f :
                for metric in metrics :
                    if line.startswith(metric) :
                        fields = line.strip().replace("|", "").split()
                        if model not in outputByModelScore[corpus] :
                            outputByModelScore[corpus][model] = dict()
                        if metric not in outputByModelScore[corpus][model] :
                            outputByModelScore[corpus][model][metric] = []
                        if metric not in usedMetrics :
                            usedMetrics.append(metric)
                        outputByModelScore[corpus][model][metric].append(
                            [corpus, metric, fields[scoreType[2]], model])

    for metric in metrics :
        if metric not in usedMetrics :
            print("WARNING : Unused metric '%s'" % metric, file=sys.stderr)

    # If requested, collapse the metrics of each (corpus, model) into a single
    # per-run mean, keyed by the joined metric names.
    if mean :
        metricName = ",".join(metrics)
        for corpus in outputByModelScore :
            for model in outputByModelScore[corpus] :
                firstMetric = list(outputByModelScore[corpus][model].values())[0]
                nbRedo = len(firstMetric)
                newMetrics = copy.deepcopy(firstMetric)
                for elem in newMetrics :
                    elem[2] = 0
                    elem[1] = metricName
                for redo in range(nbRedo) :
                    for metric in outputByModelScore[corpus][model] :
                        newMetrics[redo][2] += float(
                            outputByModelScore[corpus][model][metric][redo][2])
                    # Divide by the number of metrics actually collected.
                    newMetrics[redo][2] /= len(outputByModelScore[corpus][model])
                outputByModelScore[corpus][model] = {metricName : newMetrics}

    # Average over the repeated runs and format "mean[±stddev]" strings.
    for corpus in outputByModelScore :
        for model in outputByModelScore[corpus] :
            for metric in outputByModelScore[corpus][model] :
                runs = outputByModelScore[corpus][model][metric]
                score = 0.0
                for exp in runs :
                    score += float(exp[2])
                score /= len(runs)
                standardDeviation = 0.0
                if len(runs) > 1 :
                    for exp in runs :
                        standardDeviation += (float(exp[2]) - score) ** 2
                    standardDeviation /= len(runs)
                    standardDeviation = math.sqrt(standardDeviation)
                baseScore = score
                # Only print the deviation when it is nonzero once rounded.
                if float(scoreType[3] % standardDeviation) > 0 :
                    score = ("%s[±%s]%%s" % (scoreType[3], scoreType[3])
                             % (score, standardDeviation, scoreType[1]))
                else :
                    score = "%s%%s" % scoreType[3] % (score, scoreType[1])
                # Strip minus signs (e.g. the "-0.00" rounding artifact); note
                # that this also drops the sign of genuinely negative scores.
                if '-' in score :
                    score = score.replace('-', '')
                output.append(runs[0])
                output[-1][2] = score
                # Row layout: [corpus, metric rank, metric, raw score (sort key),
                # formatted score, model].
                output[-1] = ([output[-1][0]]
                    + ([0] if mean else [metrics.index(output[-1][1])])
                    + [output[-1][1]] + [baseScore] + output[-1][2:])

    if len(output) == 0 :
        print("ERROR : Output length is 0", file=sys.stderr)
        print("  did you run evaluate.sh ?", file=sys.stderr)
        print("USAGE : %s [directory of .stdout files]" % sys.argv[0], file=sys.stderr)
        sys.exit(1)

    output.sort()
    # Drop the sort keys, keeping [corpus, metric, score, model].
    output = [[val[0]] + [val[2]] + val[4:] for val in output]

    # Pretty-print as aligned columns, separating corpora by a dashed line and
    # metrics by a blank line.
    maxColLens = [0 for _ in range(len(output[0]))]
    output = [["Corpus", "Metric", scoreType[0], "Model"]] + output
    for line in output :
        for i in range(len(line)) :
            maxColLens[i] = max(maxColLens[i], len(str(line[i])))
    dashLine = '-' * 80
    for i in range(len(output)) :
        if i > 0 and output[i][0] != output[i-1][0] :
            print(dashLine)
        elif i > 0 and output[i][1] != output[i-1][1] :
            print("")
        for j in range(len(output[i])) :
            padding = ' ' * (maxColLens[j] - len(str(output[i][j]))) + " " * 3
            print(output[i][j], end=padding)
        print("")
################################################################################