Something went wrong on our end
-
Franck Dary authored
print_results.py 5.25 KiB
#! /usr/bin/env python3
import argparse
import glob
import sys
import math
import copy
################################################################################
def _report_logged_errors(prefix):
    """Echo to stderr every line of the '*stderr' files that mentions an error.

    prefix is prepended to the glob pattern; an empty prefix scans the
    current directory.
    """
    for path in glob.iglob(prefix + '*stderr'):
        with open(path, "r") as stream:
            for line in stream:
                if "Error" in line or "ERROR" in line or "error" in line:
                    print(path, ":", file=sys.stderr)
                    print("\t" + line, end="", file=sys.stderr)


def _collect_scores(prefix, metrics, column):
    """Read the '*stdout' files and gather the raw score of every metric.

    File names are expected to look like '<model>.<corpus>.<index>.stdout'
    (the model part may itself contain dots).  A line belongs to a metric
    when it contains the metric name and starts with the same character.
    The score is the field at position `column` of the line once the '|'
    characters are removed and the line is split on whitespace.

    Returns a pair (scores, used) where
    scores[corpus][model][metric] is a list of [corpus, metric, raw, model]
    entries (one per matching line) and used is the set of metrics that
    were found at least once.
    """
    scores = dict()
    used = set()
    for path in glob.iglob(prefix + '*stdout'):
        name_parts = path.split('/')[-1].split('.')
        if len(name_parts) < 3:
            # Cannot tell the corpus from the model: skip instead of crashing.
            print("WARNING : Ignoring file with unexpected name '%s'" % path,
                  file=sys.stderr)
            continue
        model = ".".join(name_parts[:-3])
        corpus = name_parts[-3]
        by_model = scores.setdefault(corpus, dict())
        with open(path, "r") as stream:
            for line in stream:
                for metric in metrics:
                    if metric in line and metric[0] == line[0]:
                        fields = line.strip().replace("|", "").split()
                        runs = by_model.setdefault(model, dict()).setdefault(metric, [])
                        used.add(metric)
                        runs.append([corpus, metric, fields[column], model])
    return scores, used


def _average_metrics(scores, metric_name):
    """Replace, in place, the metrics of each model by their per-run average.

    For every corpus/model, run number i of every metric is averaged into a
    single pseudo-metric called metric_name.  The sum is divided by the
    number of metrics actually collected for that model, so a requested
    metric that is absent from the files does not drag the average down.
    Every metric is assumed to have as many runs as the first one.
    """
    for corpus in scores:
        for model in scores[corpus]:
            per_metric = list(scores[corpus][model].values())
            averaged = copy.deepcopy(per_metric[0])
            for run, entry in enumerate(averaged):
                entry[1] = metric_name
                entry[2] = sum(float(runs[run][2]) for runs in per_metric) / len(per_metric)
            scores[corpus][model] = {metric_name: averaged}


def _mean_and_deviation(values):
    """Return the mean and the population standard deviation of values."""
    mean = sum(values) / len(values)
    deviation = 0.0
    if len(values) > 1:
        deviation = math.sqrt(sum((value - mean) ** 2 for value in values) / len(values))
    return mean, deviation


def _format_score(mean, deviation, number_format, unit):
    """Render 'mean[±deviation]unit'; the deviation is omitted when it rounds to zero."""
    if float(number_format % deviation) > 0:
        text = "%s[±%s]%s" % (number_format % mean, number_format % deviation, unit)
    else:
        text = (number_format % mean) + unit
    # NOTE(review): every '-' is stripped from the rendered score, presumably to
    # avoid showing values such as '-0.00'; this also hides the sign of
    # genuinely negative scores -- confirm this is intended.
    return text.replace('-', '')


def _print_table(rows, score_label):
    """Print rows as aligned columns under a header line.

    A dashed line is printed whenever the first column changes between two
    consecutive lines, an empty line whenever only the second one does.
    """
    table = [["Corpus", "Metric", score_label, "Model"]] + rows
    widths = [max(len(str(line[col])) for line in table) for col in range(len(table[0]))]
    separator = '-' * 80
    previous = None
    for line in table:
        if previous is not None:
            if line[0] != previous[0]:
                print(separator)
            elif line[1] != previous[1]:
                print("")
        # Each cell is padded to its column width, followed by three spaces.
        print("".join(str(cell).ljust(width) + " " * 3 for cell, width in zip(line, widths)))
        previous = line


def main(argv=None):
    """Summarise the scores found in the '.stdout' files of a directory.

    argv is the list of command line arguments (defaults to sys.argv[1:]).
    For each corpus, metric and model, the mean of the score over all the
    matching files is printed, followed by its standard deviation when it
    is not zero.  Exits with status 1 when no score could be found.
    """
    parser = argparse.ArgumentParser()
    # nargs="?" makes the default reachable: without a directory the current
    # one is scanned.
    parser.add_argument("dir", type=str, nargs="?", default="",
        help="Directory containing the .stdout (scores) files.")
    parser.add_argument("--score", "-s", type=str, default="F1",
        help="Name of the score to report (F1,L1,L2,R2).")
    parser.add_argument("--metrics", type=str, default="Sentences,LAS,UAS,Lemmas,UFeats,UPOS,Words,Tokens",
        help="Comma separated list of metrics.")
    parser.add_argument("--mean", "-m", default=False, action="store_true",
        help="compute the mean of metrics.")
    args = parser.parse_args(argv)

    # score name -> [column header, unit, index of the field in a score line,
    #                number format]
    score_types = {
        "F1" : ["F1.score","%",-1,"%.2f"],
        "R2" : ["R²","",-3,"%.4f"],
        "L1" : ["L1","",-1,"%.2f"],
        "L2" : ["L2","",-2,"%.2f"],
    }
    score_key = args.score.upper()
    if score_key not in score_types:
        parser.error("unknown score '%s' (expected one of %s)"
                     % (args.score, ",".join(score_types)))
    label, unit, column, number_format = score_types[score_key]

    # Empty names (e.g. produced by a trailing comma) cannot match any line.
    metrics = [name for name in args.metrics.split(',') if name]

    # Any non-empty directory is used as a prefix, single-character names included.
    prefix = args.dir + "/" if args.dir else ""

    _report_logged_errors(prefix)
    scores, used = _collect_scores(prefix, metrics, column)

    for metric in metrics:
        if metric not in used:
            print("WARNING : Unused metric '%s'"%metric, file=sys.stderr)

    if args.mean:
        _average_metrics(scores, ",".join(metrics))

    # Sort key: corpus, position of the metric in the requested list, metric
    # name, numeric mean, rendered score, model.
    records = []
    for corpus in scores:
        for model in scores[corpus]:
            for metric, runs in scores[corpus][model].items():
                mean, deviation = _mean_and_deviation([float(run[2]) for run in runs])
                rendered = _format_score(mean, deviation, number_format, unit)
                order = 0 if args.mean else metrics.index(metric)
                records.append((corpus, order, metric, mean, rendered, model))

    if not records:
        print("ERROR : Output length is 0", file=sys.stderr)
        print("  did you run evaluate.sh ?", file=sys.stderr)
        print("USAGE : %s [directory of .stdout files]"%sys.argv[0], file=sys.stderr)
        sys.exit(1)

    records.sort()
    rows = [[corpus, metric, rendered, model]
            for corpus, _, metric, _, rendered, model in records]
    _print_table(rows, label)


if __name__ == "__main__" :
    main()
################################################################################