Skip to content
Snippets Groups Projects
Commit b49eaa88 authored by Franck Dary
Browse files

Added --ignore option to evaluation script

parent 7c4e0bb7
No related branches found
No related tags found
No related merge requests found
...@@ -3,6 +3,6 @@ ...@@ -3,6 +3,6 @@
LANG=fr LANG=fr
MCF=../data/test.mcf MCF=../data/test.mcf
MCD=../data/wpmlgfs.mcd MCD=../data/wpmlgfs.mcd
ARGS="--keepPunct EOS --relative LABEL GOV" ARGS="--keepPunct EOS --relative LABEL GOV --ignore FORM"
exec ../../scripts/eval.py $LANG $MCF $MCD $* $ARGS exec ../../scripts/eval.py $LANG $MCF $MCD $* $ARGS
...@@ -3,6 +3,6 @@ ...@@ -3,6 +3,6 @@
LANG=fr_orpheo LANG=fr_orpheo
MCF=../data/test.mcf MCF=../data/test.mcf
MCD=../data/wpmlgfs.mcd MCD=../data/wpmlgfs.mcd
ARGS="--keepPunct EOS --relative LABEL GOV" ARGS="--keepPunct EOS --relative LABEL GOV --ignore FORM --ignore SILENCE --ignore SPKRCHANGE"
exec ../../scripts/eval.py $LANG $MCF $MCD $* $ARGS exec ../../scripts/eval.py $LANG $MCF $MCD $* $ARGS
...@@ -42,6 +42,7 @@ def printUsageAndExit() : ...@@ -42,6 +42,7 @@ def printUsageAndExit() :
print("\t--printHeader\n\t\tPrint each column name",file=sys.stderr) print("\t--printHeader\n\t\tPrint each column name",file=sys.stderr)
print("\t--keepPunct columnName\n\t\tDo not ignore punctuation for the column 'columnName'",file=sys.stderr) print("\t--keepPunct columnName\n\t\tDo not ignore punctuation for the column 'columnName'",file=sys.stderr)
print("\t--keepPunctAll\n\t\tDo not ignore punctuation",file=sys.stderr) print("\t--keepPunctAll\n\t\tDo not ignore punctuation",file=sys.stderr)
print("\t--ignore column\n\t\tIgnore column",file=sys.stderr)
print("\t--details column\n\t\tPrint per label details for column",file=sys.stderr) print("\t--details column\n\t\tPrint per label details for column",file=sys.stderr)
print("\t--toolname name\n\t\tSpecify the name of the tool being tested",file=sys.stderr) print("\t--toolname name\n\t\tSpecify the name of the tool being tested",file=sys.stderr)
print("\t--relative column1 column2\n\t\tFor each line, consider that 'column1' was correctly predicted only if 'column2' was also correctly predicted",file=sys.stderr) print("\t--relative column1 column2\n\t\tFor each line, consider that 'column1' was correctly predicted only if 'column2' was also correctly predicted",file=sys.stderr)
...@@ -52,6 +53,7 @@ def readMCD(mcdFilename) : ...@@ -52,6 +53,7 @@ def readMCD(mcdFilename) :
columnIndexes = {} columnIndexes = {}
ignorePunct = {} ignorePunct = {}
details = {} details = {}
ignore = {}
for line in open(mcdFilename, "r", encoding="utf-8") : for line in open(mcdFilename, "r", encoding="utf-8") :
if len(line) == 0 : if len(line) == 0 :
continue continue
...@@ -71,8 +73,9 @@ def readMCD(mcdFilename) : ...@@ -71,8 +73,9 @@ def readMCD(mcdFilename) :
columnIndexes[split[1]] = int(split[0]) columnIndexes[split[1]] = int(split[0])
ignorePunct[split[1]] = True ignorePunct[split[1]] = True
details[split[1]] = False details[split[1]] = False
ignore[split[1]] = False
return columnNames, columnIndexes, ignorePunct, details return columnNames, columnIndexes, ignorePunct, details, ignore
def readMCF(mcfFilename) : def readMCF(mcfFilename) :
columns = [] columns = []
...@@ -144,12 +147,14 @@ def getResultsPerColumn(columnNames, gold, pred, vocabPerColumn, ignorePunct, re ...@@ -144,12 +147,14 @@ def getResultsPerColumn(columnNames, gold, pred, vocabPerColumn, ignorePunct, re
return columns return columns
def printHeader(columnNames, relativeTo) : def printHeader(columnNames, relativeTo, ignore) :
keys = list(columnNames.keys()) keys = list(columnNames.keys())
keys.sort() keys.sort()
print("tool"+" "*(COLSIZE-len("tool")),end=" ") print("tool"+" "*(COLSIZE-len("tool")),end=" ")
for key in keys : for key in keys :
name = columnNames[key] name = columnNames[key]
if ignore[name] :
continue
if key in relativeTo : if key in relativeTo :
name += "|" + columnNames[relativeTo[key]] name += "|" + columnNames[relativeTo[key]]
print(name+" "*(COLSIZE-len(name)),end=" ") print(name+" "*(COLSIZE-len(name)),end=" ")
...@@ -188,7 +193,7 @@ goldFilename = sys.argv[2] ...@@ -188,7 +193,7 @@ goldFilename = sys.argv[2]
predFilename = sys.argv[3] predFilename = sys.argv[3]
mustPrintHeader = False mustPrintHeader = False
columnNames, columnIndexes, ignorePunct, details = readMCD(mcdFilename) columnNames, columnIndexes, ignorePunct, details, ignore = readMCD(mcdFilename)
gold = readMCF(goldFilename) gold = readMCF(goldFilename)
pred = readMCF(predFilename) pred = readMCF(predFilename)
...@@ -231,6 +236,18 @@ while i < len(sys.argv) : ...@@ -231,6 +236,18 @@ while i < len(sys.argv) :
print("Invalid column name '%s' for argument '%s'"%(colName, arg),file=sys.stderr) print("Invalid column name '%s' for argument '%s'"%(colName, arg),file=sys.stderr)
printUsageAndExit() printUsageAndExit()
i += 1 i += 1
if arg == "--ignore" :
goodArg = True
if i+1 >= len(sys.argv) :
print("Missing argument value '%s'"%arg,file=sys.stderr)
printUsageAndExit()
colName = sys.argv[i+1]
if colName in ignore :
ignore[colName] = True
else :
print("Invalid column name '%s' for argument '%s'"%(colName, arg),file=sys.stderr)
printUsageAndExit()
i += 1
if arg == "--toolname" : if arg == "--toolname" :
goodArg = True goodArg = True
if i+1 >= len(sys.argv) : if i+1 >= len(sys.argv) :
...@@ -256,7 +273,7 @@ while i < len(sys.argv) : ...@@ -256,7 +273,7 @@ while i < len(sys.argv) :
i += 1 i += 1
if mustPrintHeader : if mustPrintHeader :
printHeader(columnNames,relativeTo) printHeader(columnNames,relativeTo,ignore)
cutMCD(columnNames, columnIndexes, pred) cutMCD(columnNames, columnIndexes, pred)
...@@ -266,6 +283,8 @@ results = getResultsPerColumn(columnNames, gold, pred, vocabPerColumn, ignorePun ...@@ -266,6 +283,8 @@ results = getResultsPerColumn(columnNames, gold, pred, vocabPerColumn, ignorePun
print(toolName+" "*(COLSIZE-len(toolName)),end=" ") print(toolName+" "*(COLSIZE-len(toolName)),end=" ")
for column in results : for column in results :
if ignore[column.name] :
continue
tmp = "%.2f%%" % column.acc tmp = "%.2f%%" % column.acc
print(tmp+" "*(COLSIZE-len(tmp)),end=" ") print(tmp+" "*(COLSIZE-len(tmp)),end=" ")
print("") print("")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment