Skip to content
Snippets Groups Projects
Commit 00844dd5 authored by Franck Dary
Browse files

Fixed R2 computation in eval script

parent f9036439
Branches
No related tags found
No related merge requests found
...@@ -365,20 +365,21 @@ def load_conllu(file) : ...@@ -365,20 +365,21 @@ def load_conllu(file) :
def evaluate(gold_ud, system_ud, extraColumns) : def evaluate(gold_ud, system_ud, extraColumns) :
# Score: holds the metrics computed for one evaluated attribute.
# NOTE: this is a side-by-side diff rendering -- every row below shows the
# pre-commit statement followed by the post-commit statement.
#
# Pre-commit, `correct` is a single number. Post-commit, `correct` is a list:
#   correct[0] -- number of matching items; for numeric-only columns it is
#                 the negated sum of absolute errors accumulated by the caller.
#   correct[1] -- numeric-only columns: negated sum of squared errors.
# Callers that only count matches pass a one-element list ([correct]).
#
# isNumeric branch (post-commit): precision is fixed at 0, recall carries the
# R2 value passed in, f1 is correct[1] / gold_total and aligned_accuracy is
# correct[0] / gold_total; both fall back to 0.0 when gold_total is falsy.
# Otherwise: precision, recall, f1 and aligned_accuracy are percentages
# (scaled by 100); aligned_accuracy falls back to aligned_total itself
# (None or 0) when aligned_total is falsy.
class Score : class Score :
def __init__(self, gold_total, system_total, correct, aligned_total=None, isNumeric=False, R2=None) : def __init__(self, gold_total, system_total, correct, aligned_total=None, isNumeric=False, R2=None) :
self.correct = correct self.correct = correct[0]
self.gold_total = gold_total self.gold_total = gold_total
self.system_total = system_total self.system_total = system_total
self.aligned_total = aligned_total self.aligned_total = aligned_total
if isNumeric : if isNumeric :
self.precision = 0 self.precision = 0
self.recall = R2 self.recall = R2
self.f1 = 2 * correct / (system_total + gold_total) if system_total + gold_total else 0.0 self.f1 = correct[1] / gold_total if gold_total else 0.0
self.aligned_accuracy = correct / aligned_total if aligned_total else aligned_total self.aligned_accuracy = correct[0] / gold_total if gold_total else 0.0
else : else :
self.precision = 100*correct / system_total if system_total else 0.0 self.precision = 100*correct[0] / system_total if system_total else 0.0
self.recall = 100*correct / gold_total if gold_total else 0.0 self.recall = 100*correct[0] / gold_total if gold_total else 0.0
self.f1 = 2 * 100*correct / (system_total + gold_total) if system_total + gold_total else 0.0 self.f1 = 2 * 100*correct[0] / (system_total + gold_total) if system_total + gold_total else 0.0
self.aligned_accuracy = 100*correct / aligned_total if aligned_total else aligned_total self.aligned_accuracy = 100*correct[0] / aligned_total if aligned_total else aligned_total
class AlignmentWord : class AlignmentWord :
def __init__(self, gold_word, system_word) : def __init__(self, gold_word, system_word) :
...@@ -406,7 +407,7 @@ def evaluate(gold_ud, system_ud, extraColumns) : ...@@ -406,7 +407,7 @@ def evaluate(gold_ud, system_ud, extraColumns) :
si += 1 si += 1
gi += 1 gi += 1
return [Score(len(gold_spans), len(system_spans), correct)] return [Score(len(gold_spans), len(system_spans), [correct])]
def alignment_score(alignment, key_fn=None, filter_fn=None) : def alignment_score(alignment, key_fn=None, filter_fn=None) :
if filter_fn is not None : if filter_fn is not None :
...@@ -420,7 +421,7 @@ def evaluate(gold_ud, system_ud, extraColumns) : ...@@ -420,7 +421,7 @@ def evaluate(gold_ud, system_ud, extraColumns) :
if key_fn is None : if key_fn is None :
# Return score for whole aligned words # Return score for whole aligned words
return [Score(gold, system, aligned)] return [Score(gold, system, [aligned])]
def gold_aligned_gold(word) : def gold_aligned_gold(word) :
return word return word
...@@ -434,7 +435,7 @@ def evaluate(gold_ud, system_ud, extraColumns) : ...@@ -434,7 +435,7 @@ def evaluate(gold_ud, system_ud, extraColumns) :
if (not isinstance(systemItem, str) or '.' not in systemItem or not is_float(systemItem)) or (not isinstance(goldItem, str) or '.' not in goldItem or not is_float(goldItem)) : if (not isinstance(systemItem, str) or '.' not in systemItem or not is_float(systemItem)) or (not isinstance(goldItem, str) or '.' not in goldItem or not is_float(goldItem)) :
isNumericOnly = False isNumericOnly = False
correct = 0 correct = [0,0]
errors = [] errors = []
goldValues = [] goldValues = []
predictedValues = [] predictedValues = []
...@@ -444,23 +445,30 @@ def evaluate(gold_ud, system_ud, extraColumns) : ...@@ -444,23 +445,30 @@ def evaluate(gold_ud, system_ud, extraColumns) :
systemItem = key_fn(words.system_word, gold_aligned_system) systemItem = key_fn(words.system_word, gold_aligned_system)
if not isNumericOnly : if not isNumericOnly :
if goldItem == systemItem : if goldItem == systemItem :
correct += 1 correct[0] += 1
else : else :
errors.append(words) errors.append(words)
else : # WARNING: this script ignore examples where gold value == 0.0
correct -= abs(float(goldItem) - float(systemItem))**2 elif float(goldItem) != 0.0 :
correct[0] -= abs(float(goldItem) - float(systemItem))**1
correct[1] -= abs(float(goldItem) - float(systemItem))**2
goldValues.append(float(goldItem)) goldValues.append(float(goldItem))
predictedValues.append(float(systemItem)) predictedValues.append(float(systemItem))
R2 = None R2 = None
if isNumericOnly : if isNumericOnly :
goldMean = sum(goldValues) / len(goldValues) goldMean = sum(goldValues) / len(goldValues)
E1 = 0.0 predMean = sum(predictedValues) / len(predictedValues)
E2 = 0.0 numerator = 0.0
denom1 = 0.0
denom2 = 0.0
for i in range(len(predictedValues)) : for i in range(len(predictedValues)) :
E1 += (goldValues[i]-predictedValues[i])**2 numerator += (predictedValues[i]-predMean)*(goldValues[i]-goldMean)
E2 += (goldMean-predictedValues[i])**2 denom1 += (predictedValues[i]-predMean)**2
R2 = 1 - E1/E2 denom2 += (goldValues[i]-goldMean)**2
pearson = numerator/((denom1**0.5)*(denom2**0.5))
R2 = pearson**2
return [Score(gold, system, correct, aligned, isNumeric=isNumericOnly, R2=R2), errors] return [Score(gold, system, correct, aligned, isNumeric=isNumericOnly, R2=R2), errors]
def beyond_end(words, i, multiword_span_end) : def beyond_end(words, i, multiword_span_end) :
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment