Fixed R2 computation in eval script

00844dd5 · Franck Dary · f9036439 · 00844dd5
Commit 00844dd5 authored Jan 12, 2021 by Franck Dary
--- a/scripts/conll18_ud_eval.py
+++ b/scripts/conll18_ud_eval.py
@@ -365,20 +365,21 @@ def load_conllu(file) :
 def evaluate(gold_ud, system_ud, extraColumns) :
  class Score :
    def __init__(self, gold_total, system_total, correct, aligned_total=None, isNumeric=False, R2=None) :
-      self.correct = correct
+      self.correct = correct[0]
      self.gold_total = gold_total
      self.system_total = system_total
      self.aligned_total = aligned_total
      if isNumeric :
        self.precision = 0
        self.recall = R2
-        self.f1 = 2 * correct / (system_total + gold_total) if system_total + gold_total else 0.0
+        self.f1 = correct[1] / gold_total if gold_total else 0.0
-        self.aligned_accuracy = correct / aligned_total if aligned_total else aligned_total
+        self.aligned_accuracy = correct[0] / gold_total if gold_total else 0.0
      else :
-        self.precision = 100*correct / system_total if system_total else 0.0
+        self.precision = 100*correct[0] / system_total if system_total else 0.0
-        self.recall = 100*correct / gold_total if gold_total else 0.0
+        self.recall = 100*correct[0] / gold_total if gold_total else 0.0
-        self.f1 = 2 * 100*correct / (system_total + gold_total) if system_total + gold_total else 0.0
+        self.f1 = 2 * 100*correct[0] / (system_total + gold_total) if system_total + gold_total else 0.0
-        self.aligned_accuracy = 100*correct / aligned_total if aligned_total else aligned_total
+        self.aligned_accuracy = 100*correct[0] / aligned_total if aligned_total else aligned_total
  class AlignmentWord :
    def __init__(self, gold_word, system_word) :
@@ -406,7 +407,7 @@ def evaluate(gold_ud, system_ud, extraColumns) :
        si += 1
        gi += 1
-    return [Score(len(gold_spans), len(system_spans), correct)]
+    return [Score(len(gold_spans), len(system_spans), [correct])]
  def alignment_score(alignment, key_fn=None, filter_fn=None) :
    if filter_fn is not None :
@@ -420,7 +421,7 @@ def evaluate(gold_ud, system_ud, extraColumns) :
    if key_fn is None :
      # Return score for whole aligned words
-      return [Score(gold, system, aligned)]
+      return [Score(gold, system, [aligned])]
    def gold_aligned_gold(word) :
      return word
@@ -434,7 +435,7 @@ def evaluate(gold_ud, system_ud, extraColumns) :
        if (not isinstance(systemItem, str) or '.' not in systemItem or not is_float(systemItem)) or  (not isinstance(goldItem, str) or '.' not in goldItem or not is_float(goldItem)) :
          isNumericOnly = False
-    correct = 0
+    correct = [0,0]
    errors = []
    goldValues = []
    predictedValues = []
@@ -444,23 +445,30 @@ def evaluate(gold_ud, system_ud, extraColumns) :
        systemItem = key_fn(words.system_word, gold_aligned_system)
        if not isNumericOnly :
          if goldItem == systemItem :
-            correct += 1
+            correct[0] += 1
          else :
            errors.append(words)
-        else :
+        # WARNING: this script ignores examples where the gold value == 0.0
-          correct -= abs(float(goldItem) - float(systemItem))**2
+        elif float(goldItem) != 0.0 :
+          correct[0] -= abs(float(goldItem) - float(systemItem))**1
+          correct[1] -= abs(float(goldItem) - float(systemItem))**2
          goldValues.append(float(goldItem))
          predictedValues.append(float(systemItem))
    R2 = None
    if isNumericOnly :
      goldMean = sum(goldValues) / len(goldValues)
-      E1 = 0.0
+      predMean = sum(predictedValues) / len(predictedValues)
-      E2 = 0.0
+      numerator = 0.0
+      denom1 = 0.0
+      denom2 = 0.0
      for i in range(len(predictedValues)) :
-        E1 += (goldValues[i]-predictedValues[i])**2
+        numerator += (predictedValues[i]-predMean)*(goldValues[i]-goldMean)
-        E2 += (goldMean-predictedValues[i])**2
+        denom1 += (predictedValues[i]-predMean)**2
-      R2 = 1 - E1/E2
+        denom2 += (goldValues[i]-goldMean)**2
+      pearson = numerator/((denom1**0.5)*(denom2**0.5))
+      R2 = pearson**2
    return [Score(gold, system, correct, aligned, isNumeric=isNumericOnly, R2=R2), errors]
  def beyond_end(words, i, multiword_span_end) :