readTrace.py

#! /usr/bin/env python3

import sys
import argparse

backerState = None
################################################################################
def setBackerState(value) :
  global backerState
  backerState = value
################################################################################

################################################################################
def getBackerState() :
  global backerState
  return backerState
################################################################################

################################################################################
def lenLine() :
  return 40
################################################################################

################################################################################
def englobStr(s, symbol, totalLen) :
  s = " %s "%s
  df = totalLen - len(s)
  return "%s%s%s"%(symbol*(df//2),s,symbol*(df-2*df//2+df//2))
################################################################################

################################################################################
def isBack(action) :
  return "BACK" in action and "NOBACK" not in action
################################################################################

################################################################################
def isParser(action) :
  return "SHIFT" in action or "REDUCE" in action or "LEFT" in action or "RIGHT" in action
################################################################################

################################################################################
def simple(action) :
  if "TAG" in action :
    return action.split()[-1]
  elif "RIGHT" in action or "LEFT" in action :
    return action.split()[0]
  return action
################################################################################

################################################################################
class Step() :
#-------------------------------------------------------------------------------
  def __init__(self) :
    self.state = None
    self.action = None
    self.scores = None
    self.costs = None
    self.oracleAction = None
    self.actionScore = None
    self.actionCost = None
    self.oracleScore = None
    self.stack = None
    self.historyPop = None
    self.history = None
    self.word = None

    self.distance = 0
    self.oracleIndex = 0
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
  def __str__(self) :
    action = " ".join(["%.2f@%s"%(c[0],simple(c[1])) for c in self.scores[:args.nbScores]])
    if self.actionCost > self.oracleCost :
      action = "%s CORR(%s)"%(action, simple(self.oracleAction))
    return action
#-------------------------------------------------------------------------------
################################################################################

################################################################################
class Block() :
#-------------------------------------------------------------------------------
  def __init__(self, state) :
    self.state = state
    self.versions = [] # List of list of steps
    self.stats = [] # For each version, dict of stats
    self.newVersion()
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
  def addStep(self, step) :
    self.versions[-1].append(step)
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
  def newVersion(self) :
    self.versions.append([])
    self.stats.append({
      "nbErr" : 0,
      "avgDist" : 0.0,
      "avgIndex" : 0.0,
      "maxIndex" : 0.0,
      "maxDist" : 0.0,
      })
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
  def nbVersions(self) :
    return len(self.versions)
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
  def getAsLines(self, maxNbVersions) :
    output = []
    versions = []
    for v in range(len(self.versions)) :
      version = self.versions[v]
      stats = self.stats[v]
      versions.append([])
      englobChar = "-"
      if len(version) > 0 and version[0].actionCost > version[0].oracleCost :
        englobChar = "~"
      lineStr = englobStr("" if len(version) == 0 else version[0].word, englobChar, lenLine())
      versions[-1].append(lineStr + (lenLine()-len(lineStr))*" ")
      for step in version :
        versions[-1].append(str(step) + (lenLine()-len(str(step)))*" ")
    maxIndex = max([len(version) for version in versions])
    for i in range(maxIndex) :
      output.append("")
      for j in range(maxNbVersions) :
        output[-1] += ("\t" if j > 0 else "") + (versions[j][i] if j in range(len(versions)) and i in range(len(versions[j])) else lenLine()*" ")

    return output
#-------------------------------------------------------------------------------
################################################################################

################################################################################
class History() :
#-------------------------------------------------------------------------------
  def __init__(self) :
    self.sentences = []
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
  def segmentInBlocks(self) :
    #structure : sentence = [annotations,list of blocks]
    sentences = []
    for sentenceAnnot in self.sentences :
      annot = sentenceAnnot[0]
      sentence = sentenceAnnot[1]
      lastState = None
      sentences.append([annot, []])
      blockIndex = 0
      for step in sentence :
        if lastState is not None and lastState != step.state :
          blockIndex += 1
        if blockIndex >= len(sentences[-1][1]) :
          sentences[-1][1].append(Block(step.state))
        block = sentences[-1][1][blockIndex]
        block.addStep(step)
        lastState = step.state
        if isBack(step.action) :
          backSize = int(step.action.split()[-1])
          setBackerState(step.state)
          while backSize > 0 :
            blockIndex -= 1
            state = sentences[-1][1][blockIndex].state
            if state == getBackerState() :
              backSize -= 1
          for block in sentences[-1][1][blockIndex:] :
            block.newVersion()

    self.sentences = sentences
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
  def computeStats(self) :
    globalStats = {
      "nbWords" : 0,
      "nbArcs" : 0,
      "nbMissedArcs" : 0,
      "nbActions" : 0,
      "nbActionsNormal" : 0,
      "nbActionsParser" : 0,
      "nbErr" : 0,
      "nbErrParser" : 0,
      "avgErrCost" : 0,
      "avgErrCostParser" : 0,
      "nbErrFound" : 0,
      "nbBack" : 0,
      "backOnErr" : 0,
      "actionAccuracy" : 0,
      "actionAccuracyParser" : 0,
      "arcsAccuracy" : 0,
      "backPrecision" : 0.0,
      "backRecall" : 0.0,
      "backFScore" : 0.0,
      "nbRedone" : 0,
      "nbRedoneDiminishErr" : 0,
      "nbRedoneAugmentErr" : 0,
      "redoneAvgErrChange" : 0,
      "nbRedoneErrErr" : 0,
      "nbRedoneErrCorrect" : 0,
      "nbRedoneCorrectErr" : 0,
      "nbRedoneCorrectCorrect" : 0,
      "redoneErrErrAvgDistChange" : 0.0,
      "redoneErrErrAvgIndexChange" : 0.0,
    }
    for sentence in self.sentences :
      globalStats["nbWords"] += len(sentence[0])
      globalStats["nbArcs"] += len(sentence[0]) - 1
      for block in sentence[1] :

        for i in range(len(block.versions)) :
          version = block.versions[i]
          stats = block.stats[i]
          if i == 0 :
            globalStats["nbActions"] += len(version)
          if block.state == getBackerState() :
            continue
          for step in version :
            step.distance = abs(step.actionScore-step.oracleScore)
            step.oracleIndex = [a[1] for a in step.scores].index(step.oracleAction)
            if i == 0 :
              globalStats["avgErrCost"] += step.actionCost
              if isParser(step.action) :
                globalStats["avgErrCostParser"] += step.actionCost
                globalStats["nbMissedArcs"] += step.actionCost
                globalStats["nbActionsParser"] += 1
                if step.actionCost > step.oracleCost :
                  globalStats["nbErrParser"] += 1
            if step.actionCost > step.oracleCost :
              stats["nbErr"] += 1
              stats["avgDist"] += step.distance
              stats["avgIndex"] += step.oracleIndex
              stats["maxDist"] = max(stats["maxDist"], step.distance)
              stats["maxIndex"] = max(stats["maxIndex"], step.oracleIndex)
          if i == 0 :
            globalStats["nbActionsNormal"] += len(version)
            globalStats["nbErr"] += stats["nbErr"]
            if len(block.versions) > 1 :
              globalStats["nbErrFound"] += stats["nbErr"]
          if i == 1 :
            prevStats = block.stats[i-1]
            globalStats["nbRedone"] += 1
            distChange = prevStats["maxDist"] - stats["maxDist"]
            indexChange = prevStats["maxIndex"] - stats["maxIndex"]
            if prevStats["nbErr"] > 0 and  stats["nbErr"] > 0 :
              globalStats["nbRedoneErrErr"] += 1
              globalStats["redoneErrErrAvgDistChange"] += distChange
              globalStats["redoneErrErrAvgIndexChange"] += indexChange
            if prevStats["nbErr"] == 0 and  stats["nbErr"] > 0 :
              globalStats["nbRedoneCorrectErr"] += 1
            if prevStats["nbErr"] == 0 and  stats["nbErr"] == 0 :
              globalStats["nbRedoneCorrectCorrect"] += 1
            if prevStats["nbErr"] > 0 and  stats["nbErr"] == 0 :
              globalStats["nbRedoneErrCorrect"] += 1
            if prevStats["nbErr"] > stats["nbErr"] :
              globalStats["nbRedoneDiminishErr"] += 1
            if prevStats["nbErr"] < stats["nbErr"] :
              globalStats["nbRedoneAugmentErr"] += 1
            globalStats["redoneAvgErrChange"] += stats["nbErr"] - prevStats["nbErr"]
          if stats["nbErr"] > 0 :
            stats["avgDist"] /= stats["nbErr"]
            stats["avgIndex"] /= stats["nbErr"]

    for sentence in self.sentences :
      b = 0
      while b in range(len(sentence[1])) :
        block = sentence[1][b]
        if block.state != getBackerState() or not isBack(block.versions[0][0].action) :
          b += 1
          continue
        backSize = int(block.versions[0][0].action.split()[1])
        globalStats["nbBack"] += 1
        backOnErr = False
        oldB = b
        b -= 1
        while b in range(len(sentence[1])) and backSize > 0 :
          if sentence[1][b].stats[0]["nbErr"] > 0 :
            backOnErr = True
          b -= 1
          if sentence[1][b].state == getBackerState() :
            backSize -= 1
        b = oldB + 1
        if backOnErr :
          globalStats["backOnErr"] += 1
    if globalStats["nbActionsNormal"] > 0 :
      globalStats["actionAccuracy"] = 100.0*(globalStats["nbActionsNormal"]-globalStats["nbErr"])/globalStats["nbActionsNormal"]
    if globalStats["nbActionsParser"] > 0 :
      globalStats["actionAccuracyParser"] = 100.0*(globalStats["nbActionsParser"]-globalStats["nbErrParser"])/globalStats["nbActionsParser"]
    if globalStats["nbArcs"] > 0 :
      globalStats["arcsAccuracy"] = 100.0*(globalStats["nbArcs"]-globalStats["nbMissedArcs"])/globalStats["nbArcs"]
    if globalStats["nbErr"] > 0 :
      globalStats["avgErrCost"] /= globalStats["nbErr"]
    if globalStats["nbErrParser"] > 0 :
      globalStats["avgErrCostParser"] /= globalStats["nbErrParser"]
    if globalStats["nbErr"] > 0 :
      globalStats["backRecall"] = 100.0*globalStats["nbErrFound"] / globalStats["nbErr"]
    if globalStats["nbBack"] > 0 :
      globalStats["backPrecision"] = 100.0*globalStats["backOnErr"] / globalStats["nbBack"]
    if globalStats["backPrecision"] + globalStats["backRecall"] > 0.0 :
      globalStats["backFScore"] = 2*(globalStats["backPrecision"] * globalStats["backRecall"])/(globalStats["backPrecision"] + globalStats["backRecall"])
    if globalStats["nbRedoneErrErr"] :
      globalStats["redoneErrErrAvgDistChange"] /= globalStats["nbRedoneErrErr"]
      globalStats["redoneErrErrAvgIndexChange"] /= globalStats["nbRedoneErrErr"]
    if globalStats["nbRedone"] :
      globalStats["redoneAvgErrChange"] /= globalStats["nbRedone"]
      globalStats["nbRedoneDiminishErr"] /= globalStats["nbRedone"] * (1/100)
      globalStats["nbRedoneAugmentErr"] /= globalStats["nbRedone"] * (1/100)
      globalStats["nbRedoneCorrectCorrect"] /= globalStats["nbRedone"] * (1/100)
      globalStats["nbRedoneErrErr"] /= globalStats["nbRedone"] * (1/100)
      globalStats["nbRedoneCorrectErr"] /= globalStats["nbRedone"] * (1/100)
      globalStats["nbRedoneErrCorrect"] /= globalStats["nbRedone"] * (1/100)

    return globalStats
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
  def printHumanReadable(self, out) :
    for sentIndex in range(len(self.sentences)) :
      sentence = self.sentences[sentIndex][1]
      annotations = [self.sentences[sentIndex][0][wid] for wid in sorted(list(self.sentences[sentIndex][0].keys()))]
      maxNbVersions = max([block.nbVersions() for block in sentence])
      print(englobStr("Sentence %d"%sentIndex, "-", (1+maxNbVersions)*(1+lenLine())), file=out)
      totalOutput = []
      for block in sentence :
        totalOutput += block.getAsLines(maxNbVersions)
      for i in range(len(totalOutput)) :
        print(totalOutput[i] + ("\t"+("Output of the machine:" if i == 0 else annotations[i-1]) if i in range(len(annotations)+1) else ""), file=out)
      print("", file=out)
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
  def readFromTrace(self, traceFile) :
    curStep = Step()
    started = False

    for line in open(traceFile, "r") :
      line = line.rstrip()

      # End of sentence :
      if len(line) == 0 :
        if len(self.sentences) == 0 or len(self.sentences[-1]) > 0 :
          self.sentences.append([])
          self.sentences[-1].append({})
          self.sentences[-1].append([])
        continue

      if "-----" in line :
        started = True
      if not started :
        continue

      if "state :" in line :
        curStep.state = int(line.split(':')[-1].strip())
      elif "=>" in line :
        annotLine = line.split("=>")[-1]
        curId = int(annotLine.split()[0])
        curStep.word = annotLine.split()[args.formIndex]
        self.sentences[-1][0][curId] = annotLine
      elif "stack :" in line :
        curStep.stack = ["".join([c for c in a if c.isdigit()]) for a in line.split(':')[-1].strip()[1:-2].split(',')]
        curStep.stack = [int(a) for a in curStep.stack if len(a) > 0]
      elif "historyPop" in line :
        curStep.historyPop = ":".join(line.replace("'","").split(':')[1:]).split(')')
        curStep.historyPop = [a.split('(')[-1] for a in curStep.historyPop if len(a.split(',')) > 1]
        if len(curStep.historyPop) > 0 :
          curStep.historyPop = [(a.split(',')[0].strip(),int(a.split(',')[3].strip().split(':')[-1])) for a in curStep.historyPop]
      elif "history" in line :
        curStep.history = ["".join([c for c in a.strip() if c != "'"]) for a in line.split(':')[-1].strip()[1:-2].split(',')]
      elif "*" in line :
        curStep.scores = line.split()
        for i in range(len(curStep.scores))[::-1] :
          if len(curStep.scores[i].split(':')) == 1 :
            curStep.scores[i-1] = " ".join(curStep.scores[i-1:i+1])
        curStep.scores = [a.replace("*","").split(':') for a in curStep.scores if not len(a.split(':')) == 1]
        curStep.scores = [(float(a[0]), a[1]) for a in curStep.scores]
      elif "  " in line :
        annotLine = " ".join(line.split("  ")[1:])
        if "-" not in annotLine.split()[0] :
          curId = int(annotLine.split()[0])
          self.sentences[-1][0][curId] = annotLine
      elif "Chosen action :" in line :
        curStep.action = line.split(':')[-1].strip()
      elif "Oracle costs :" in line :
        curStep.costs = line.split(':')[-1].strip().split('[')
        curStep.costs = [a[:-1].replace("'","").replace(']','').split(',') for a in curStep.costs if ',' in a]
        curStep.costs = [(int(a[0]), a[1].strip()) for a in curStep.costs]
        curStep.actionCost = 0 if "BACK" in curStep.action else [c[0] for c in curStep.costs if c[1] == curStep.action][0]
        curStep.oracleCost = min([b[0] for b in curStep.costs])
        curStep.oracleAction = [a[1] for a in curStep.costs if a[0] == curStep.oracleCost][0]
        curStep.oracleScore = [a[0] for a in curStep.scores if a[1] == curStep.oracleAction][0]
        curStep.actionScore = [a[0] for a in curStep.scores if a[1] == curStep.action][0]

        self.sentences[-1][-1].append(curStep)
        curStep = Step()
#-------------------------------------------------------------------------------
################################################################################

################################################################################
def prettyNumber(num) :
  base = "%.2f"%num
  splited = base.split('.')
  striped = splited[1].rstrip('.0')
  if len(striped) > 0 :
    striped = "."+striped
  return splited[0] + striped
################################################################################

################################################################################
if __name__ == "__main__" :
  parser = argparse.ArgumentParser()
  parser.add_argument("traces", nargs="+", default=[],
    help="File produced by debug mode (-d) of the decoding.")
  parser.add_argument("--steps", default=False, action="store_true",
    help="Print all decoding steps.")
  parser.add_argument("--stats", default=False, action="store_true",
    help="Print global stats about the decoding.")
  parser.add_argument("--formIndex", default=1,
    help="Index of the form of words in the trace file.")
  parser.add_argument("--nbScores", default=2,
    help="Number of action scores displayed in --steps mode.")
  args = parser.parse_args()

  if not (args.steps or args.stats) :
    print("ERROR: must provide --steps or --stats", file=sys.stderr)
    exit(1)

  histories = []
  stats = []
  for trace in args.traces :
    histories.append(History())
    histories[-1].readFromTrace(trace)
    histories[-1].segmentInBlocks()
    stats.append(histories[-1].computeStats())

  if args.steps :
    for i in range(len(args.traces)) :
      print("History of '%s' :\n"%args.traces[i])
      histories[i].printHumanReadable(sys.stdout)

  if args.stats :
    asList = [["Filename"]+list(stats[0].keys())]
    for i in range(len(args.traces)) :
      asList.append([args.traces[i]]+list(map(prettyNumber, list(stats[i].values()))))
    maxLens = [max(map(len, asList[i])) for i in range(len(asList))]
    for i in range(len(asList[0])) :
      for j in range(len(asList)) :
        sep = "." if j == 0 else " "
        print("%s"%(asList[j][i]+sep*(1+maxLens[j]-len(asList[j][i]))), end=" ")
      print("")
################################################################################