#! /usr/bin/env python3

# Read a decoding trace and print, sentence by sentence, the actions that were
# chosen, laid out by state and by successive versions after BACK actions.

import sys
import argparse

################################################################################
def lenLine():
    """Return the width, in characters, of one printed column."""
    return 35
################################################################################

################################################################################
def englobStr(s, symbol, totalLen):
    """Center `s`, padded with one space on each side, in a run of `symbol`.

    With a one-character `symbol` the result is exactly `totalLen` characters
    long; when the number of filler symbols is odd, the extra one goes on the
    right. If the padded text is already longer than `totalLen`, it is
    returned without any filler.
    """
    s = " %s " % s
    df = totalLen - len(s)
    left = df // 2
    # The right side takes whatever the left side did not, so that
    # left + right == df even when df is odd.
    return "%s%s%s" % (symbol * left, s, symbol * (df - left))
################################################################################

################################################################################
class Step():
    """One decision read from the trace: the chosen action and its context."""

    def __init__(self):
        self.state = None
        self.action = None         # name of the chosen action
        self.scores = None         # list of (score, actionName)
        self.costs = None          # list of (cost, actionName)
        self.oracleAction = None   # first action having the minimal cost
        self.oracleCost = None     # minimal cost among self.costs
        self.actionScore = None
        self.actionCost = None
        self.oracleScore = None
        self.stack = None
        self.historyPop = None
        self.history = None
        self.distance = None       # set by History.computeValues
        self.oracleIndex = None    # set by History.computeValues

    def __str__(self):
        """Return the quoted short action name.

        When the chosen action costs more than the oracle one, the oracle
        action is appended after '->', followed by the score distance and the
        index of the oracle action in the scores when those have been computed.
        """
        def simple(action):
            # TAG actions are shown by their last word, arc actions by their
            # first one, anything else is shown in full.
            if "TAG" in action:
                return action.split()[-1]
            elif "RIGHT" in action or "LEFT" in action:
                return action.split()[0]
            return action

        text = "'%s'" % simple(self.action)
        if self.actionCost is None or self.oracleCost is None:
            return text
        if self.actionCost > self.oracleCost:
            text += "->" + "'%s'" % simple(self.oracleAction)
            if self.distance is not None and self.oracleIndex is not None:
                text += "(dist=%.2f index=%d)" % (self.distance, self.oracleIndex)
        return text
################################################################################

################################################################################
class History():
    """All the steps of a trace, grouped by sentence."""

    def __init__(self):
        self.sentences = []

    def segmentInBlocks(self):
        """Regroup the steps of every sentence into blocks and versions.

        Resulting structure :
        sentence = list of blocks = list of versions = list of steps.
        A new block starts whenever the state changes. A 'BACK n' action walks
        back over the previous blocks until n of them having the same state as
        the BACK step have been met, then opens a new (empty) version in every
        block from that point on.
        """
        segmented = []
        for sentence in self.sentences:
            blocks = []
            segmented.append(blocks)
            lastState = None
            blockIndex = 0
            for step in sentence:
                if lastState is not None and lastState != step.state:
                    blockIndex += 1
                while blockIndex >= len(blocks):
                    blocks.append([[]])
                blocks[blockIndex][-1].append(step)
                lastState = step.state
                if "BACK" in step.action and "NOBACK" not in step.action:
                    backSize = int(step.action.split()[-1])
                    # Stop at the first block instead of letting a negative
                    # index wrap around to the end of the sentence.
                    while backSize > 0 and blockIndex > 0:
                        blockIndex -= 1
                        # The first version of a block is never empty, whereas
                        # the last one is empty right after a previous BACK.
                        if blocks[blockIndex][0][0].state == step.state:
                            backSize -= 1
                    for block in blocks[blockIndex:]:
                        block.append([])

        self.sentences = segmented

    def computeValues(self):
        """Set `distance` and `oracleIndex` on every step.

        Must be called before segmentInBlocks, while sentences are still flat
        lists of steps.
        """
        for sentence in self.sentences:
            for step in sentence:
                step.distance = abs(step.actionScore - step.oracleScore)
                step.oracleIndex = [a[1] for a in step.scores].index(step.oracleAction)

    def printStats(self, out):
        """Print the content of the history to the file object `out`.

        Before segmentInBlocks (or when there is no step at all), prints the
        number of sentences and actions, then one step per line. After
        segmentInBlocks, prints every block under a 'State' header, the
        versions of a block being laid out side by side, separated by tabs.
        """
        firstItem = next((s[0] for s in self.sentences if s), None)
        if not isinstance(firstItem, list):  # Before segmentInBlocks
            print("%d sentences, %d actions" % (len(self.sentences),
                  sum(len(s) for s in self.sentences)), file=out)

            for sentence in self.sentences:
                print("", file=out)
                print(englobStr("Sentence", "-", 2 * lenLine()), file=out)
                for step in sentence:
                    print(step, file=out)
            return

        width = lenLine()
        blank = " " * width
        for sentIndex, sentence in enumerate(self.sentences):
            print("", file=out)
            print(englobStr("Sentence %d" % sentIndex, "-", 2 * width), file=out)
            for block in sentence:
                if not isinstance(block[0], list):  # One version of the block, without backtrack
                    print(englobStr("State %d" % block[0].state, "-", width), file=out)
                    for step in block:
                        print(step, file=out)
                    continue
                header = englobStr("State %d" % block[0][0].state, "-", width)
                columns = [[header.ljust(width)] +
                           [str(step).ljust(width) for step in version]
                           for version in block]
                for i in range(max(len(column) for column in columns)):
                    # Shorter versions get a blank cell so that the following
                    # versions stay in their own column.
                    cells = [column[i] if i < len(column) else blank
                             for column in columns]
                    print("\t".join(cells), file=out)

    def readFromTrace(self, traceFile):
        """Parse the file named `traceFile` and append its steps to the history.

        An empty line ends the current sentence. A step is completed, and
        stored, when its 'Oracle costs :' line is read.
        """
        self.sentences.append([])
        curStep = Step()

        with open(traceFile, "r") as trace:
            for line in trace:
                line = line.strip()

                # End of sentence :
                if len(line) == 0:
                    if len(self.sentences[-1]) > 0:
                        self.sentences.append([])
                    continue

                if "state :" in line:
                    curStep.state = int(line.split(':')[-1].strip())
                elif "stack :" in line:
                    curStep.stack = ["".join([c for c in a if c.isdigit()]) for a in line.split(':')[-1].strip()[1:-2].split(',')]
                    curStep.stack = [int(a) for a in curStep.stack if len(a) > 0]
                elif "historyPop" in line:
                    curStep.historyPop = ":".join(line.replace("'", "").split(':')[1:]).split(')')
                    curStep.historyPop = [a.split('(')[-1] for a in curStep.historyPop if len(a.split(',')) > 1]
                    if len(curStep.historyPop) > 0:
                        curStep.historyPop = [(a.split(',')[0].strip(), int(a.split(',')[3].strip().split(':')[-1])) for a in curStep.historyPop]
                elif "history" in line:
                    curStep.history = ["".join([c for c in a.strip() if c != "'"]) for a in line.split(':')[-1].strip()[1:-2].split(',')]
                elif "*" in line:
                    curStep.scores = line.split()
                    # Action names may contain spaces : glue back every token
                    # that has no ':' to the token preceding it.
                    for i in range(len(curStep.scores))[::-1]:
                        if len(curStep.scores[i].split(':')) == 1:
                            curStep.scores[i-1] = " ".join(curStep.scores[i-1:i+1])
                    curStep.scores = [a.replace("*", "").split(':') for a in curStep.scores if not len(a.split(':')) == 1]
                    curStep.scores = [(float(a[0]), a[1]) for a in curStep.scores]
                elif "Chosen action :" in line:
                    curStep.action = line.split(':')[-1].strip()
                elif "Oracle costs :" in line:
                    curStep.costs = line.split(':')[-1].strip().split('[')
                    curStep.costs = [a[:-1].replace("'", "").replace(']', '').split(',') for a in curStep.costs if ',' in a]
                    curStep.costs = [(int(a[0]), a[1].strip()) for a in curStep.costs]
                    curStep.actionCost = 0 if "BACK" in curStep.action else [c[0] for c in curStep.costs if c[1] == curStep.action][0]
                    curStep.oracleCost = min([b[0] for b in curStep.costs])
                    curStep.oracleAction = [a[1] for a in curStep.costs if a[0] == curStep.oracleCost][0]
                    curStep.oracleScore = [a[0] for a in curStep.scores if a[1] == curStep.oracleAction][0]
                    curStep.actionScore = [a[0] for a in curStep.scores if a[1] == curStep.action][0]

                    self.sentences[-1].append(curStep)
                    curStep = Step()

        # A trace ending with an empty line leaves an empty sentence behind.
        if self.sentences and len(self.sentences[-1]) == 0:
            self.sentences.pop()
################################################################################

################################################################################
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("trace", type=str,
        help="File produced by debug mode (-d) of the decoding.")
    args = parser.parse_args()

    history = History()
    history.readFromTrace(args.trace)

    history.computeValues()

    history.segmentInBlocks()

    history.printStats(sys.stdout)
################################################################################

def trash():
    """Former body of the script, kept as is; the entry point does not call it.

    Reads the trace named by sys.argv[1], echoes it, and prints statistics
    about the occurrences of actions and about the BACK actions.
    """
    backSize = 1 # Script only works with one back action at the moment

    nbActionsPerState = {} # for each state, a dict of actionName -> nbOccurrences
    nbBacks = {} # dict of backName -> nbOccurrences
    nbBacksUndoError = {} # dict of backName -> nb times it has undone at least 1 err
    nbNoBacks = {} # dict of NOBACK -> nbOccurrences
    nbNoBacksUndoError = {} # dict of NOBACK -> nb times BACK would have undone at least an error if BACK was always chosen
    nbCorrectCorrect = 0
    nbErrCorrect = 0
    nbErrErr = 0
    nbCorrectErr = 0

    curState = None
    curStack = None
    curHistory = None
    curHistoryPop = None
    curScores = None
    curAction = None
    curCosts = None
    curCost = None
    curNbUndone = None
    curUndone = None
    actionsStats = []

    for line in open(sys.argv[1], "r"):
        line = line.strip()
        print(line)

        # End of sentence :
        if len(line) == 0 and curHistoryPop is not None:
            curState = None
            curStack = None
            curHistory = None
            curHistoryPop = None
            curScores = None
            curAction = None
            curCosts = None
            curCost = None
            curNbUndone = None
            curUndone = None
            actionsStats = []

        # Collect info on current line :
        if "state :" in line:
            curState = int(line.split(':')[-1].strip())
        elif "stack :" in line:
            curStack = ["".join([c for c in a if c.isdigit()]) for a in line.split(':')[-1].strip()[1:-2].split(',')]
            curStack = [int(a) for a in curStack if len(a) > 0]
        elif "historyPop" in line:
            curHistoryPop = ":".join(line.replace("'", "").split(':')[1:]).split(')')
            curHistoryPop = [a.split('(')[-1] for a in curHistoryPop if len(a.split(',')) > 1]
            if len(curHistoryPop) > 0:
                curHistoryPop = [(a.split(',')[0].strip(), int(a.split(',')[3].strip().split(':')[-1])) for a in curHistoryPop]
        elif "nbUndone :" in line:
            curNbUndone = int(line.split(':')[1].strip())
        elif "history" in line:
            curHistory = ["".join([c for c in a.strip() if c != "'"]) for a in line.split(':')[-1].strip()[1:-2].split(',')]
        elif "*" in line:
            curScores = line.split()
            for i in range(len(curScores))[::-1]:
                if len(curScores[i].split(':')) == 1:
                    curScores[i-1] = " ".join(curScores[i-1:i+1])
            curScores = [a.replace("*", "").split(':') for a in curScores if not len(a.split(':')) == 1]
            curScores = [(float(a[0]), a[1]) for a in curScores]

        elif "Chosen action :" in line:
            curAction = line.split(':')[-1].strip()
        elif "Oracle costs :" in line:
            curCosts = line.split(':')[-1].strip().split('[')
            curCosts = [a[:-1].replace("'", "").replace(']', '').split(',') for a in curCosts if ',' in a]
            curCosts = [(int(a[0]), a[1].strip()) for a in curCosts]
            curCost = None if "BACK" in curAction else [c[0] for c in curCosts if c[1] == curAction][0]
            correctAction = [a[1] for a in curCosts if a[0] == min([b[0] for b in curCosts])][0]
            correctActionScore = [a[0] for a in curScores if a[1] == correctAction][0]
            chosenActionScore = [a[0] for a in curScores if a[1] == curAction][0]
            distanceOfCorrectAction = abs(correctActionScore-chosenActionScore)
            indexOfCorrect = [a[1] for a in curScores].index(correctAction)
            if "BACK" not in curAction:
                if curUndone is None or len(curUndone) == 0:
                    actionsStats.append((curAction, correctAction, distanceOfCorrectAction, indexOfCorrect))
                else:
                    actionsStats[-len(curUndone)] = (curAction, correctAction, distanceOfCorrectAction, indexOfCorrect)
            # End of action choice :
            # Count actions
            if curState not in nbActionsPerState:
                nbActionsPerState[curState] = {}
            if curAction not in nbActionsPerState[curState]:
                nbActionsPerState[curState][curAction] = 0
            nbActionsPerState[curState][curAction] += 1

            print("curUndone = %s"%str(curUndone))
            print("actionsStats = %s"%str(actionsStats))

            if curUndone is not None and len(curUndone) > 0 and curNbUndone > 0 and "NOBACK" not in curAction and "BACK" not in curAction:
                prevCost = curUndone[0]
                curUndone = curUndone[1:]
                if prevCost == 0 and curCost == 0:
                    nbCorrectCorrect += 1
                elif prevCost == 0 and curCost != 0:
                    nbCorrectErr += 1
                elif prevCost != 0 and curCost != 0:
                    nbErrErr += 1
                elif prevCost != 0 and curCost == 0:
                    nbErrCorrect += 1

            if "NOBACK" in curAction and len([a for a in curHistoryPop if a[0] == "NOBACK"]) >= backSize and "BACK" not in curHistory[-1]:
                if curAction not in nbNoBacks:
                    nbNoBacks[curAction] = 0
                nbNoBacks[curAction] += 1
                size = backSize
                nbErrors = 0
                for a in curHistoryPop[::-1]:
                    if a[0] == "NOBACK":
                        size -= 1
                        if size == 0:
                            break
                        continue
                    if a[1] < 0:
                        nbErrors += 1
                if curAction not in nbNoBacksUndoError:
                    nbNoBacksUndoError[curAction] = 0
                if nbErrors > 0:
                    nbNoBacksUndoError[curAction] += 1
            elif "BACK" in curAction and "NOBACK" not in curAction:
                if curAction not in nbBacks:
                    nbBacks[curAction] = 0
                nbBacks[curAction] += 1
                size = int(curAction.split()[-1].strip())
                if size != backSize:
                    raise Exception("backSize is wrong")
                nbErrors = 0
                for a in curHistoryPop[::-1]:
                    if a[0] == "NOBACK":
                        size -= 1
                        if size == 0:
                            break
                        continue
                    if curUndone is None:
                        curUndone = []
                    curUndone = [a[1]] + curUndone
                    if a[1] < 0:
                        nbErrors += 1
                if curAction not in nbBacksUndoError:
                    nbBacksUndoError[curAction] = 0
                if nbErrors > 0:
                    nbBacksUndoError[curAction] += 1


    # Printing for each states, number of occurrences of each actions
    print("Occurrences of actions :")
    for state in nbActionsPerState:
        print("State", state, ":")
        print("  %d\ttotal"%(sum(list(nbActionsPerState[state].values()))))
        actions = sorted([[nbActionsPerState[state][action], action] for action in nbActionsPerState[state]])[::-1]
        actions = ["  %d\t%s"%(a[0], a[1]) for a in actions]
        print("\n".join(actions))

    # Answering the question of whether or not the backs are triggered to undo errors
    # We compare the number of times a back has undone at least 1 bad action
    # with the number of times it would have been the case if we always did back.
    print("\nAbout triggering of back actions :")
    for action in nbBacks:
        total = nbBacks[action]
        undoErr = nbBacksUndoError[action]
        perc = "%5.2f%%"%(100.0*undoErr/total)
        print(action)
        print("  %s (%d/%d)\tof them canceled a bad action"%(perc, undoErr, total))
        total += nbNoBacks["NOBACK"]
        undoErr += nbNoBacksUndoError["NOBACK"]
        perc = "%5.2f%%"%(100.0*undoErr/total)
        print("  %s (%d/%d)\tif it was always chosen"%(perc, undoErr, total))

    print("\nAbout error correction after a BACK :")
    totalRedo = nbErrErr + nbErrCorrect + nbCorrectErr + nbCorrectCorrect
    print("  %5.2f%% (%d/%d)\ttransformed Error into Error"%(100.0*nbErrErr/totalRedo, nbErrErr, totalRedo))
    print("  %5.2f%% (%d/%d)\ttransformed Correct into Correct"%(100.0*nbCorrectCorrect/totalRedo, nbCorrectCorrect, totalRedo))
    print("  %5.2f%% (%d/%d)\ttransformed Correct into Error"%(100.0*nbCorrectErr/totalRedo, nbCorrectErr, totalRedo))
    print("  %5.2f%% (%d/%d)\ttransformed Error into Correct"%(100.0*nbErrCorrect/totalRedo, nbErrCorrect, totalRedo))