Skip to content
Snippets Groups Projects
Commit 272c4c4d authored by Franck Dary
Browse files

Added erased status

parent 19ec0711
Branches
No related tags found
No related merge requests found
from readMCD import readMCD from readMCD import readMCD
import Dicts
import sys import sys
################################################################################ ################################################################################
...@@ -103,7 +104,7 @@ class Config : ...@@ -103,7 +104,7 @@ class Config :
value = str(self.getAsFeature(lineIndex, self.index2col[colIndex])) value = str(self.getAsFeature(lineIndex, self.index2col[colIndex]))
if value == "" : if value == "" :
value = "_" value = "_"
elif self.index2col[colIndex] == "HEAD" and value != "-1": elif self.index2col[colIndex] == "HEAD" and value != "-1" and value != Dicts.Dicts.erased :
value = self.getAsFeature(int(value), "ID") value = self.getAsFeature(int(value), "ID")
elif self.index2col[colIndex] == "HEAD" and value == "-1": elif self.index2col[colIndex] == "HEAD" and value == "-1":
value = "0" value = "0"
...@@ -126,9 +127,9 @@ class Config : ...@@ -126,9 +127,9 @@ class Config :
value = str(self.getAsFeature(index, self.index2col[colIndex])) value = str(self.getAsFeature(index, self.index2col[colIndex]))
if value == "" or value == "_" : if value == "" or value == "_" :
value = "_" value = "_"
elif self.index2col[colIndex] == "HEAD" and value != "-1": elif self.index2col[colIndex] == "HEAD" and value != "-1" and value != Dicts.Dicts.erased:
value = self.getAsFeature(int(value), "ID") value = self.getAsFeature(int(value), "ID")
elif self.index2col[colIndex] == "HEAD" and value == "-1": elif self.index2col[colIndex] == "HEAD" and value == "-1" and value != Dicts.Dicts.erased:
value = "0" value = "0"
toPrint.append(value) toPrint.append(value)
print("\t".join(toPrint), file=output) print("\t".join(toPrint), file=output)
......
...@@ -3,16 +3,18 @@ from readMCD import readMCD ...@@ -3,16 +3,18 @@ from readMCD import readMCD
################################################################################ ################################################################################
class Dicts : class Dicts :
unkToken = "__unknown__"
nullToken = "__null__"
noStackToken = "__nostack__"
oobToken = "__oob__"
noDepLeft = "__nodepleft__"
noDepRight = "__nodepright__"
noGov = "__nogov__"
notSeen = "__notseen__"
erased = "__erased__"
def __init__(self) : def __init__(self) :
self.dicts = {} self.dicts = {}
self.unkToken = "__unknown__"
self.nullToken = "__null__"
self.noStackToken = "__nostack__"
self.oobToken = "__oob__"
self.noDepLeft = "__nodepleft__"
self.noDepRight = "__nodepright__"
self.noGov = "__nogov__"
self.notSeen = "__notseen__"
def addDict(self, name, d) : def addDict(self, name, d) :
if name in self.dicts : if name in self.dicts :
...@@ -39,7 +41,7 @@ class Dicts : ...@@ -39,7 +41,7 @@ class Dicts :
targetColumns = list(col2index.keys()) targetColumns = list(col2index.keys())
else : else :
targetColumns = list(colsSet) targetColumns = list(colsSet)
self.dicts = {col : {self.unkToken : (0,minCount), self.nullToken : (1,minCount), self.noStackToken : (2,minCount), self.oobToken : (3,minCount), self.noDepLeft : (4,minCount), self.noDepRight : (5,minCount), self.noGov : (6,minCount), self.notSeen : (7,minCount)} for col in targetColumns} self.dicts = {col : {self.unkToken : (0,minCount), self.nullToken : (1,minCount), self.noStackToken : (2,minCount), self.oobToken : (3,minCount), self.noDepLeft : (4,minCount), self.noDepRight : (5,minCount), self.noGov : (6,minCount), self.notSeen : (7,minCount), self.erased : (8,minCount)} for col in targetColumns}
splited = line.split('\t') splited = line.split('\t')
for col in targetColumns : for col in targetColumns :
......
import torch import torch
import sys import sys
from Util import isEmpty from Util import isEmpty
import Dicts
################################################################################ ################################################################################
# Input : b=buffer s=stack .0=governor .x=rightChild#x+1 .-x=leftChild#-x-1 # Input : b=buffer s=stack .0=governor .x=rightChild#x+1 .-x=leftChild#-x-1
...@@ -12,6 +13,7 @@ from Util import isEmpty ...@@ -12,6 +13,7 @@ from Util import isEmpty
# -4 : No dependent right # -4 : No dependent right
# -5 : No gov # -5 : No gov
# -6 : Not seen # -6 : Not seen
# -7 : Erased
# If incremental is true, only words that have been 'seen' (at wordIndex) can be used # If incremental is true, only words that have been 'seen' (at wordIndex) can be used
# others will be marked as not seen. # others will be marked as not seen.
def extractIndexes(config, featureFunction, incremental) : def extractIndexes(config, featureFunction, incremental) :
...@@ -39,6 +41,8 @@ def extractIndexes(config, featureFunction, incremental) : ...@@ -39,6 +41,8 @@ def extractIndexes(config, featureFunction, incremental) :
head = config.getAsFeature(index, "HEAD") head = config.getAsFeature(index, "HEAD")
if isEmpty(head) : if isEmpty(head) :
index = -5 index = -5
elif head == Dicts.Dicts.erased :
index = -7
else : else :
index = int(head) index = int(head)
continue continue
...@@ -62,7 +66,7 @@ def extractIndexes(config, featureFunction, incremental) : ...@@ -62,7 +66,7 @@ def extractIndexes(config, featureFunction, incremental) :
################################################################################ ################################################################################
# For each element of the feature function and for each column, concatenate the dict index # For each element of the feature function and for each column, concatenate the dict index
def extractColsFeatures(dicts, config, featureFunction, cols, incremental) : def extractColsFeatures(dicts, config, featureFunction, cols, incremental) :
specialValues = {-1 : dicts.oobToken, -2 : dicts.noStackToken, -3 : dicts.noDepLeft, -4 : dicts.noDepRight, -5 : dicts.noGov, -6 : dicts.notSeen} specialValues = {-1 : dicts.oobToken, -2 : dicts.noStackToken, -3 : dicts.noDepLeft, -4 : dicts.noDepRight, -5 : dicts.noGov, -6 : dicts.notSeen, -7 : dicts.erased}
indexes = extractIndexes(config, featureFunction, incremental) indexes = extractIndexes(config, featureFunction, incremental)
totalSize = len(cols)*len(indexes) totalSize = len(cols)*len(indexes)
......
import sys import sys
import Config import Config
import Dicts
from Util import isEmpty from Util import isEmpty
################################################################################ ################################################################################
...@@ -53,31 +54,31 @@ class Transition : ...@@ -53,31 +54,31 @@ class Transition :
def appliable(self, config) : def appliable(self, config) :
if self.name == "RIGHT" : if self.name == "RIGHT" :
for colName in config.predicted : for colName in config.predicted :
if colName not in ["HEAD","DEPREL"] and isEmpty(config.getAsFeature(config.wordIndex, colName)) : if colName not in ["HEAD","DEPREL"] and (isEmpty(config.getAsFeature(config.wordIndex, colName)) or config.getAsFeature(config.wordIndex, colName) == Dicts.Dicts.erased) :
return False return False
if not (len(config.stack) >= self.size and isEmpty(config.getAsFeature(config.wordIndex, "HEAD")) and not linkCauseCycle(config, config.stack[-self.size], config.wordIndex)) : if not (len(config.stack) >= self.size and (isEmpty(config.getAsFeature(config.wordIndex, "HEAD")) or config.getAsFeature(config.wordIndex, "HEAD") == Dicts.Dicts.erased) and not linkCauseCycle(config, config.stack[-self.size], config.wordIndex)) :
return False return False
orphansInStack = [s for s in config.stack[-self.size+1:] if isEmpty(config.getAsFeature(s, "HEAD"))] if self.size > 1 else [] orphansInStack = [s for s in config.stack[-self.size+1:] if isEmpty(config.getAsFeature(s, "HEAD")) or config.getAsFeature(s, "HEAD") == Dicts.Dicts.erased] if self.size > 1 else []
return len(orphansInStack) == 0 return len(orphansInStack) == 0
if self.name == "LEFT" : if self.name == "LEFT" :
for colName in config.predicted : for colName in config.predicted :
if colName not in ["HEAD","DEPREL"] and isEmpty(config.getAsFeature(config.wordIndex, colName)) : if colName not in ["HEAD","DEPREL"] and (isEmpty(config.getAsFeature(config.wordIndex, colName)) or config.getAsFeature(config.wordIndex, colName) == Dicts.Dicts.erased) :
return False return False
if not (len(config.stack) >= self.size and isEmpty(config.getAsFeature(config.stack[-self.size], "HEAD")) and not linkCauseCycle(config, config.wordIndex, config.stack[-self.size])) : if not (len(config.stack) >= self.size and (isEmpty(config.getAsFeature(config.stack[-self.size], "HEAD"))or config.getAsFeature(config.stack[-self.size], "HEAD") == Dicts.Dicts.erased) and not linkCauseCycle(config, config.wordIndex, config.stack[-self.size])) :
return False return False
orphansInStack = [s for s in config.stack[-self.size+1:] if isEmpty(config.getAsFeature(s, "HEAD"))] if self.size > 1 else [] orphansInStack = [s for s in config.stack[-self.size+1:] if (isEmpty(config.getAsFeature(s, "HEAD")) or config.getAsFeature(s, "HEAD") == Dicts.Dicts.erased)] if self.size > 1 else []
return len(orphansInStack) == 0 return len(orphansInStack) == 0
if self.name == "SHIFT" : if self.name == "SHIFT" :
for colName in config.predicted : for colName in config.predicted :
if colName not in ["HEAD","DEPREL"] and isEmpty(config.getAsFeature(config.wordIndex, colName)) : if colName not in ["HEAD","DEPREL"] and (isEmpty(config.getAsFeature(config.wordIndex, colName))or config.getAsFeature(config.wordIndex, colName) == Dicts.Dicts.erased) :
return False return False
return config.wordIndex < len(config.lines) - 1 return config.wordIndex < len(config.lines) - 1
if self.name == "REDUCE" : if self.name == "REDUCE" :
return len(config.stack) > 0 and not isEmpty(config.getAsFeature(config.stack[-1], "HEAD")) return len(config.stack) > 0 and not (isEmpty(config.getAsFeature(config.stack[-1], "HEAD")) or config.getAsFeature(config.stack[-1], "HEAD") == Dicts.Dicts.erased)
if self.name == "EOS" : if self.name == "EOS" :
return config.wordIndex == len(config.lines) - 1 return config.wordIndex == len(config.lines) - 1
if self.name == "TAG" : if self.name == "TAG" :
return isEmpty(config.getAsFeature(config.wordIndex, self.colName)) return isEmpty(config.getAsFeature(config.wordIndex, self.colName)) or config.getAsFeature(config.wordIndex, self.colName) == Dicts.Dicts.erased
if self.name == "NOBACK" : if self.name == "NOBACK" :
return True return True
if "BACK" in self.name : if "BACK" in self.name :
...@@ -149,7 +150,7 @@ def nbLinksBufferStack(config) : ...@@ -149,7 +150,7 @@ def nbLinksBufferStack(config) :
################################################################################ ################################################################################
# Return True if link between from and to would cause a cycle # Return True if link between from and to would cause a cycle
def linkCauseCycle(config, fromIndex, toIndex) : def linkCauseCycle(config, fromIndex, toIndex) :
while not isEmpty(config.getAsFeature(fromIndex, "HEAD")) : while not isEmpty(config.getAsFeature(fromIndex, "HEAD")) and not config.getAsFeature(fromIndex, "HEAD") == Dicts.Dicts.erased :
fromIndex = int(config.getAsFeature(fromIndex, "HEAD")) fromIndex = int(config.getAsFeature(fromIndex, "HEAD"))
if fromIndex == toIndex : if fromIndex == toIndex :
return True return True
...@@ -208,7 +209,7 @@ def applyBackRight(config, data, size) : ...@@ -208,7 +209,7 @@ def applyBackRight(config, data, size) :
config.stack.pop() config.stack.pop()
while len(data) > 0 : while len(data) > 0 :
config.stack.append(data.pop()) config.stack.append(data.pop())
config.set(config.wordIndex, "HEAD", "") config.set(config.wordIndex, "HEAD", Dicts.Dicts.erased)
config.predChilds[config.stack[-size]].pop() config.predChilds[config.stack[-size]].pop()
################################################################################ ################################################################################
...@@ -217,7 +218,7 @@ def applyBackLeft(config, data, size) : ...@@ -217,7 +218,7 @@ def applyBackLeft(config, data, size) :
config.stack.append(data.pop()) config.stack.append(data.pop())
while len(data) > 0 : while len(data) > 0 :
config.stack.append(data.pop()) config.stack.append(data.pop())
config.set(config.stack[-size], "HEAD", "") config.set(config.stack[-size], "HEAD", Dicts.Dicts.erased)
config.predChilds[config.wordIndex].pop() config.predChilds[config.wordIndex].pop()
################################################################################ ################################################################################
...@@ -233,7 +234,7 @@ def applyBackReduce(config, data) : ...@@ -233,7 +234,7 @@ def applyBackReduce(config, data) :
################################################################################ ################################################################################
def applyBackTag(config, colName) : def applyBackTag(config, colName) :
config.set(config.wordIndex, colName, "") config.set(config.wordIndex, colName, Dicts.Dicts.erased)
################################################################################ ################################################################################
################################################################################ ################################################################################
...@@ -273,9 +274,9 @@ def applyEOS(config) : ...@@ -273,9 +274,9 @@ def applyEOS(config) :
if not config.hasCol("HEAD") or not config.isPredicted("HEAD") : if not config.hasCol("HEAD") or not config.isPredicted("HEAD") :
return return
rootCandidates = [index for index in config.stack if not config.isMultiword(index) and isEmpty(config.getAsFeature(index, "HEAD"))] rootCandidates = [index for index in config.stack if not config.isMultiword(index) and (isEmpty(config.getAsFeature(index, "HEAD")) or config.getAsFeature(index, "HEAD") == Dicts.Dicts.erased)]
if len(rootCandidates) == 0 : if len(rootCandidates) == 0 :
rootCandidates = [index for index in range(len(config.lines)) if not config.isMultiword(index) and isEmpty(config.getAsFeature(index, "HEAD"))] rootCandidates = [index for index in range(len(config.lines)) if not config.isMultiword(index) and (isEmpty(config.getAsFeature(index, "HEAD")) or config.getAsFeature(index, "HEAD") == Dicts.Dicts.erased)]
if len(rootCandidates) == 0 : if len(rootCandidates) == 0 :
print("ERROR : no candidates for root", file=sys.stderr) print("ERROR : no candidates for root", file=sys.stderr)
...@@ -287,7 +288,7 @@ def applyEOS(config) : ...@@ -287,7 +288,7 @@ def applyEOS(config) :
config.set(rootIndex, "DEPREL", "root") config.set(rootIndex, "DEPREL", "root")
for index in range(len(config.lines)) : for index in range(len(config.lines)) :
if config.isMultiword(index) or not isEmpty(config.getAsFeature(index, "HEAD")) : if config.isMultiword(index) or not (isEmpty(config.getAsFeature(index, "HEAD")) or config.getAsFeature(index, "HEAD") == Dicts.Dicts.erased) :
continue continue
config.set(index, "HEAD", str(rootIndex)) config.set(index, "HEAD", str(rootIndex))
config.predChilds[rootIndex].append(index) config.predChilds[rootIndex].append(index)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment