Skip to content
Snippets Groups Projects
Commit 272c4c4d authored by Franck Dary's avatar Franck Dary
Browse files

Added erased status

parent 19ec0711
No related branches found
No related tags found
No related merge requests found
from readMCD import readMCD
import Dicts
import sys
################################################################################
......@@ -103,7 +104,7 @@ class Config :
value = str(self.getAsFeature(lineIndex, self.index2col[colIndex]))
if value == "" :
value = "_"
elif self.index2col[colIndex] == "HEAD" and value != "-1":
elif self.index2col[colIndex] == "HEAD" and value != "-1" and value != Dicts.Dicts.erased :
value = self.getAsFeature(int(value), "ID")
elif self.index2col[colIndex] == "HEAD" and value == "-1":
value = "0"
......@@ -126,9 +127,9 @@ class Config :
value = str(self.getAsFeature(index, self.index2col[colIndex]))
if value == "" or value == "_" :
value = "_"
elif self.index2col[colIndex] == "HEAD" and value != "-1":
elif self.index2col[colIndex] == "HEAD" and value != "-1" and value != Dicts.Dicts.erased:
value = self.getAsFeature(int(value), "ID")
elif self.index2col[colIndex] == "HEAD" and value == "-1":
elif self.index2col[colIndex] == "HEAD" and value == "-1" and value != Dicts.Dicts.erased:
value = "0"
toPrint.append(value)
print("\t".join(toPrint), file=output)
......
......@@ -3,16 +3,18 @@ from readMCD import readMCD
################################################################################
class Dicts :
unkToken = "__unknown__"
nullToken = "__null__"
noStackToken = "__nostack__"
oobToken = "__oob__"
noDepLeft = "__nodepleft__"
noDepRight = "__nodepright__"
noGov = "__nogov__"
notSeen = "__notseen__"
erased = "__erased__"
def __init__(self) :
self.dicts = {}
self.unkToken = "__unknown__"
self.nullToken = "__null__"
self.noStackToken = "__nostack__"
self.oobToken = "__oob__"
self.noDepLeft = "__nodepleft__"
self.noDepRight = "__nodepright__"
self.noGov = "__nogov__"
self.notSeen = "__notseen__"
def addDict(self, name, d) :
if name in self.dicts :
......@@ -39,7 +41,7 @@ class Dicts :
targetColumns = list(col2index.keys())
else :
targetColumns = list(colsSet)
self.dicts = {col : {self.unkToken : (0,minCount), self.nullToken : (1,minCount), self.noStackToken : (2,minCount), self.oobToken : (3,minCount), self.noDepLeft : (4,minCount), self.noDepRight : (5,minCount), self.noGov : (6,minCount), self.notSeen : (7,minCount)} for col in targetColumns}
self.dicts = {col : {self.unkToken : (0,minCount), self.nullToken : (1,minCount), self.noStackToken : (2,minCount), self.oobToken : (3,minCount), self.noDepLeft : (4,minCount), self.noDepRight : (5,minCount), self.noGov : (6,minCount), self.notSeen : (7,minCount), self.erased : (8,minCount)} for col in targetColumns}
splited = line.split('\t')
for col in targetColumns :
......
import torch
import sys
from Util import isEmpty
import Dicts
################################################################################
# Input : b=buffer s=stack .0=governor .x=rightChild#x+1 .-x=leftChild#-x-1
......@@ -12,6 +13,7 @@ from Util import isEmpty
# -4 : No dependent right
# -5 : No gov
# -6 : Not seen
# -7 : Erased
# If incremental is true, only words that have been 'seen' (at wordIndex) can be used
# others will be marked as not seen.
def extractIndexes(config, featureFunction, incremental) :
......@@ -39,6 +41,8 @@ def extractIndexes(config, featureFunction, incremental) :
head = config.getAsFeature(index, "HEAD")
if isEmpty(head) :
index = -5
elif head == Dicts.Dicts.erased :
index = -7
else :
index = int(head)
continue
......@@ -62,7 +66,7 @@ def extractIndexes(config, featureFunction, incremental) :
################################################################################
# For each element of the feature function and for each column, concatenate the dict index
def extractColsFeatures(dicts, config, featureFunction, cols, incremental) :
specialValues = {-1 : dicts.oobToken, -2 : dicts.noStackToken, -3 : dicts.noDepLeft, -4 : dicts.noDepRight, -5 : dicts.noGov, -6 : dicts.notSeen}
specialValues = {-1 : dicts.oobToken, -2 : dicts.noStackToken, -3 : dicts.noDepLeft, -4 : dicts.noDepRight, -5 : dicts.noGov, -6 : dicts.notSeen, -7 : dicts.erased}
indexes = extractIndexes(config, featureFunction, incremental)
totalSize = len(cols)*len(indexes)
......
import sys
import Config
import Dicts
from Util import isEmpty
################################################################################
......@@ -53,31 +54,31 @@ class Transition :
def appliable(self, config) :
if self.name == "RIGHT" :
for colName in config.predicted :
if colName not in ["HEAD","DEPREL"] and isEmpty(config.getAsFeature(config.wordIndex, colName)) :
if colName not in ["HEAD","DEPREL"] and (isEmpty(config.getAsFeature(config.wordIndex, colName)) or config.getAsFeature(config.wordIndex, colName) == Dicts.Dicts.erased) :
return False
if not (len(config.stack) >= self.size and isEmpty(config.getAsFeature(config.wordIndex, "HEAD")) and not linkCauseCycle(config, config.stack[-self.size], config.wordIndex)) :
if not (len(config.stack) >= self.size and (isEmpty(config.getAsFeature(config.wordIndex, "HEAD")) or config.getAsFeature(config.wordIndex, "HEAD") == Dicts.Dicts.erased) and not linkCauseCycle(config, config.stack[-self.size], config.wordIndex)) :
return False
orphansInStack = [s for s in config.stack[-self.size+1:] if isEmpty(config.getAsFeature(s, "HEAD"))] if self.size > 1 else []
orphansInStack = [s for s in config.stack[-self.size+1:] if isEmpty(config.getAsFeature(s, "HEAD")) or config.getAsFeature(s, "HEAD") == Dicts.Dicts.erased] if self.size > 1 else []
return len(orphansInStack) == 0
if self.name == "LEFT" :
for colName in config.predicted :
if colName not in ["HEAD","DEPREL"] and isEmpty(config.getAsFeature(config.wordIndex, colName)) :
if colName not in ["HEAD","DEPREL"] and (isEmpty(config.getAsFeature(config.wordIndex, colName)) or config.getAsFeature(config.wordIndex, colName) == Dicts.Dicts.erased) :
return False
if not (len(config.stack) >= self.size and isEmpty(config.getAsFeature(config.stack[-self.size], "HEAD")) and not linkCauseCycle(config, config.wordIndex, config.stack[-self.size])) :
if not (len(config.stack) >= self.size and (isEmpty(config.getAsFeature(config.stack[-self.size], "HEAD"))or config.getAsFeature(config.stack[-self.size], "HEAD") == Dicts.Dicts.erased) and not linkCauseCycle(config, config.wordIndex, config.stack[-self.size])) :
return False
orphansInStack = [s for s in config.stack[-self.size+1:] if isEmpty(config.getAsFeature(s, "HEAD"))] if self.size > 1 else []
orphansInStack = [s for s in config.stack[-self.size+1:] if (isEmpty(config.getAsFeature(s, "HEAD")) or config.getAsFeature(s, "HEAD") == Dicts.Dicts.erased)] if self.size > 1 else []
return len(orphansInStack) == 0
if self.name == "SHIFT" :
for colName in config.predicted :
if colName not in ["HEAD","DEPREL"] and isEmpty(config.getAsFeature(config.wordIndex, colName)) :
if colName not in ["HEAD","DEPREL"] and (isEmpty(config.getAsFeature(config.wordIndex, colName))or config.getAsFeature(config.wordIndex, colName) == Dicts.Dicts.erased) :
return False
return config.wordIndex < len(config.lines) - 1
if self.name == "REDUCE" :
return len(config.stack) > 0 and not isEmpty(config.getAsFeature(config.stack[-1], "HEAD"))
return len(config.stack) > 0 and not (isEmpty(config.getAsFeature(config.stack[-1], "HEAD")) or config.getAsFeature(config.stack[-1], "HEAD") == Dicts.Dicts.erased)
if self.name == "EOS" :
return config.wordIndex == len(config.lines) - 1
if self.name == "TAG" :
return isEmpty(config.getAsFeature(config.wordIndex, self.colName))
return isEmpty(config.getAsFeature(config.wordIndex, self.colName)) or config.getAsFeature(config.wordIndex, self.colName) == Dicts.Dicts.erased
if self.name == "NOBACK" :
return True
if "BACK" in self.name :
......@@ -149,7 +150,7 @@ def nbLinksBufferStack(config) :
################################################################################
# Return True if link between from and to would cause a cycle
def linkCauseCycle(config, fromIndex, toIndex) :
while not isEmpty(config.getAsFeature(fromIndex, "HEAD")) :
while not isEmpty(config.getAsFeature(fromIndex, "HEAD")) and not config.getAsFeature(fromIndex, "HEAD") == Dicts.Dicts.erased :
fromIndex = int(config.getAsFeature(fromIndex, "HEAD"))
if fromIndex == toIndex :
return True
......@@ -208,7 +209,7 @@ def applyBackRight(config, data, size) :
config.stack.pop()
while len(data) > 0 :
config.stack.append(data.pop())
config.set(config.wordIndex, "HEAD", "")
config.set(config.wordIndex, "HEAD", Dicts.Dicts.erased)
config.predChilds[config.stack[-size]].pop()
################################################################################
......@@ -217,7 +218,7 @@ def applyBackLeft(config, data, size) :
config.stack.append(data.pop())
while len(data) > 0 :
config.stack.append(data.pop())
config.set(config.stack[-size], "HEAD", "")
config.set(config.stack[-size], "HEAD", Dicts.Dicts.erased)
config.predChilds[config.wordIndex].pop()
################################################################################
......@@ -233,7 +234,7 @@ def applyBackReduce(config, data) :
################################################################################
def applyBackTag(config, colName) :
config.set(config.wordIndex, colName, "")
config.set(config.wordIndex, colName, Dicts.Dicts.erased)
################################################################################
################################################################################
......@@ -273,9 +274,9 @@ def applyEOS(config) :
if not config.hasCol("HEAD") or not config.isPredicted("HEAD") :
return
rootCandidates = [index for index in config.stack if not config.isMultiword(index) and isEmpty(config.getAsFeature(index, "HEAD"))]
rootCandidates = [index for index in config.stack if not config.isMultiword(index) and (isEmpty(config.getAsFeature(index, "HEAD")) or config.getAsFeature(index, "HEAD") == Dicts.Dicts.erased)]
if len(rootCandidates) == 0 :
rootCandidates = [index for index in range(len(config.lines)) if not config.isMultiword(index) and isEmpty(config.getAsFeature(index, "HEAD"))]
rootCandidates = [index for index in range(len(config.lines)) if not config.isMultiword(index) and (isEmpty(config.getAsFeature(index, "HEAD")) or config.getAsFeature(index, "HEAD") == Dicts.Dicts.erased)]
if len(rootCandidates) == 0 :
print("ERROR : no candidates for root", file=sys.stderr)
......@@ -287,7 +288,7 @@ def applyEOS(config) :
config.set(rootIndex, "DEPREL", "root")
for index in range(len(config.lines)) :
if config.isMultiword(index) or not isEmpty(config.getAsFeature(index, "HEAD")) :
if config.isMultiword(index) or not (isEmpty(config.getAsFeature(index, "HEAD")) or config.getAsFeature(index, "HEAD") == Dicts.Dicts.erased) :
continue
config.set(index, "HEAD", str(rootIndex))
config.predChilds[rootIndex].append(index)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment