Skip to content
Snippets Groups Projects
Commit 4268691e authored by Franck Dary's avatar Franck Dary
Browse files

Added a feature set that takes into account the POS of each stack element's governor

parent 9fa94f3b
No related branches found
No related tags found
No related merge requests found
......@@ -7,6 +7,8 @@ class Dicts :
self.dicts = {}
self.unkToken = "__unknown__"
self.nullToken = "__null__"
self.noStackToken = "__nostack__"
self.oobToken = "__oob__"
def readConllu(self, filename, colsSet=None) :
defaultMCD = "ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC"
......
import torch
import sys
from Util import isEmpty
################################################################################
# Entry point for feature extraction: forwards (dicts, config) to one of the
# feature-set functions and returns that function's result unchanged.
# NOTE(review): two consecutive return statements follow. If both really belong
# to this body, only the first one (extractFeaturesPos) can ever execute and the
# extractFeaturesPosExtended call is unreachable. This looks like the old and
# the new line of a diff displayed together -- confirm which one is intended
# and delete the other.
def extractFeatures(dicts, config) :
return extractFeaturesPos(dicts, config)
return extractFeaturesPosExtended(dicts, config)
################################################################################
################################################################################
......@@ -17,15 +18,47 @@ def extractFeaturesPos(dicts, config) :
insertIndex = 0
for i in bufferWindow :
index = config.wordIndex + i
bufferPos = dicts.nullToken if index not in range(len(config.lines)) else config.getAsFeature(index, "UPOS")
bufferPos = dicts.oobToken if index not in range(len(config.lines)) else config.getAsFeature(index, "UPOS")
result[insertIndex] = dicts.get("UPOS", bufferPos)
insertIndex += 1
for i in stackWindow :
stackPos = dicts.nullToken if i not in range(len(config.stack)) else config.getAsFeature(config.stack[-1-i], "UPOS")
stackPos = dicts.noStackToken if i not in range(len(config.stack)) else config.getAsFeature(config.stack[-1-i], "UPOS")
result[insertIndex] = dicts.get("UPOS", stackPos)
insertIndex += 1
return result
################################################################################
################################################################################
# For each stack element, add its POS and the POS of its governor
def extractFeaturesPosExtended(dicts, config) :
  """Build the extended UPOS feature vector for the current configuration.

  Layout of the returned tensor, in order:
    - one value per buffer offset in -2..2 around config.wordIndex (5 values);
    - for each stack depth 0..3 (0 is config.stack[-1]), two values: the UPOS
      of the stack element, then the UPOS of its governor, i.e. the word whose
      index is int(HEAD) of that element (8 values).

  Placeholder tokens used instead of a real UPOS:
    - dicts.oobToken     : buffer position outside config.lines;
    - dicts.noStackToken : no stack element at this depth (element slot), or
                           HEAD is dicts.nullToken (governor slot);
    - dicts.nullToken    : HEAD is empty according to isEmpty (governor slot).

  Every value is mapped through dicts.get("UPOS", ...) before being stored.

  Returns:
    A torch tensor of dtype torch.int and size 13.
  """
  bufferOffsets = range(-2, 3)
  stackDepths = range(4)
  features = torch.zeros(len(bufferOffsets) + 2*len(stackDepths), dtype=torch.int)

  # Buffer part: one slot per offset, in window order.
  for slot, offset in enumerate(bufferOffsets) :
    position = config.wordIndex + offset
    if position in range(len(config.lines)) :
      upos = config.getAsFeature(position, "UPOS")
    else :
      upos = dicts.oobToken
    features[slot] = dicts.get("UPOS", upos)

  # Stack part: two consecutive slots per depth, right after the buffer slots.
  firstStackSlot = len(bufferOffsets)
  for depth in stackDepths :
    if depth in range(len(config.stack)) :
      element = config.stack[-1-depth]
      elementPos = config.getAsFeature(element, "UPOS")
      head = config.getAsFeature(element, "HEAD")
    else :
      elementPos = dicts.noStackToken
      head = dicts.nullToken

    # nullToken as HEAD doubles as the "nothing at this depth" marker.
    if head == dicts.nullToken :
      governorPos = dicts.noStackToken
    elif isEmpty(head) :
      governorPos = dicts.nullToken
    else :
      governorPos = config.getAsFeature(int(head), "UPOS")

    slot = firstStackSlot + 2*depth
    features[slot] = dicts.get("UPOS", elementPos)
    features[slot+1] = dicts.get("UPOS", governorPos)

  return features
################################################################################
import sys
import Config
################################################################################
def isEmpty(value) :
  """Tell whether value is one of the two empty markers: "_" or ""."""
  return value in ("_", "")
################################################################################
from Util import isEmpty
################################################################################
class Transition :
......
......@@ -5,3 +5,8 @@ def timeStamp() :
return "[%s]"%datetime.now().strftime("%H:%M:%S")
################################################################################
################################################################################
def isEmpty(value) :
  """Return True when value is an empty column marker, i.e. "_" or ""."""
  emptyMarkers = ("_", "")
  return value in emptyMarkers
################################################################################
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment