# Features.py
import torch
import sys

################################################################################
def extractFeatures(dicts, config) :
  """Build the feature vector for a configuration.

  This is the generic entry point; it forwards both arguments unchanged to
  extractFeaturesPosExtended and returns whatever that function returns.

  dicts  -- forwarded as-is to the selected extractor.
  config -- forwarded as-is to the selected extractor.
  """
  features = extractFeaturesPosExtended(dicts, config)
  return features
################################################################################

################################################################################
def extractFeaturesPos(dicts, config) :
  """Encode the UPOS of a buffer window and of the top of the stack.

  The returned tensor is one-dimensional, of dtype torch.int, with 9 slots :
    - slots 0..4 : UPOS of the words at offsets -2..+2 around config.wordIndex,
      or dicts.oobToken when that position is not a valid index of config.lines.
    - slots 5..8 : UPOS of the 4 topmost stack elements (top first), or
      dicts.noStackToken when the stack has fewer elements than that.

  Every value is translated to its integer code with dicts.get("UPOS", value).

  dicts  -- provides oobToken, noStackToken and get(column, value).
  config -- provides wordIndex, lines, stack and getAsFeature(index, column).
  """
  bufferOffsets = range(-2, 3)
  stackDepths = range(0, 4)

  features = torch.zeros(len(bufferOffsets) + len(stackDepths), dtype=torch.int)

  # Buffer part : words surrounding the current word.
  for slot, offset in enumerate(bufferOffsets) :
    position = config.wordIndex + offset
    if position in range(len(config.lines)) :
      tag = config.getAsFeature(position, "UPOS")
    else :
      tag = dicts.oobToken
    features[slot] = dicts.get("UPOS", tag)

  # Stack part : depth 0 is the top of the stack (last element of the list).
  for slot, depth in enumerate(stackDepths, start=len(bufferOffsets)) :
    if depth in range(len(config.stack)) :
      tag = config.getAsFeature(config.stack[-1-depth], "UPOS")
    else :
      tag = dicts.noStackToken
    features[slot] = dicts.get("UPOS", tag)

  return features
################################################################################

################################################################################
# For each stack element, add its POS and the POS of its governor
def extractFeaturesPosExtended(dicts, config) :
  """Encode the UPOS of a buffer window, of the stack top, and of stack governors.

  The returned tensor is one-dimensional, of dtype torch.int, with 13 slots :
    - slots 0..4 : UPOS of the words at offsets -2..+2 around config.wordIndex,
      or dicts.oobToken when that position is not a valid index of config.lines.
    - slots 5..12 : for each of the 4 topmost stack depths (top first), a pair
      (UPOS of the stack element, UPOS of its governor).

  For a stack depth that does not exist, both members of the pair are
  dicts.noStackToken. For an existing element whose HEAD is empty according to
  isEmpty, the governor slot is dicts.nullToken. Otherwise HEAD is converted
  with int() and used as the index whose UPOS is read.

  Every value is translated to its integer code with dicts.get("UPOS", value).

  dicts  -- provides oobToken, noStackToken, nullToken and get(column, value).
  config -- provides wordIndex, lines, stack and getAsFeature(index, column).
  """
  # NOTE(review): isEmpty is neither defined nor imported in the visible part
  # of this file -- confirm it is actually in scope at runtime.
  bufferOffsets = range(-2, 3)
  stackDepths = range(0, 4)

  features = torch.zeros(len(bufferOffsets) + 2*len(stackDepths), dtype=torch.int)

  # Buffer part : words surrounding the current word.
  for slot, offset in enumerate(bufferOffsets) :
    position = config.wordIndex + offset
    if position in range(len(config.lines)) :
      tag = config.getAsFeature(position, "UPOS")
    else :
      tag = dicts.oobToken
    features[slot] = dicts.get("UPOS", tag)

  # Stack part : two consecutive slots per depth, depth 0 being the stack top.
  slot = len(bufferOffsets)
  for depth in stackDepths :
    if depth in range(len(config.stack)) :
      element = config.stack[-1-depth]
      elementPos = config.getAsFeature(element, "UPOS")
      head = config.getAsFeature(element, "HEAD")
    else :
      elementPos = dicts.noStackToken
      head = dicts.nullToken

    headIsEmpty = isEmpty(head)
    if head == dicts.nullToken :
      # Also covers the missing stack element case set just above.
      governorPos = dicts.noStackToken
    elif headIsEmpty :
      governorPos = dicts.nullToken
    else :
      governorPos = config.getAsFeature(int(head), "UPOS")

    features[slot] = dicts.get("UPOS", elementPos)
    features[slot+1] = dicts.get("UPOS", governorPos)
    slot += 2

  return features
################################################################################