import json
from readMCD import readMCD

################################################################################
class Dicts:
  """Per-column vocabularies mapping string values to integer indexes.

  ``self.dicts`` has the shape ``{column name: {value: index}}``. When the
  vocabularies are built by ``readConllu``, every column starts with the same
  seven special tokens (indexes 0 to 6, see ``_specialTokens``) and the values
  found in the file are numbered after them, in order of first appearance.
  """

  def __init__(self):
    self.dicts = {}
    # Special tokens stored in every column vocabulary built by readConllu.
    self.unkToken = "__unknown__"
    self.nullToken = "__null__"
    self.noStackToken = "__nostack__"
    self.oobToken = "__oob__"
    self.noDepLeft = "__nodepleft__"
    self.noDepRight = "__nodepright__"
    self.noGov = "__nogov__"

  def _specialTokens(self):
    """Return the special tokens; the position in the list is the index."""
    return [
      self.unkToken, self.nullToken, self.noStackToken, self.oobToken,
      self.noDepLeft, self.noDepRight, self.noGov,
    ]

  def readConllu(self, filename, colsSet=None):
    """Build the vocabularies from a tab-separated CoNLL-U style file.

    The column layout defaults to the ten standard CoNLL-U columns. A line
    containing ``# global.columns =`` replaces it with the layout written
    after the ``=`` sign. Empty lines and other ``#`` lines are ignored.

    ``self.dicts`` is replaced when the first data line is met; a file without
    any data line leaves it untouched.

    Args:
      filename: Path of the file to read (decoded as UTF-8).
      colsSet: Iterable of the column names to index. ``None`` (the default)
        selects every column of the layout active at the first data line.

    Raises:
      KeyError: A requested column is not part of the active layout.
      IndexError: A data line has fewer fields than a requested column needs.
    """
    defaultMCD = "ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC"
    # NOTE(review): readMCD is assumed to return a pair
    # (column name -> field index, field index -> column name) - confirm.
    col2index, _ = readMCD(defaultMCD)

    targetColumns = []

    # The context manager closes the file even when a line is malformed;
    # CoNLL-U files are UTF-8, so do not depend on the locale's encoding.
    with open(filename, "r", encoding="utf-8") as conlluFile:
      for line in conlluFile:
        line = line.strip()
        if "# global.columns =" in line:
          mcd = line.split('=')[-1].strip()
          col2index, _ = readMCD(mcd)
          continue
        if not line or line[0] == '#':
          continue

        # The target columns are only known once the layout is settled, that
        # is when the first data line is reached.
        if not targetColumns:
          if colsSet is None:
            targetColumns = list(col2index.keys())
          else:
            targetColumns = list(colsSet)
          specialTokens = self._specialTokens()
          # One independent dict per column.
          self.dicts = {
            col: {token: index for index, token in enumerate(specialTokens)}
            for col in targetColumns
          }

        fields = line.split('\t')
        for col in targetColumns:
          value = fields[col2index[col]]
          vocabulary = self.dicts[col]
          if value not in vocabulary:
            vocabulary[value] = len(vocabulary)

  def get(self, col, value):
    """Return the index of ``value`` in the vocabulary of column ``col``.

    The exact string is looked up first, then its lowercased form; when both
    are missing, the index of the unknown token is returned.

    Raises:
      KeyError: ``col`` has no vocabulary, or the value is unknown and the
        vocabulary does not contain the unknown token.
    """
    vocabulary = self.dicts[col]
    if value in vocabulary:
      return vocabulary[value]
    lowered = value.lower()
    if lowered in vocabulary:
      return vocabulary[lowered]
    return vocabulary[self.unkToken]

  def save(self, target):
    """Write ``self.dicts`` as JSON to the file path ``target``."""
    # Closing the file explicitly guarantees the JSON is flushed to disk
    # before this method returns.
    with open(target, "w", encoding="utf-8") as out:
      json.dump(self.dicts, out)

  def load(self, target):
    """Replace ``self.dicts`` with the JSON content of the file ``target``."""
    with open(target, "r", encoding="utf-8") as source:
      self.dicts = json.load(source)
################################################################################