Skip to content
Snippets Groups Projects
Select Git revision
  • a953f55dea9a1e372688a5f593c0eb533cd0d54c
  • develop default protected
  • feature/xfce
  • master
4 results

initdockerclient.sh

Blame
  • conlluAddMissingColumns.py 1.56 KiB
    #! /usr/bin/python3
    
    import sys
    
    def printUsageAndExit() :
      """Print the command-line usage on stderr and terminate the program.

      Raises:
        SystemExit: always, with exit code 1.
      """
      print("USAGE : %s file.conllu mcd"%sys.argv[0], file=sys.stderr)
      # sys.exit instead of the `exit` helper : the latter is injected by the
      # `site` module for interactive sessions and is missing under `python -S`.
      sys.exit(1)
    
    def readMCD(mcdFilename) :
      """Read an MCD file mapping column indexes to column names.

      Every meaningful line holds exactly two whitespace-separated fields : the
      column index followed by the column name. Lines that are blank (fewer than
      two characters once stripped) and lines whose first non-blank character is
      '#' are ignored.

      Args:
        mcdFilename: path of the MCD file, read as UTF-8.

      Returns:
        A dict mapping each column index (kept as a string) to its column name.

      Raises:
        SystemExit: with exit code 1, after printing an error on stderr, when a
          line does not hold exactly two fields.
      """
      mcd = {}
      # The context manager closes the file even when aborting on a bad line.
      with open(mcdFilename, "r", encoding="utf8") as mcdFile :
        for line in mcdFile :
          clean = line.strip()
          if len(clean) < 2 or clean[0] == '#' :
            continue
          # split() without separator tolerates tabs and repeated or trailing
          # blanks, which split(' ') on the raw line reported as invalid.
          splited = clean.split()
          if len(splited) != 2 :
            print("ERROR : invalid mcd line \'%s\'. Aborting"%clean, file=sys.stderr)
            sys.exit(1)
          mcd[splited[0]] = splited[1]

      return mcd
    
    def fillMissingColumns(columns, mcd, mcdReversed) :
      """Return the columns of one token line with every missing value filled in.

      The line is first padded with empty values so that every column index
      declared in the MCD exists. Each empty value is then replaced :
        - in the column the MCD names "GOV" : by "0" when the token ID is "1",
          by "_" when the ID contains a '-', and by "1" otherwise ;
        - in any other column, including columns the MCD does not describe :
          by "_".

      Args:
        columns: values of the line, already split on tabs. Not modified.
        mcd: dict mapping column index (as a string) to column name.
        mcdReversed: dict mapping column name to column index (as a string).

      Returns:
        A new list holding the completed values.

      Raises:
        KeyError: when a "GOV" value must be filled but the MCD has no "ID"
          column.
      """
      # Pad so that the highest index declared in the MCD is a valid position.
      width = max([len(columns)] + [int(col)+1 for col in mcd])
      padded = columns + [""] * (width - len(columns))

      filled = []
      for i, value in enumerate(padded) :
        if len(value) > 0 :
          filled.append(value)
        # .get() : an empty column that the MCD does not describe is filled with
        # the generic placeholder instead of raising KeyError.
        elif mcd.get(str(i)) == "GOV" :
          tokenId = padded[int(mcdReversed["ID"])]
          if tokenId == "1" :
            filled.append("0")
          elif '-' in tokenId :
            filled.append("_")
          else :
            filled.append("1")
        else :
          filled.append("_")

      return filled

    if __name__ == "__main__" :
      if len(sys.argv) != 3 :
        printUsageAndExit()

      conllMCD = readMCD(sys.argv[2])
      conllMCDr = {v: k for k, v in conllMCD.items()}

      # Remembers whether the last line written was blank, so that the output
      # is always terminated by a blank line.
      lastWasEmpty = False

      # Same explicit encoding as the MCD file, and the handle gets closed.
      with open(sys.argv[1], "r", encoding="utf8") as conllFile :
        for line in conllFile :
          stripped = line.strip()
          lastWasEmpty = len(stripped) < 2
          # Blank lines and comment lines are copied unchanged.
          if lastWasEmpty or line[0] == '#' :
            print(stripped)
            continue
          print("\t".join(fillMissingColumns(stripped.split('\t'), conllMCD, conllMCDr)))

      if not lastWasEmpty :
        print("")