#! /usr/bin/python3

import sys
from readMCD import readMCD

# Split counts filled in by computeRules: multiword token form -> {rule: count},
# where a rule is the "@"-prefixed concatenation of the forms the token spans.
rules = {}
# Text prepended to every rule line printed by main.
prefix = "SPLITWORD "

def printUsageAndExit():
  """Print the command-line usage on stderr and terminate with exit status 1.

  Raises:
    SystemExit: always, with code 1.
  """
  print("USAGE : %s file.conllu conllu.mcd"%sys.argv[0], file=sys.stderr)
  # sys.exit is always available, whereas the bare exit() helper is injected
  # by the site module and is missing under `python -S` or in frozen builds.
  sys.exit(1)

def computeRules(sentence):
  """Record how each multiword token of a sentence splits into words.

  Args:
    sentence: sequence of entries where entry[0] is the id string and
      entry[1] the form. An id containing "-" (e.g. "3-4") is treated as a
      range covering every integer id from its first to its last number,
      inclusive.

  For every range entry, the rule "@form@form..." is built from the forms of
  the covered ids and its count is incremented in the module-level ``rules``
  dict, keyed first by the range entry's own form and then by the rule.

  Raises:
    KeyError: if a range covers an id that has no non-range entry.
    ValueError: if a range bound is not an integer.
  """
  # Forms of the plain (non-range) entries, indexed by their id string.
  formById = {word[0]: word[1] for word in sentence if "-" not in word[0]}

  for word in sentence:
    bounds = word[0].split("-")
    if len(bounds) < 2:
      continue
    first, last = int(bounds[0]), int(bounds[-1])
    rule = "".join("@" + formById[str(index)] for index in range(first, last + 1))
    # The rule is fully built before touching `rules`, so a failed lookup
    # leaves the counts untouched.
    counts = rules.setdefault(word[1], {})
    counts[rule] = counts.get(rule, 0) + 1

def main():
  """Extract the split rules of a CoNLL-U file and print them on stdout.

  Expects two command-line arguments: the CoNLL-U file and the MCD file
  passed to readMCD. Input lines are grouped into sentences; a line that is
  shorter than two characters once stripped, or that starts with '#', ends
  the current sentence. Each sentence is handed to computeRules. Afterwards
  one line per rule is printed as prefix + word + rule; for a given word the
  rules are sorted in descending order (piece count first, then text). Words
  with more than one rule trigger a warning on stderr.
  """
  # Re-wrap file descriptor 1 so that output is always encoded as UTF-8.
  sys.stdout = open(1, 'w', encoding='utf-8', closefd=False)

  if len(sys.argv) != 3:
    printUsageAndExit()

  # Only the second value returned by readMCD is used: it is indexed by column
  # name and gives that column's position in a tab-separated line.
  _, conllMCDr = readMCD(sys.argv[2])

  idId = int(conllMCDr["ID"])
  idForm = int(conllMCDr["FORM"])

  sentence = []

  # The context manager guarantees the input file is closed, even on error.
  with open(sys.argv[1], "r", encoding="utf8") as conllFile:
    for line in conllFile:
      stripped = line.strip()
      if len(stripped) < 2 or line[0] == '#':
        if sentence:
          computeRules(sentence)
        sentence = []
        continue

      columns = stripped.split('\t')
      sentence.append([columns[idId], columns[idForm]])

  # Flush the last sentence: without this it would be dropped whenever the
  # file does not end with a blank or comment line.
  if sentence:
    computeRules(sentence)

  for word, wordRules in rules.items():
    if len(wordRules) > 1:
      print("WARNING : Abiguity detected in \'%s\'"%(word+" "+str(wordRules)), file=sys.stderr)
    # Sort key is [number of '@'-separated pieces, full output line].
    toPrint = sorted(([len(rule.split('@')), prefix+word+rule] for rule in wordRules), reverse=True)
    for _, text in toPrint:
      print(text)

# Run the extraction only when this file is executed as a script.
if __name__ == "__main__" :
  main()