From 651a04d854544b6a826d8904d39e4e2832db46e2 Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Fri, 24 Apr 2020 15:40:27 +0200 Subject: [PATCH] conlluCheckProblems.py is now able to find cycles in syntactic tree --- scripts/conlluCheckProblems.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/scripts/conlluCheckProblems.py b/scripts/conlluCheckProblems.py index e912e0a..c255f9a 100755 --- a/scripts/conlluCheckProblems.py +++ b/scripts/conlluCheckProblems.py @@ -41,6 +41,7 @@ def checkSentence(sentence, conllMCD, conllMCDr) : curDigit = 1 maxId = 0 multiWordEmptyNodes = set() + id2index = {} # Verifying IDS for i in range(len(sentence)) : @@ -50,6 +51,10 @@ def checkSentence(sentence, conllMCD, conllMCDr) : errorAndExit("Empty column", sentence) idStr = sentence[i][idIndex] + if idStr in id2index : + errorAndExit("ERROR in IDs : '%s' already seen"%idStr, sentence) + + id2index[idStr] = i if len(idStr.split('-')) == 2 : curDigit = 1 @@ -92,6 +97,24 @@ def checkSentence(sentence, conllMCD, conllMCDr) : if int(govStr) > maxId : errorAndExit("ERROR line %d gov \'%s\' is out of sentence :"%(i+1,govStr), sentence) +# Verifying cycles + for i in range(len(sentence)) : + if i in multiWordEmptyNodes : + continue + alreadySeen = {} + currentNode = i + while True : + alreadySeen[currentNode] = True + govStr = sentence[currentNode][govIndex] + if govStr == "0" : + break + currentNode = id2index[govStr] + if currentNode in alreadySeen : + errorAndExit("ERROR line %d loop in governors :"%(i+1), sentence) +################################################################################ + + +################################################################################ if __name__ == "__main__" : if len(sys.argv) != 3 : printUsageAndExit() -- GitLab