Skip to content
Snippets Groups Projects
Commit 651a04d8 authored by Franck Dary
Browse files

conlluCheckProblems.py is now able to find cycles in the syntactic tree

parent ad0945a4
No related branches found
No related tags found
No related merge requests found
......@@ -41,6 +41,7 @@ def checkSentence(sentence, conllMCD, conllMCDr) :
curDigit = 1
maxId = 0
multiWordEmptyNodes = set()
id2index = {}
# Verifying IDS
for i in range(len(sentence)) :
......@@ -50,6 +51,10 @@ def checkSentence(sentence, conllMCD, conllMCDr) :
errorAndExit("Empty column", sentence)
idStr = sentence[i][idIndex]
if idStr in id2index :
errorAndExit("ERROR in IDs : '%s' already seen"%idStr, sentence)
id2index[idStr] = i
if len(idStr.split('-')) == 2 :
curDigit = 1
......@@ -92,6 +97,24 @@ def checkSentence(sentence, conllMCD, conllMCDr) :
if int(govStr) > maxId :
errorAndExit("ERROR line %d gov \'%s\' is out of sentence :"%(i+1,govStr), sentence)
# Verifying cycles
for i in range(len(sentence)) :
if i in multiWordEmptyNodes :
continue
alreadySeen = {}
currentNode = i
while True :
alreadySeen[currentNode] = True
govStr = sentence[currentNode][govIndex]
if govStr == "0" :
break
currentNode = id2index[govStr]
if currentNode in alreadySeen :
errorAndExit("ERROR line %d loop in governors :"%(i+1), sentence)
################################################################################
################################################################################
if __name__ == "__main__" :
if len(sys.argv) != 3 :
printUsageAndExit()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment