Skip to content
Snippets Groups Projects
Commit 9d2a9871 authored by Franck Dary
Browse files

The problem-checking script now also outputs the sentences that have no problems

parent 7b65bf7f
No related branches found
No related tags found
No related merge requests found
...@@ -24,11 +24,11 @@ def checkMCD(mcd) : ...@@ -24,11 +24,11 @@ def checkMCD(mcd) :
################################################################################ ################################################################################
def logError(message, sentence) : def logError(message, sentence) :
print(message) print(message, file=sys.stderr)
for line in sentence : for line in sentence :
for col in line : for col in line :
print(col,end="\t") print(col,end="\t", file=sys.stderr)
print("") print("", file=sys.stderr)
################################################################################ ################################################################################
################################################################################ ################################################################################
...@@ -43,17 +43,19 @@ def checkSentence(fileLineIndex, sentence, conllMCD, conllMCDr) : ...@@ -43,17 +43,19 @@ def checkSentence(fileLineIndex, sentence, conllMCD, conllMCDr) :
multiWordEmptyNodes = set() multiWordEmptyNodes = set()
id2index = {} id2index = {}
hadErr = False
# Verifying IDS # Verifying IDS
for i in range(len(sentence)) : for i in range(len(sentence)) :
for col in sentence[i] : for col in sentence[i] :
if len(col) == 0 : if len(col) == 0 :
logError("Empty column on line %s"%(fileLineIndex+i), sentence) logError("Empty column on line %s"%(fileLineIndex+i), sentence)
return return False
idStr = sentence[i][idIndex] idStr = sentence[i][idIndex]
if idStr in id2index : if idStr in id2index :
logError("ERROR in IDs : line %s '%s' already seen"%(fileLineIndex+i,idStr), sentence) logError("ERROR in IDs : line %s '%s' already seen"%(fileLineIndex+i,idStr), sentence)
hadErr = True
id2index[idStr] = i id2index[idStr] = i
...@@ -63,11 +65,13 @@ def checkSentence(fileLineIndex, sentence, conllMCD, conllMCDr) : ...@@ -63,11 +65,13 @@ def checkSentence(fileLineIndex, sentence, conllMCD, conllMCDr) :
multiWordEmptyNodes.add(i) multiWordEmptyNodes.add(i)
if int(splited[0]) != curId+1 or int(splited[0]) >= int(splited[1]) : if int(splited[0]) != curId+1 or int(splited[0]) >= int(splited[1]) :
logError("ERROR in line %s, IDs : %s"%(fileLineIndex+i,idStr), sentence) logError("ERROR in line %s, IDs : %s"%(fileLineIndex+i,idStr), sentence)
hadErr = True
elif len(idStr.split('.')) == 2 : elif len(idStr.split('.')) == 2 :
multiWordEmptyNodes.add(i) multiWordEmptyNodes.add(i)
splited = idStr.split('.') splited = idStr.split('.')
if int(splited[0]) != curId or int(splited[1]) != curDigit : if int(splited[0]) != curId or int(splited[1]) != curDigit :
logError("ERROR in line %s, IDs : %s"%(fileLineIndex+i,idStr), sentence) logError("ERROR in line %s, IDs : %s"%(fileLineIndex+i,idStr), sentence)
hadErr = True
curDigit += 1 curDigit += 1
elif idStr.isdigit() : elif idStr.isdigit() :
curId += 1 curId += 1
...@@ -75,8 +79,10 @@ def checkSentence(fileLineIndex, sentence, conllMCD, conllMCDr) : ...@@ -75,8 +79,10 @@ def checkSentence(fileLineIndex, sentence, conllMCD, conllMCDr) :
maxId = max(maxId, int(idStr)) maxId = max(maxId, int(idStr))
if int(idStr) != curId : if int(idStr) != curId :
logError("ERROR in line %s, IDs : %s"%(fileLineIndex+i,idStr), sentence) logError("ERROR in line %s, IDs : %s"%(fileLineIndex+i,idStr), sentence)
hadErr = True
else : else :
logError("ERROR in line %s, IDs : %s"%(fileLineIndex+i,idStr), sentence) logError("ERROR in line %s, IDs : %s"%(fileLineIndex+i,idStr), sentence)
hadErr = True
nbRoot = 0 nbRoot = 0
# Verifying root # Verifying root
...@@ -87,6 +93,7 @@ def checkSentence(fileLineIndex, sentence, conllMCD, conllMCDr) : ...@@ -87,6 +93,7 @@ def checkSentence(fileLineIndex, sentence, conllMCD, conllMCDr) :
if nbRoot != 1 : if nbRoot != 1 :
logError("ERROR %d root in sentence on line %s:"%(nbRoot,fileLineIndex), sentence) logError("ERROR %d root in sentence on line %s:"%(nbRoot,fileLineIndex), sentence)
hadErr = True
# Verifying govs # Verifying govs
for i in range(len(sentence)) : for i in range(len(sentence)) :
...@@ -95,8 +102,11 @@ def checkSentence(fileLineIndex, sentence, conllMCD, conllMCDr) : ...@@ -95,8 +102,11 @@ def checkSentence(fileLineIndex, sentence, conllMCD, conllMCDr) :
govStr = sentence[i][govIndex] govStr = sentence[i][govIndex]
if not govStr.isdigit() : if not govStr.isdigit() :
logError("ERROR line %d gov \'%s\' is not positive integer :"%(fileLineIndex+i,govStr), sentence) logError("ERROR line %d gov \'%s\' is not positive integer :"%(fileLineIndex+i,govStr), sentence)
hadErr = True
if int(govStr) > maxId : if int(govStr) > maxId :
logError("ERROR line %d gov \'%s\' is out of sentence :"%(fileLineIndex+i,govStr), sentence) logError("ERROR line %d gov \'%s\' is out of sentence :"%(fileLineIndex+i,govStr), sentence)
hadErr = True
# Verifying cycles # Verifying cycles
alreadyReported = {} alreadyReported = {}
...@@ -114,8 +124,11 @@ def checkSentence(fileLineIndex, sentence, conllMCD, conllMCDr) : ...@@ -114,8 +124,11 @@ def checkSentence(fileLineIndex, sentence, conllMCD, conllMCDr) :
if currentNode in alreadySeen : if currentNode in alreadySeen :
if currentNode not in alreadyReported : if currentNode not in alreadyReported :
logError("ERROR line %d (id=%s) loop in governors :"%(fileLineIndex+currentNode, sentence[currentNode][idIndex]), sentence) logError("ERROR line %d (id=%s) loop in governors :"%(fileLineIndex+currentNode, sentence[currentNode][idIndex]), sentence)
hadErr = True
alreadyReported[currentNode] = True alreadyReported[currentNode] = True
break break
return not hadErr
################################################################################ ################################################################################
...@@ -139,13 +152,17 @@ if __name__ == "__main__" : ...@@ -139,13 +152,17 @@ if __name__ == "__main__" :
if len(clean) < 3 : if len(clean) < 3 :
if sentFirstLine == -1 : if sentFirstLine == -1 :
exit(1) exit(1)
checkSentence(sentFirstLine, sentence, conllMCDr, conllMCD) if checkSentence(sentFirstLine, sentence, conllMCDr, conllMCD) :
for line in sentence :
print("\t".join(line))
print("")
sentence = [] sentence = []
sentFirstLine = -1 sentFirstLine = -1
continue continue
if clean[0] == '#' : if clean[0] == '#' :
splited = line.split("global.columns =") splited = line.split("global.columns =")
if len(splited) > 1 : if len(splited) > 1 :
print(line.strip())
conllMCD, conllMCDr = readMCD(splited[-1].strip()) conllMCD, conllMCDr = readMCD(splited[-1].strip())
checkMCD(conllMCD) checkMCD(conllMCD)
continue continue
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment