Skip to content
Snippets Groups Projects
Commit 882bb55b authored by Franck Dary's avatar Franck Dary
Browse files
parents b9dfbce8 334d4661
No related branches found
No related tags found
No related merge requests found
...@@ -122,6 +122,19 @@ UNIVERSAL_FEATURES = { ...@@ -122,6 +122,19 @@ UNIVERSAL_FEATURES = {
} }
################################################################################
def filter_columns(columns) :
  """Return the fixed-width cells used to display one word of an example.

  Fields 0, 1, 3, 6 and 7 of `columns` are kept, in that order, and rendered
  with widths 4, 8, 8, 4 and 8 respectively.
  NOTE(review): these look like the ID, FORM, UPOS, HEAD and DEPREL fields of
  a CoNLL-U line -- confirm against the column constants of this module.

  A value that fits its width is left-aligned and padded with spaces. A longer
  value is shortened to exactly the width by keeping its beginning and its
  end around a single "…" character.

  Args:
    columns: indexable sequence of strings holding at least 8 entries.

  Returns:
    A list of 5 strings, one per kept field.

  Raises:
    IndexError: if `columns` has fewer than 8 entries.
  """
  # (index in `columns`, display width) of every field that is kept.
  layout = [(0, 4), (1, 8), (3, 8), (6, 4), (7, 8)]
  res = []
  for (index, max_len) in layout :
    content = columns[index]
    if len(content) > max_len :
      # One character is reserved for the ellipsis; when the remaining room
      # is odd, the head receives the extra character.
      tail_len = (max_len-1)//2
      head_len = (max_len-1)-tail_len
      # Slice from an explicit start: content[-0:] would be the whole string.
      content = "{}…{}".format(content[:head_len], content[len(content)-tail_len:])
    res.append("{:{}}".format(content, max_len))
  return res
################################################################################
################################################################################ ################################################################################
# UD Error is used when raising exceptions in this module # UD Error is used when raising exceptions in this module
class UDError(Exception) : class UDError(Exception) :
...@@ -526,43 +539,72 @@ def evaluate_wrapper(args) : ...@@ -526,43 +539,72 @@ def evaluate_wrapper(args) :
################################################################################ ################################################################################
def compute_errors(gold_file, system_file, evaluation, metric) : class Error :
class Error : def __init__(self, gold_file, system_file, gold_word, system_word, metric) :
def __init__(self, gold_file, system_file, gold_word, system_word, metric) : self.gold = gold_word
self.gold = gold_word self.pred = system_word
self.pred = system_word self.gold_sentence = gold_file.words[gold_file.sentences_words[self.gold.sentence].start:gold_file.sentences_words[self.gold.sentence].end]
self.gold_sentence = gold_file.words[gold_file.sentences_words[self.gold.sentence].start:gold_file.sentences_words[self.gold.sentence].end] self.pred_sentence = system_file.words[system_file.sentences_words[self.pred.sentence].start:system_file.sentences_words[self.pred.sentence].end]
self.pred_sentence = system_file.words[system_file.sentences_words[self.pred.sentence].start:system_file.sentences_words[self.pred.sentence].end] # TODO : do it for other than UPOS
# TODO : do it for other than UPOS self.type = self.gold.columns[UPOS]+"->"+self.pred.columns[UPOS]
self.type = gold.columns[UPOS]+"->"+pred.columns[UPOS] def __str__(self) :
result = []
class Errors : gold_lines = []
def __init__(self, metric) : pred_lines = []
self.types = [] for word in self.gold_sentence :
self.nb_errors = 0 gold_lines.append((">" if word == self.gold else " ") + " ".join(filter_columns(word.columns)))
self.metric = metric for word in self.pred_sentence :
def __len__(self) : pred_lines.append((">" if word == self.pred else " ") + " ".join(filter_columns(word.columns)))
return self.nb_errors
def add(self, error) : for index in range(max(len(gold_lines), len(pred_lines))) :
self.nb_errors += 1 result.append("{} | {}".format(gold_lines[index] if index < len(gold_lines) else "", pred_lines[index] if index < len(pred_lines) else ""))
for t in self.types : return "\n".join(result)
if t.type == error.type :
t.add(error) class Errors :
return def __init__(self, metric, errors1=None, errors2=None) :
self.types.append(ErrorType(error.type)) self.types = []
self.types[-1].add(error) self.nb_errors = 0
def sort(self) : self.metric = metric
self.types.sort(key=len, reverse=True) if errors1 is not None and errors2 is not None :
for type in errors1.types :
class ErrorType : for error in type.errors :
def __init__(self, error_type) : if not errors2.has(error) :
self.type = error_type self.add(error)
self.errors = [] def __len__(self) :
def __len__(self) : return self.nb_errors
return len(self.errors) def add(self, error) :
def add(self, error) : self.nb_errors += 1
self.errors.append(error) for t in self.types :
if t.type == error.type :
t.add(error)
return
self.types.append(ErrorType(error.type))
self.types[-1].add(error)
def has(self, error) :
for t in self.types :
if t.type == error.type :
return t.has(error)
def sort(self) :
self.types.sort(key=len, reverse=True)
class ErrorType :
  """Group of errors that share the same type label.

  Attributes are plain and public:
    type   -- the label given at construction (compared with `error.type`
              by the code that fills the group).
    errors -- the errors added so far, in insertion order.
  """
  def __init__(self, error_type) :
    """Create an empty group labelled `error_type`."""
    self.type = error_type
    self.errors = []
  def __len__(self) :
    """Return the number of errors stored in this group."""
    return len(self.errors)
  def add(self, error) :
    """Append `error` to the group (no duplicate check is performed)."""
    self.errors.append(error)
  def has(self, error) :
    """Return True if a stored error has a `gold` equal to `error.gold`.

    Only the `gold` attribute is compared, so two errors with the same gold
    word but different predictions are considered the same here.
    """
    return any(other_error.gold == error.gold for other_error in self.errors)
################################################################################
################################################################################
def compute_errors(gold_file, system_file, evaluation, metric) :
errors = Errors(metric) errors = Errors(metric)
for alignment_word in evaluation[metric][1] : for alignment_word in evaluation[metric][1] :
gold = alignment_word.gold_word gold = alignment_word.gold_word
...@@ -595,12 +637,16 @@ def main() : ...@@ -595,12 +637,16 @@ def main() :
# Evaluate # Evaluate
gold_ud, evaluations = evaluate_wrapper(args) gold_ud, evaluations = evaluate_wrapper(args)
errors_by_file = []
examples_list = []
for (system_ud, evaluation) in evaluations : for id1 in range(len(evaluations)) :
(system_ud, evaluation) = evaluations[id1]
fnamelen = len(system_ud.filename) fnamelen = len(system_ud.filename)
print("*"*math.ceil((80-2-fnamelen)/2),system_ud.filename,"*"*math.floor((80-2-fnamelen)/2)) print("*"*math.ceil((80-2-fnamelen)/2),system_ud.filename,"*"*math.floor((80-2-fnamelen)/2))
# Compute errors # Compute errors
errors_list = [compute_errors(gold_ud, system_ud, evaluation, metric) for metric in errors_metrics] errors_list = [compute_errors(gold_ud, system_ud, evaluation, metric) for metric in errors_metrics]
errors_by_file.append(errors_list)
# Print the evaluation # Print the evaluation
if args.counts : if args.counts :
...@@ -626,15 +672,61 @@ def main() : ...@@ -626,15 +672,61 @@ def main() :
"{:10.2f}".format(100 * evaluation[metric][0].aligned_accuracy) if evaluation[metric][0].aligned_accuracy is not None else "" "{:10.2f}".format(100 * evaluation[metric][0].aligned_accuracy) if evaluation[metric][0].aligned_accuracy is not None else ""
)) ))
for errors in errors_list : for id2 in range(len(errors_list)) :
errors = errors_list[id2]
errors.sort() errors.sort()
print("")
print("Most frequent errors for metric '{}' :".format(errors.metric)) print("Most frequent errors for metric '{}' :".format(errors.metric))
for error_type in errors.types[:10] : print("{:>12} {:>5} {:>6} {}\n {:->37}".format("ID", "NB", "%AGE", "GOLD->SYSTEM", ""))
print("{:>12} {:5} {:6.2f}%".format("Total", len(errors), 100))
for id3 in range(len(errors.types[:10])) :
error_type = errors.types[:10][id3]
t = error_type.type t = error_type.type
nb = len(error_type) nb = len(error_type)
percent = 100.0*nb/len(errors) percent = 100.0*nb/len(errors)
print("{:5} {:5.2f}% {}".format(nb, percent, t)) id = ":".join(map(str,[id1,id2,id3,"*"]))
print("{:>12} {:5} {:6.2f}% {}".format(id, nb, percent, t))
for id4 in range(len(error_type)) :
examples_list.append((":".join(map(str,[id1,id2,id3,id4])), error_type.errors[id4]))
print("")
for id1 in range(len(evaluations)) :
(system1_ud, evaluation) = evaluations[id1]
for id2 in range(len(evaluations)) :
if id1 == id2 :
continue
(system2_ud, evaluation) = evaluations[id2]
errors1 = errors_by_file[id1]
errors2 = errors_by_file[id2]
if len(errors1) > 0 :
print("{} Error comparison {}".format("*"*31, "*"*31))
print("{:>30} : {}".format("These errors are present in", system1_ud.filename))
print("{:>30} : {}".format("and not in", system2_ud.filename))
for id3 in range(len(errors1)) :
metric = errors1[id3].metric
errors_diff = Errors(metric, errors1[id3], errors2[id3])
errors_diff.sort()
print("{:>12} {:5} {:6.2f}%".format("Total", len(errors_diff), 100))
for id4 in range(len(errors_diff.types[:10])) :
error_type = errors_diff.types[:10][id4]
t = error_type.type
nb = len(error_type)
percent = 100.0*nb/len(errors)
id = ":".join(map(str,["d"+str(id1),id3,id4,"*"]))
print("{:>12} {:5} {:6.2f}% {}".format(id, nb, percent, t))
for id5 in range(len(error_type)) :
examples_list.append((":".join(map(str,["d"+str(id1),id3,id4,id5])), error_type.errors[id5]))
print("")
if len(examples_list) > 0 :
print("{}List of all errors by their ID{}".format("*"*25,"*"*25))
print("{}{:^30}{}\n".format("*"*25,"Format is GOLD | PREDICTED","*"*25))
for (id,error) in examples_list :
print("ID="+id)
print(error)
print("")
################################################################################ ################################################################################
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment