Skip to content
Snippets Groups Projects
Commit 882bb55b authored by Franck Dary's avatar Franck Dary
Browse files
parents b9dfbce8 334d4661
Branches
No related tags found
No related merge requests found
...@@ -122,6 +122,19 @@ UNIVERSAL_FEATURES = { ...@@ -122,6 +122,19 @@ UNIVERSAL_FEATURES = {
} }
################################################################################
def filter_columns(columns) :
  """Pick a fixed subset of columns and render each one at a fixed width.

  The columns at positions 0, 1, 3, 6 and 7 are kept (presumably ID, FORM,
  UPOS, HEAD and DEPREL of a CoNLL-U line -- to be confirmed against the
  caller). Each kept value is left-aligned and padded with spaces up to its
  display width (4, 8, 8, 4, 8). A value longer than its width is shortened
  to exactly that width by keeping its beginning and its end, joined by '…'.

  Returns the list of formatted strings, in the order given above.
  """
  positions = [0, 1, 3, 6, 7]
  widths = [4, 8, 8, 4, 8]
  # Fetch every selected value first, so a too-short `columns` fails before any formatting.
  selected = [columns[position] for position in positions]
  cells = []
  for (text, width) in zip(selected, widths) :
    if len(text) > width :
      # One character is reserved for the ellipsis; the head gets the extra
      # character when the remaining room is odd.
      head = text[0:math.ceil((width-1)/2)]
      tail = text[-((width-1)//2):]
      text = "{}…{}".format(head, tail)
    cells.append("{:{}}".format(text, width))
  return cells
################################################################################
################################################################################ ################################################################################
# UD Error is used when raising exceptions in this module # UD Error is used when raising exceptions in this module
class UDError(Exception) : class UDError(Exception) :
...@@ -526,7 +539,6 @@ def evaluate_wrapper(args) : ...@@ -526,7 +539,6 @@ def evaluate_wrapper(args) :
################################################################################ ################################################################################
def compute_errors(gold_file, system_file, evaluation, metric) :
class Error : class Error :
def __init__(self, gold_file, system_file, gold_word, system_word, metric) : def __init__(self, gold_file, system_file, gold_word, system_word, metric) :
self.gold = gold_word self.gold = gold_word
...@@ -534,13 +546,30 @@ def compute_errors(gold_file, system_file, evaluation, metric) : ...@@ -534,13 +546,30 @@ def compute_errors(gold_file, system_file, evaluation, metric) :
self.gold_sentence = gold_file.words[gold_file.sentences_words[self.gold.sentence].start:gold_file.sentences_words[self.gold.sentence].end] self.gold_sentence = gold_file.words[gold_file.sentences_words[self.gold.sentence].start:gold_file.sentences_words[self.gold.sentence].end]
self.pred_sentence = system_file.words[system_file.sentences_words[self.pred.sentence].start:system_file.sentences_words[self.pred.sentence].end] self.pred_sentence = system_file.words[system_file.sentences_words[self.pred.sentence].start:system_file.sentences_words[self.pred.sentence].end]
# TODO : do it for other than UPOS # TODO : do it for other than UPOS
self.type = gold.columns[UPOS]+"->"+pred.columns[UPOS] self.type = self.gold.columns[UPOS]+"->"+self.pred.columns[UPOS]
def __str__(self) :
  """Render the gold sentence and the predicted sentence side by side.

  Each word becomes one row made of its filtered columns; the row of the
  erroneous word (self.gold on the left, self.pred on the right) is
  prefixed with '>', every other row with a space. Rows are paired by
  position as 'GOLD | PREDICTED'; when one sentence is shorter, its missing
  rows are rendered as empty strings.
  """
  def render(sentence, target) :
    # One formatted row per word, flagging the word the error is about.
    return [(">" if word == target else " ") + " ".join(filter_columns(word.columns)) for word in sentence]

  left = render(self.gold_sentence, self.gold)
  right = render(self.pred_sentence, self.pred)
  # Pad the shorter side so both sides have the same number of rows.
  height = max(len(left), len(right))
  left += [""] * (height - len(left))
  right += [""] * (height - len(right))
  return "\n".join("{} | {}".format(gold_row, pred_row) for (gold_row, pred_row) in zip(left, right))
class Errors : class Errors :
def __init__(self, metric) : def __init__(self, metric, errors1=None, errors2=None) :
self.types = [] self.types = []
self.nb_errors = 0 self.nb_errors = 0
self.metric = metric self.metric = metric
if errors1 is not None and errors2 is not None :
for type in errors1.types :
for error in type.errors :
if not errors2.has(error) :
self.add(error)
def __len__(self) : def __len__(self) :
return self.nb_errors return self.nb_errors
def add(self, error) : def add(self, error) :
...@@ -551,6 +580,10 @@ def compute_errors(gold_file, system_file, evaluation, metric) : ...@@ -551,6 +580,10 @@ def compute_errors(gold_file, system_file, evaluation, metric) :
return return
self.types.append(ErrorType(error.type)) self.types.append(ErrorType(error.type))
self.types[-1].add(error) self.types[-1].add(error)
def has(self, error) :
  """Tell whether an equivalent error is already stored in this collection.

  The error is looked up in the first stored error type whose `type`
  equals `error.type`; the answer is whatever that error type's own
  `has(error)` returns. When no stored error type matches, the result is
  an explicit False (it used to fall through and return None).
  """
  for candidate in self.types :
    if candidate.type == error.type :
      return candidate.has(error)
  # No bucket for this kind of error: it cannot be present.
  return False
def sort(self) : def sort(self) :
self.types.sort(key=len, reverse=True) self.types.sort(key=len, reverse=True)
...@@ -562,7 +595,16 @@ def compute_errors(gold_file, system_file, evaluation, metric) : ...@@ -562,7 +595,16 @@ def compute_errors(gold_file, system_file, evaluation, metric) :
return len(self.errors) return len(self.errors)
def add(self, error) : def add(self, error) :
self.errors.append(error) self.errors.append(error)
def has(self, error) :
  """Return True when a stored error has the same gold word as `error`.

  Stored errors are compared through their `gold` attribute only; the
  result is False when nothing matches or when no error is stored.
  """
  return any(known.gold == error.gold for known in self.errors)
################################################################################
################################################################################
def compute_errors(gold_file, system_file, evaluation, metric) :
errors = Errors(metric) errors = Errors(metric)
for alignment_word in evaluation[metric][1] : for alignment_word in evaluation[metric][1] :
gold = alignment_word.gold_word gold = alignment_word.gold_word
...@@ -595,12 +637,16 @@ def main() : ...@@ -595,12 +637,16 @@ def main() :
# Evaluate # Evaluate
gold_ud, evaluations = evaluate_wrapper(args) gold_ud, evaluations = evaluate_wrapper(args)
errors_by_file = []
examples_list = []
for (system_ud, evaluation) in evaluations : for id1 in range(len(evaluations)) :
(system_ud, evaluation) = evaluations[id1]
fnamelen = len(system_ud.filename) fnamelen = len(system_ud.filename)
print("*"*math.ceil((80-2-fnamelen)/2),system_ud.filename,"*"*math.floor((80-2-fnamelen)/2)) print("*"*math.ceil((80-2-fnamelen)/2),system_ud.filename,"*"*math.floor((80-2-fnamelen)/2))
# Compute errors # Compute errors
errors_list = [compute_errors(gold_ud, system_ud, evaluation, metric) for metric in errors_metrics] errors_list = [compute_errors(gold_ud, system_ud, evaluation, metric) for metric in errors_metrics]
errors_by_file.append(errors_list)
# Print the evaluation # Print the evaluation
if args.counts : if args.counts :
...@@ -626,15 +672,61 @@ def main() : ...@@ -626,15 +672,61 @@ def main() :
"{:10.2f}".format(100 * evaluation[metric][0].aligned_accuracy) if evaluation[metric][0].aligned_accuracy is not None else "" "{:10.2f}".format(100 * evaluation[metric][0].aligned_accuracy) if evaluation[metric][0].aligned_accuracy is not None else ""
)) ))
for errors in errors_list : for id2 in range(len(errors_list)) :
errors = errors_list[id2]
errors.sort() errors.sort()
print("")
print("Most frequent errors for metric '{}' :".format(errors.metric)) print("Most frequent errors for metric '{}' :".format(errors.metric))
for error_type in errors.types[:10] : print("{:>12} {:>5} {:>6} {}\n {:->37}".format("ID", "NB", "%AGE", "GOLD->SYSTEM", ""))
print("{:>12} {:5} {:6.2f}%".format("Total", len(errors), 100))
for id3 in range(len(errors.types[:10])) :
error_type = errors.types[:10][id3]
t = error_type.type t = error_type.type
nb = len(error_type) nb = len(error_type)
percent = 100.0*nb/len(errors) percent = 100.0*nb/len(errors)
print("{:5} {:5.2f}% {}".format(nb, percent, t)) id = ":".join(map(str,[id1,id2,id3,"*"]))
print("{:>12} {:5} {:6.2f}% {}".format(id, nb, percent, t))
for id4 in range(len(error_type)) :
examples_list.append((":".join(map(str,[id1,id2,id3,id4])), error_type.errors[id4]))
print("")
for id1 in range(len(evaluations)) :
(system1_ud, evaluation) = evaluations[id1]
for id2 in range(len(evaluations)) :
if id1 == id2 :
continue
(system2_ud, evaluation) = evaluations[id2]
errors1 = errors_by_file[id1]
errors2 = errors_by_file[id2]
if len(errors1) > 0 :
print("{} Error comparison {}".format("*"*31, "*"*31))
print("{:>30} : {}".format("These errors are present in", system1_ud.filename))
print("{:>30} : {}".format("and not in", system2_ud.filename))
for id3 in range(len(errors1)) :
metric = errors1[id3].metric
errors_diff = Errors(metric, errors1[id3], errors2[id3])
errors_diff.sort()
print("{:>12} {:5} {:6.2f}%".format("Total", len(errors_diff), 100))
for id4 in range(len(errors_diff.types[:10])) :
error_type = errors_diff.types[:10][id4]
t = error_type.type
nb = len(error_type)
# Share of this error type among the errors of the comparison (errors_diff), not among the stale `errors` left over from the per-file loop.
percent = 100.0*nb/len(errors_diff)
id = ":".join(map(str,["d"+str(id1),id3,id4,"*"]))
print("{:>12} {:5} {:6.2f}% {}".format(id, nb, percent, t))
for id5 in range(len(error_type)) :
examples_list.append((":".join(map(str,["d"+str(id1),id3,id4,id5])), error_type.errors[id5]))
print("")
if len(examples_list) > 0 :
print("{}List of all errors by their ID{}".format("*"*25,"*"*25))
print("{}{:^30}{}\n".format("*"*25,"Format is GOLD | PREDICTED","*"*25))
for (id,error) in examples_list :
print("ID="+id)
print(error)
print("")
################################################################################ ################################################################################
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment