Skip to content
Snippets Groups Projects
Commit 3f1c2788 authored by Franck Dary's avatar Franck Dary
Browse files

Working random decode. Using argparser. Added evaluation script.

parent bbf9e502
No related branches found
No related tags found
No related merge requests found
......@@ -10,6 +10,7 @@ class Config :
self.predicted = set({"HEAD", "DEPREL"})
self.wordIndex = 0
self.stack = []
self.comments = []
def addLine(self, cols) :
self.lines.append([[val,""] for val in cols])
......@@ -71,23 +72,29 @@ class Config :
value = str(self.getAsFeature(lineIndex, self.index2col[colIndex]))
if value == "" :
value = "_"
elif self.index2col[colIndex] == "HEAD" and value != "0":
elif self.index2col[colIndex] == "HEAD" and value != "-1":
value = self.getAsFeature(int(value), "ID")
elif self.index2col[colIndex] == "HEAD" and value == "-1":
value = "0"
toPrint.append(value)
print("\t".join(toPrint), file=output)
print("", file=output)
def print(self, output) :
print("# global.columns = %s"%(" ".join(self.col2index.keys())), file=output)
def print(self, output, header=False) :
if header :
print("# global.columns = %s"%(" ".join(self.col2index.keys())), file=output)
print("\n".join(self.comments))
for index in range(len(self.lines)) :
toPrint = []
for colIndex in range(len(self.lines[index])) :
value = str(self.getAsFeature(index, self.index2col[colIndex]))
if value == "" :
value = "_"
elif self.index2col[colIndex] == "HEAD" and value != "0":
elif self.index2col[colIndex] == "HEAD" and value != "-1":
value = self.getAsFeature(int(value), "ID")
elif self.index2col[colIndex] == "HEAD" and value == "-1":
value = "0"
toPrint.append(value)
print("\t".join(toPrint), file=output)
print("")
......@@ -100,12 +107,14 @@ def readConllu(filename) :
col2index, index2col = readMCD(defaultMCD)
currentIndex = 0
id2index = {}
comments = []
for line in open(filename, "r") :
line = line.strip()
if "# global.columns =" in line :
mcd = line.split('=')[-1].strip()
col2index, index2col = readMCD(mcd)
continue
if len(line) == 0 :
for index in range(len(configs[-1])) :
head = configs[-1].getGold(index, "HEAD")
......@@ -115,16 +124,22 @@ def readConllu(filename) :
continue
configs[-1].set(index, "HEAD", id2index[head], False)
configs[-1].comments = comments
configs.append(Config(col2index, index2col))
currentIndex = 0
id2index = {}
comments = []
continue
if line[0] == '#' :
comments.append(line)
continue
if len(configs) == 0 :
configs.append(Config(col2index, index2col))
currentIndex = 0
id2index = {}
splited = line.split('\t')
......
......@@ -44,7 +44,7 @@ class Transition :
if self.name == "SHIFT" :
return config.wordIndex < len(config.lines) - 1
if self.name == "REDUCE" :
return len(config.stack) > 0
return len(config.stack) > 0 and not isEmpty(config.getAsFeature(config.stack[-1], "HEAD"))
if self.name == "EOS" :
return config.wordIndex == len(config.lines) - 1
......@@ -77,18 +77,18 @@ def applyReduce(config) :
################################################################################
def applyEOS(config) :
rootCandidates = [index for index in config.stack if isEmpty(config.getAsFeature(index, "HEAD"))]
rootCandidates = [index for index in config.stack if not config.isMultiword(index) and isEmpty(config.getAsFeature(index, "HEAD"))]
if len(rootCandidates) == 0 :
print("ERROR : no candidates for root", file=sys.stderr)
config.printForDebug(sys.stderr)
exit(1)
rootIndex = rootCandidates[0]
config.set(rootIndex, "HEAD", "0")
config.set(rootIndex, "HEAD", "-1")
config.set(rootIndex, "DEPREL", "root")
for index in range(len(config.lines)) :
if not isEmpty(config.getAsFeature(index, "HEAD")) :
if config.isMultiword(index) or not isEmpty(config.getAsFeature(index, "HEAD")) :
continue
config.set(index, "HEAD", str(rootIndex))
################################################################################
......
#!/usr/bin/env python3
# Compatible with Python 2.7 and 3.2+, can be used either as a module
# or a standalone executable.
#
# Copyright 2017, 2018 Institute of Formal and Applied Linguistics (UFAL),
# Faculty of Mathematics and Physics, Charles University, Czech Republic.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Authors: Milan Straka, Martin Popel <surname@ufal.mff.cuni.cz>
#
# Changelog:
# - [12 Apr 2018] Version 0.9: Initial release.
# - [19 Apr 2018] Version 1.0: Fix bug in MLAS (duplicate entries in functional_children).
# Add --counts option.
# - [02 May 2018] Version 1.1: When removing spaces to match gold and system characters,
# consider all Unicode characters of category Zs instead of
# just ASCII space.
# - [25 Jun 2018] Version 1.2: Use python3 in the she-bang (instead of python).
# In Python2, make the whole computation use `unicode` strings.
#
# Updated by Franck Dary for Macaon
# Command line usage
# ------------------
# conll18_ud_eval.py gold_conllu_file system_conllu_file
#
# Metrics printed (as precision, recall, F1 score,
# and in case the metric is computed on aligned words also accuracy on these):
# - Tokens: how well do the gold tokens match system tokens
# - Sentences: how well do the gold sentences match system sentences
# - Words: how well can the gold words be aligned to system words
# - UPOS: using aligned words, how well does UPOS match
# - XPOS: using aligned words, how well does XPOS match
# - UFeats: using aligned words, how well does universal FEATS match
# - AllTags: using aligned words, how well does UPOS+XPOS+FEATS match
# - Lemmas: using aligned words, how well does LEMMA match
# - UAS: using aligned words, how well does HEAD match
# - LAS: using aligned words, how well does HEAD+DEPREL(ignoring subtypes) match
# - CLAS: using aligned words with content DEPREL, how well does
# HEAD+DEPREL(ignoring subtypes) match
# - MLAS: using aligned words with content DEPREL, how well does
# HEAD+DEPREL(ignoring subtypes)+UPOS+UFEATS+FunctionalChildren(DEPREL+UPOS+UFEATS) match
# - BLEX: using aligned words with content DEPREL, how well does
# HEAD+DEPREL(ignoring subtypes)+LEMMAS match
# - if -c is given, raw counts of correct/gold_total/system_total/aligned words are printed
# instead of precision/recall/F1/AlignedAccuracy for all metrics.
# API usage
# ---------
# - load_conllu(file)
# - loads CoNLL-U file from given file object to an internal representation
# - the file object should return str in both Python 2 and Python 3
# - raises UDError exception if the given file cannot be loaded
# - evaluate(gold_ud, system_ud)
# - evaluate the given gold and system CoNLL-U files (loaded with load_conllu)
# - raises UDError if the concatenated tokens of gold and system file do not match
# - returns a dictionary with the metrics described above, each metric having
# three fields: precision, recall and f1
# Description of token matching
# -----------------------------
# In order to match tokens of gold file and system file, we consider the text
# resulting from concatenation of gold tokens and text resulting from
# concatenation of system tokens. These texts should match -- if they do not,
# the evaluation fails.
#
# If the texts do match, every token is represented as a range in this original
# text, and tokens are equal only if their range is the same.
# Description of word matching
# ----------------------------
# When matching words of gold file and system file, we first match the tokens.
# The words which are also tokens are matched as tokens, but words in multi-word
# tokens have to be handled differently.
#
# To handle multi-word tokens, we start by finding "multi-word spans".
# Multi-word span is a span in the original text such that
# - it contains at least one multi-word token
# - all multi-word tokens in the span (considering both gold and system ones)
# are completely inside the span (i.e., they do not "stick out")
# - the multi-word span is as small as possible
#
# For every multi-word span, we align the gold and system words completely
# inside this span using LCS on their FORMs. The words not intersecting
# (even partially) any multi-word span are then aligned as tokens.
from __future__ import division
from __future__ import print_function
from readMCD import readMCD
import argparse
import io
import os
import sys
import unicodedata
import unittest
import math
# CoNLL-U column names
col2index = {}
index2col = {}
metric2colname = {
"UPOS" : "UPOS",
"Lemmas" : "LEMMA",
}
defaultColumns = {
"ID",
"FORM",
"UPOS",
"XPOS",
"LEMMA",
"FEATS",
"HEAD",
"DEPREL",
"DEPS",
"MISC",
}
# Content and functional relations
CONTENT_DEPRELS = {
"nsubj", "obj", "iobj", "csubj", "ccomp", "xcomp", "obl", "vocative",
"expl", "dislocated", "advcl", "advmod", "discourse", "nmod", "appos",
"nummod", "acl", "amod", "conj", "fixed", "flat", "compound", "list",
"parataxis", "orphan", "goeswith", "reparandum", "root", "dep"
}
FUNCTIONAL_DEPRELS = {
"aux", "cop", "mark", "det", "clf", "case", "cc"
}
UNIVERSAL_FEATURES = {
"PronType", "NumType", "Poss", "Reflex", "Foreign", "Abbr", "Gender",
"Animacy", "Number", "Case", "Definite", "Degree", "VerbForm", "Mood",
"Tense", "Aspect", "Voice", "Evident", "Polarity", "Person", "Polite"
}
################################################################################
def is_float(value) :
if not isinstance(value, str) :
return False
try :
float(value)
return True
except ValueError :
return False
################################################################################
################################################################################
def filter_columns(columns) :
res = []
cols = [("ID",4), ("FORM",8), ("UPOS",8), ("HEAD",4), ("DEPREL", 8)]
contents = [(columns[col2index[col]], max_size) for (col, max_size) in cols if col in col2index]
for (content, max_len) in contents :
res.append(("{:"+str(max_len)+"}").format(content if len(content) <= max_len else "{}…{}".format(content[0:math.ceil((max_len-1)/2)],content[-((max_len-1)//2):])))
return res
################################################################################
################################################################################
# UD Error is used when raising exceptions in this module
class UDError(Exception) :
pass
################################################################################
################################################################################
# Conversion methods handling `str` <-> `unicode` conversions in Python2
def _decode(text) :
return text if sys.version_info[0] >= 3 or not isinstance(text, str) else text.decode("utf-8")
################################################################################
################################################################################
def _encode(text) :
return text if sys.version_info[0] >= 3 or not isinstance(text, unicode) else text.encode("utf-8")
################################################################################
################################################################################
# Load given CoNLL-U file into internal representation
def load_conllu(file) :
global col2index
global index2col
# Internal representation classes
class UDRepresentation :
def __init__(self) :
# Characters of all the tokens in the whole file.
# Whitespace between tokens is not included.
self.characters = []
# List of UDSpan instances with start&end indices into `characters`.
self.tokens = []
# List of UDWord instances.
self.words = []
# List of UDSpan instances with start&end indices into `characters`.
self.sentences = []
# List of UDSpan instances with start&end indices into `words`.
self.sentences_words = []
# Name of the file this representation has been extracted from.
self.filename = ""
class UDSpan :
def __init__(self, start, end) :
self.start = start
# Note that self.end marks the first position **after the end** of span,
# so we can use characters[start:end] or range(start, end).
self.end = end
class UDWord :
def __init__(self, span, columns, is_multiword) :
# Index of the sentence this word is part of, within ud_representation.sentences.
self.sentence = None
# Span of this word (or MWT, see below) within ud_representation.characters.
self.span = span
# 10 columns of the CoNLL-U file: ID, FORM, LEMMA,...
self.columns = columns
# is_multiword==True means that this word is part of a multi-word token.
# In that case, self.span marks the span of the whole multi-word token.
self.is_multiword = is_multiword
# Reference to the UDWord instance representing the HEAD (or None if root).
self.parent = None
# List of references to UDWord instances representing functional-deprel children.
self.functional_children = []
# Only consider universal FEATS.
# TODO consider all feats
if "FEATS" in col2index :
self.columns[col2index["FEATS"]] = "|".join(sorted(feat for feat in columns[col2index["FEATS"]].split("|")
if feat.split("=", 1)[0] in UNIVERSAL_FEATURES))
if "DEPREL" in col2index :
# Let's ignore language-specific deprel subtypes.
self.columns[col2index["DEPREL"]] = columns[col2index["DEPREL"]].split(":")[0]
# Precompute which deprels are CONTENT_DEPRELS and which FUNCTIONAL_DEPRELS
self.is_content_deprel = self.columns[col2index["DEPREL"]] in CONTENT_DEPRELS
self.is_functional_deprel = self.columns[col2index["DEPREL"]] in FUNCTIONAL_DEPRELS
ud = UDRepresentation()
ud.filename = file.name
# Load the CoNLL-U file
index, sentence_start = 0, None
id_starts_at_zero = False
while True :
line = file.readline()
if not line :
break
line = _decode(line.rstrip("\r\n"))
# Handle sentence start boundaries
if sentence_start is None :
# Skip comments
if line.startswith("#") :
splited = line.split("global.columns =")
if len(splited) > 1 :
col2index, index2col = readMCD(splited[-1].strip())
continue
# Start a new sentence
sentence_start = len(ud.words)
ud.sentences.append(UDSpan(index, 0))
ud.sentences_words.append(UDSpan(sentence_start, 0))
if not line :
# Add parent and children UDWord links and check there are no cycles
def process_word(word) :
if "HEAD" in col2index :
if word.parent == "remapping" :
raise UDError("There is a cycle in a sentence")
if word.parent is None :
head = int(word.columns[col2index["HEAD"]])
if head < 0 or head > len(ud.words) - sentence_start :
raise UDError("HEAD '{}' points outside of the sentence".format(_encode(word.columns[col2index["HEAD"]])))
if head :
parent = ud.words[sentence_start + head - 1]
word.parent = "remapping"
process_word(parent)
word.parent = parent
for word in ud.words[sentence_start:] :
process_word(word)
# func_children cannot be assigned within process_word
# because it is called recursively and may result in adding one child twice.
for word in ud.words[sentence_start:] :
if "HEAD" in col2index and word.parent and word.is_functional_deprel :
word.parent.functional_children.append(word)
# Check there is a single root node
if "HEAD" in col2index and len([word for word in ud.words[sentence_start:] if word.parent is None]) != 1 :
raise UDError("There are multiple roots in a sentence")
# End the sentence
ud.sentences[-1].end = index
ud.sentences_words[-1].end = len(ud.words)
sentence_start = None
continue
# Read next token/word
columns = line.split("\t")
# Skip empty nodes
if "ID" in col2index and "." in columns[col2index["ID"]] :
continue
# Delete spaces from FORM, so gold.characters == system.characters
# even if one of them tokenizes the space. Use any Unicode character
# with category Zs.
if "FORM" in col2index :
columns[col2index["FORM"]] = "".join(filter(lambda c: unicodedata.category(c) != "Zs", columns[col2index["FORM"]]))
if not columns[col2index["FORM"]] :
raise UDError("There is an empty FORM in the CoNLL-U file")
# Save token
form_value = columns[col2index["FORM"]] if "FORM" in col2index else "_"
ud.characters.extend(form_value)
ud.tokens.append(UDSpan(index, index + len(form_value)))
index += len(form_value)
# Handle multi-word tokens to save word(s)
if "ID" in col2index and "-" in columns[col2index["ID"]] :
try :
start, end = map(int, columns[col2index["ID"]].split("-"))
except :
raise UDError("Cannot parse multi-word token ID '{}'".format(_encode(columns[col2index["ID"]])))
for _ in range(start, end + 1) :
word_line = _decode(file.readline().rstrip("\r\n"))
word_columns = word_line.split("\t")
ud.words.append(UDWord(ud.tokens[-1], word_columns, is_multiword=True))
ud.words[-1].sentence = len(ud.sentences)-1
# Basic tokens/words
else :
try :
word_id = int(columns[col2index["ID"]]) if "ID" in col2index else "_"
if word_id == 0 :
id_starts_at_zero = True
except :
raise UDError("Cannot parse word ID '{}'".format(_encode(columns[col2index["ID"]])))
if word_id != len(ud.words) - sentence_start + (0 if id_starts_at_zero else 1) :
raise UDError("Incorrect word ID '{}' for word '{}', expected '{}'".format(
_encode(columns[col2index["ID"]]), _encode(columns[col2index["FORM"]]), len(ud.words) - sentence_start + 1))
try :
head_id = int(columns[col2index["HEAD"]]) if "HEAD" in col2index else 0
except :
raise UDError("Cannot parse HEAD '{}'".format(_encode(columns[col2index["HEAD"]])))
if head_id < 0 :
raise UDError("HEAD cannot be negative")
ud.words.append(UDWord(ud.tokens[-1], columns, is_multiword=False))
ud.words[-1].sentence = len(ud.sentences)-1
if sentence_start is not None :
raise UDError("The CoNLL-U file does not end with empty line")
return ud
################################################################################
################################################################################
# Evaluate the gold and system treebanks (loaded using load_conllu).
def evaluate(gold_ud, system_ud, extraColumns) :
class Score :
def __init__(self, gold_total, system_total, correct, aligned_total=None, isNumeric=False, R2=None) :
self.correct = correct[0]
self.gold_total = gold_total
self.system_total = system_total
self.aligned_total = aligned_total
if isNumeric :
self.precision = 0
self.recall = R2
self.f1 = correct[1]
self.aligned_accuracy = correct[0]
else :
self.precision = 100*correct[0] / system_total if system_total else 0.0
self.recall = 100*correct[0] / gold_total if gold_total else 0.0
self.f1 = 2 * 100*correct[0] / (system_total + gold_total) if system_total + gold_total else 0.0
self.aligned_accuracy = 100*correct[0] / aligned_total if aligned_total else aligned_total
class AlignmentWord :
def __init__(self, gold_word, system_word) :
self.gold_word = gold_word
self.system_word = system_word
class Alignment :
def __init__(self, gold_words, system_words) :
self.gold_words = gold_words
self.system_words = system_words
self.matched_words = []
self.matched_words_map = {}
def append_aligned_words(self, gold_word, system_word) :
self.matched_words.append(AlignmentWord(gold_word, system_word))
self.matched_words_map[system_word] = gold_word
def spans_score(gold_spans, system_spans) :
correct, gi, si = 0, 0, 0
while gi < len(gold_spans) and si < len(system_spans) :
if system_spans[si].start < gold_spans[gi].start :
si += 1
elif gold_spans[gi].start < system_spans[si].start :
gi += 1
else :
correct += gold_spans[gi].end == system_spans[si].end
si += 1
gi += 1
return [Score(len(gold_spans), len(system_spans), [correct])]
def alignment_score(alignment, key_fn=None, filter_fn=None) :
if filter_fn is not None :
gold = sum(1 for gold in alignment.gold_words if filter_fn(gold))
system = sum(1 for system in alignment.system_words if filter_fn(system))
aligned = sum(1 for word in alignment.matched_words if filter_fn(word.gold_word))
else :
gold = len(alignment.gold_words)
system = len(alignment.system_words)
aligned = len(alignment.matched_words)
if key_fn is None :
# Return score for whole aligned words
return [Score(gold, system, [aligned])]
def gold_aligned_gold(word) :
return word
def gold_aligned_system(word) :
return alignment.matched_words_map.get(word, "NotAligned") if word is not None else None
isNumericOnly = True
for words in alignment.matched_words :
if filter_fn is None or filter_fn(words.gold_word) :
goldItem = key_fn(words.gold_word, gold_aligned_gold)
systemItem = key_fn(words.system_word, gold_aligned_system)
if (not is_float(systemItem)) or (not is_float(goldItem)) :
isNumericOnly = False
break
correct = [0,0]
errors = []
goldValues = []
predictedValues = []
for words in alignment.matched_words :
if filter_fn is None or filter_fn(words.gold_word) :
goldItem = key_fn(words.gold_word, gold_aligned_gold)
systemItem = key_fn(words.system_word, gold_aligned_system)
if not isNumericOnly :
if goldItem == systemItem :
correct[0] += 1
else :
errors.append(words)
else :
correct[0] -= abs(float(goldItem) - float(systemItem))**1
correct[1] -= abs(float(goldItem) - float(systemItem))**2
goldValues.append(float(goldItem))
predictedValues.append(float(systemItem))
R2 = 0.0
if isNumericOnly and len(goldValues) > 0 :
correct[0] /= len(goldValues)
correct[1] /= len(goldValues)
goldMean = sum(goldValues) / len(goldValues)
predMean = sum(predictedValues) / len(predictedValues)
numerator = 0.0
denom1 = 0.0
denom2 = 0.0
for i in range(len(predictedValues)) :
numerator += (predictedValues[i]-predMean)*(goldValues[i]-goldMean)
denom1 += (predictedValues[i]-predMean)**2
denom2 += (goldValues[i]-goldMean)**2
pearson = 0.0
if denom1 > 0.0 and denom2 > 0.0 :
pearson = numerator/((denom1**0.5)*(denom2**0.5))
R2 = pearson**2
return [Score(gold, system, correct, aligned, isNumeric=isNumericOnly, R2=R2), errors]
def beyond_end(words, i, multiword_span_end) :
if i >= len(words) :
return True
if words[i].is_multiword :
return words[i].span.start >= multiword_span_end
return words[i].span.end > multiword_span_end
def extend_end(word, multiword_span_end) :
if word.is_multiword and word.span.end > multiword_span_end :
return word.span.end
return multiword_span_end
def find_multiword_span(gold_words, system_words, gi, si) :
# We know gold_words[gi].is_multiword or system_words[si].is_multiword.
# Find the start of the multiword span (gs, ss), so the multiword span is minimal.
# Initialize multiword_span_end characters index.
if gold_words[gi].is_multiword :
multiword_span_end = gold_words[gi].span.end
if not system_words[si].is_multiword and system_words[si].span.start < gold_words[gi].span.start :
si += 1
else : # if system_words[si].is_multiword
multiword_span_end = system_words[si].span.end
if not gold_words[gi].is_multiword and gold_words[gi].span.start < system_words[si].span.start :
gi += 1
gs, ss = gi, si
# Find the end of the multiword span
# (so both gi and si are pointing to the word following the multiword span end).
while not beyond_end(gold_words, gi, multiword_span_end) or \
not beyond_end(system_words, si, multiword_span_end) :
if gi < len(gold_words) and (si >= len(system_words) or
gold_words[gi].span.start <= system_words[si].span.start) :
multiword_span_end = extend_end(gold_words[gi], multiword_span_end)
gi += 1
else :
multiword_span_end = extend_end(system_words[si], multiword_span_end)
si += 1
return gs, ss, gi, si
def compute_lcs(gold_words, system_words, gi, si, gs, ss) :
lcs = [[0] * (si - ss) for i in range(gi - gs)]
for g in reversed(range(gi - gs)) :
for s in reversed(range(si - ss)) :
if gold_words[gs + g].columns[col2index["FORM"]].lower() == system_words[ss + s].columns[col2index["FORM"]].lower() :
lcs[g][s] = 1 + (lcs[g+1][s+1] if g+1 < gi-gs and s+1 < si-ss else 0)
lcs[g][s] = max(lcs[g][s], lcs[g+1][s] if g+1 < gi-gs else 0)
lcs[g][s] = max(lcs[g][s], lcs[g][s+1] if s+1 < si-ss else 0)
return lcs
def align_words(gold_words, system_words) :
alignment = Alignment(gold_words, system_words)
gi, si = 0, 0
while gi < len(gold_words) and si < len(system_words) :
if gold_words[gi].is_multiword or system_words[si].is_multiword :
# A: Multi-word tokens => align via LCS within the whole "multiword span".
gs, ss, gi, si = find_multiword_span(gold_words, system_words, gi, si)
if si > ss and gi > gs :
lcs = compute_lcs(gold_words, system_words, gi, si, gs, ss)
# Store aligned words
s, g = 0, 0
while g < gi - gs and s < si - ss :
if gold_words[gs + g].columns[col2index["FORM"]].lower() == system_words[ss + s].columns[col2index["FORM"]].lower() :
alignment.append_aligned_words(gold_words[gs+g], system_words[ss+s])
g += 1
s += 1
elif lcs[g][s] == (lcs[g+1][s] if g+1 < gi-gs else 0) :
g += 1
else :
s += 1
else :
# B: No multi-word token => align according to spans.
if (gold_words[gi].span.start, gold_words[gi].span.end) == (system_words[si].span.start, system_words[si].span.end) :
alignment.append_aligned_words(gold_words[gi], system_words[si])
gi += 1
si += 1
elif gold_words[gi].span.start <= system_words[si].span.start :
gi += 1
else :
si += 1
return alignment
# Check that the underlying character sequences do match.
if gold_ud.characters != system_ud.characters :
index = 0
while index < len(gold_ud.characters) and index < len(system_ud.characters) and \
gold_ud.characters[index] == system_ud.characters[index] :
index += 1
raise UDError(
"The concatenation of tokens in gold file and in system file differ!\n" +
"First 20 differing characters in gold file: '{}' and system file: '{}'".format(
"".join(map(_encode, gold_ud.characters[index:index + 20])),
"".join(map(_encode, system_ud.characters[index:index + 20]))
)
)
# Align words
alignment = align_words(gold_ud.words, system_ud.words)
# Compute the F1-scores
result = {}
if "FORM" in col2index :
result["Tokens"] = spans_score(gold_ud.tokens, system_ud.tokens)
result["Words"] = alignment_score(alignment)
if "UPOS" in col2index :
result["UPOS"] = alignment_score(alignment, lambda w, _ : w.columns[col2index["UPOS"]])
if "XPOS" in col2index :
result["XPOS"] = alignment_score(alignment, lambda w, _ : w.columns[col2index["XPOS"]])
if "FEATS" in col2index :
result["UFeats"] = alignment_score(alignment, lambda w, _ : w.columns[col2index["FEATS"]])
if "LEMMA" in col2index :
result["Lemmas"] = alignment_score(alignment, lambda w, ga : w.columns[col2index["LEMMA"]] if ga(w).columns[col2index["LEMMA"]] != "_" else "_")
if "HEAD" in col2index :
result["UAS"] = alignment_score(alignment, lambda w, ga : ga(w.parent))
if "DEPREL" in col2index :
result["LAS"] = alignment_score(alignment, lambda w, ga : (ga(w.parent), w.columns[col2index["DEPREL"]]))
if "DEPREL" in col2index and "UPOS" in col2index and "FEATS" in col2index :
result["MLAS"] = alignment_score(alignment, lambda w, ga: (ga(w.parent), w.columns[col2index["DEPREL"]], w.columns[col2index["UPOS"]], w.columns[col2index["FEATS"]], [(ga(c), c.columns[col2index["DEPREL"]], c.columns[col2index["UPOS"]], c.columns[col2index["FEATS"]]) for c in w.functional_children]), filter_fn=lambda w: w.is_content_deprel)
if "ID" in col2index :
result["Sentences"] = spans_score(gold_ud.sentences, system_ud.sentences)
for colName in col2index :
if colName in extraColumns and colName != "_" :
result[colName] = alignment_score(alignment, lambda w, _ : w.columns[col2index[colName]])
return result
################################################################################
################################################################################
def load_conllu_file(path) :
_file = open(path, mode="r", **({"encoding" : "utf-8"} if sys.version_info >= (3, 0) else {}))
return load_conllu(_file)
################################################################################
################################################################################
def evaluate_wrapper(args) :
# Load CoNLL-U files
gold_ud = load_conllu_file(args.gold_file)
system_files = [load_conllu_file(args.system_file)]
if args.system_file2 is not None :
system_files.append(load_conllu_file(args.system_file2))
return gold_ud, [(system, evaluate(gold_ud, system, set(args.extra.split(',')))) for system in system_files]
################################################################################
################################################################################
class Error :
def __init__(self, gold_file, system_file, gold_word, system_word, metric) :
self.gold = gold_word
self.pred = system_word
self.gold_sentence = gold_file.words[gold_file.sentences_words[self.gold.sentence].start:gold_file.sentences_words[self.gold.sentence].end]
self.pred_sentence = system_file.words[system_file.sentences_words[self.pred.sentence].start:system_file.sentences_words[self.pred.sentence].end]
self.type = self.gold.columns[col2index[metric2colname[metric]]]+"->"+self.pred.columns[col2index[metric2colname[metric]]]
def __str__(self) :
result = []
gold_lines = []
pred_lines = []
for word in self.gold_sentence :
gold_lines.append((">" if word == self.gold else " ") + " ".join(filter_columns(word.columns)))
for word in self.pred_sentence :
pred_lines.append((">" if word == self.pred else " ") + " ".join(filter_columns(word.columns)))
for index in range(max(len(gold_lines), len(pred_lines))) :
result.append("{} | {}".format(gold_lines[index] if index < len(gold_lines) else " "*len(pred_lines[index]), pred_lines[index] if index < len(pred_lines) else " "*len(gold_lines[index])))
return "\n".join(result)
class Errors :
def __init__(self, metric, errors1=None, errors2=None) :
self.types = []
self.nb_errors = 0
self.metric = metric
if errors1 is not None and errors2 is not None :
for type in errors1.types :
for error in type.errors :
if not errors2.has(error) :
self.add(error)
def __len__(self) :
return self.nb_errors
def add(self, error) :
self.nb_errors += 1
for t in self.types :
if t.type == error.type :
t.add(error)
return
self.types.append(ErrorType(error.type))
self.types[-1].add(error)
def has(self, error) :
for t in self.types :
if t.type == error.type :
return t.has(error)
def sort(self) :
self.types.sort(key=len, reverse=True)
class ErrorType :
def __init__(self, error_type) :
self.type = error_type
self.errors = []
def __len__(self) :
return len(self.errors)
def add(self, error) :
self.errors.append(error)
def has(self, error) :
for other_error in self.errors :
if other_error.gold == error.gold :
return True
return False
################################################################################
################################################################################
def compute_errors(gold_file, system_file, evaluation, metric) :
errors = Errors(metric)
for alignment_word in evaluation[metric][1] :
gold = alignment_word.gold_word
pred = alignment_word.system_word
error = Error(gold_file, system_file, gold, pred, metric)
errors.add(error)
return errors
################################################################################
################################################################################
def main() :
# Parse arguments
parser = argparse.ArgumentParser()
parser.add_argument("gold_file", type=str,
help="Name of the CoNLL-U file with the gold data.")
parser.add_argument("system_file", type=str,
help="Name of the CoNLL-U file with the predicted data.")
parser.add_argument("--counts", "-c", default=False, action="store_true",
help="Print raw counts of correct/gold/system/aligned words instead of prec/rec/F1 for all metrics.")
parser.add_argument("--system_file2",
help="Name of another CoNLL-U file with predicted data, for error comparison.")
parser.add_argument("--enumerate_errors", "-e", default=None,
help="Comma separated list of column names for which to enumerate errors (e.g. \"UPOS,FEATS\").")
parser.add_argument("--extra", "-x", default="",
help="Comma separated list of column names for which to compute score (e.g. \"TIME,EOS\").")
args = parser.parse_args()
errors_metrics = [] if args.enumerate_errors is None else args.enumerate_errors.split(',')
global col2index
global index2col
col2index, index2col = readMCD("ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC")
# Evaluate
gold_ud, evaluations = evaluate_wrapper(args)
errors_by_file = []
examples_list = []
for id1 in range(len(evaluations)) :
(system_ud, evaluation) = evaluations[id1]
fnamelen = len(system_ud.filename)
print("*"*math.ceil((80-2-fnamelen)/2),system_ud.filename,"*"*math.floor((80-2-fnamelen)/2))
# Compute errors
errors_list = [compute_errors(gold_ud, system_ud, evaluation, metric) for metric in errors_metrics]
errors_by_file.append(errors_list)
maxColNameSize = 1 + max([len(colName) for colName in evaluation])
# Print the evaluation
if args.counts :
print("{:^{}}| Correct | Gold | Predicted | Aligned".format("Metric", maxColNameSize))
else :
print("{:^{}}| Precision | Recall | F1 Score | AligndAcc".format("Metric", maxColNameSize))
print("{}+-----------+-----------+-----------+-----------".format("-"*maxColNameSize))
for metric in evaluation :
if args.counts :
print("{:{}}|{:10} |{:10} |{:10} |{:10}".format(
metric,
maxColNameSize,
evaluation[metric][0].correct,
evaluation[metric][0].gold_total,
evaluation[metric][0].system_total,
evaluation[metric][0].aligned_total or (evaluation[metric][0].correct if metric == "Words" else "")
))
else :
precision = ("{:10.2f}" if abs(evaluation[metric][0].precision) > 1.0 else "{:10.4f}").format(evaluation[metric][0].precision)
recall = ("{:10.2f}" if abs(evaluation[metric][0].recall) > 1.0 else "{:10.4f}").format(evaluation[metric][0].recall)
f1 = ("{:10.2f}" if abs(evaluation[metric][0].f1) > 1.0 else "{:10.4f}").format(evaluation[metric][0].f1)
print("{:{}}|{} |{} |{} |{}".format(
metric,
maxColNameSize,
precision,
recall,
f1,
"{:10.2f}".format(evaluation[metric][0].aligned_accuracy) if evaluation[metric][0].aligned_accuracy is not None else ""
))
for id2 in range(len(errors_list)) :
errors = errors_list[id2]
errors.sort()
print("Most frequent errors for metric '{}' :".format(errors.metric))
print("{:>12} {:>5} {:>6} {}\n {:->37}".format("ID", "NB", "%AGE", "GOLD->SYSTEM", ""))
print("{:>12} {:5} {:6.2f}%".format("Total", len(errors), 100))
for id3 in range(len(errors.types[:10])) :
error_type = errors.types[:10][id3]
t = error_type.type
nb = len(error_type)
percent = 100.0*nb/len(errors)
id = ":".join(map(str,[id1,id2,id3,"*"]))
print("{:>12} {:5} {:6.2f}% {}".format(id, nb, percent, t))
for id4 in range(len(error_type)) :
examples_list.append((":".join(map(str,[id1,id2,id3,id4])), error_type.errors[id4]))
print("")
for id1 in range(len(evaluations)) :
(system1_ud, evaluation) = evaluations[id1]
for id2 in range(len(evaluations)) :
if id1 == id2 :
continue
(system2_ud, evaluation) = evaluations[id2]
errors1 = errors_by_file[id1]
errors2 = errors_by_file[id2]
if len(errors1) > 0 :
print("{} Error comparison {}".format("*"*31, "*"*31))
print("{:>30} : {}".format("These errors are present in", system1_ud.filename))
print("{:>30} : {}".format("and not in", system2_ud.filename))
for id3 in range(len(errors1)) :
metric = errors1[id3].metric
errors_diff = Errors(metric, errors1[id3], errors2[id3])
errors_diff.sort()
print("{:>12} {:5} {:6.2f}%".format("Total", len(errors_diff), 100))
for id4 in range(len(errors_diff.types[:10])) :
error_type = errors_diff.types[:10][id4]
t = error_type.type
nb = len(error_type)
percent = 100.0*nb/len(errors)
id = ":".join(map(str,["d"+str(id1),id3,id4,"*"]))
print("{:>12} {:5} {:6.2f}% {}".format(id, nb, percent, t))
for id5 in range(len(error_type)) :
examples_list.append((":".join(map(str,["d"+str(id1),id3,id4,id5])), error_type.errors[id5]))
print("")
if len(examples_list) > 0 :
print("{}List of all errors by their ID{}".format("*"*25,"*"*25))
print("{}{:^30}{}\n".format("*"*25,"Format is GOLD | PREDICTED","*"*25))
for (id,error) in examples_list :
print("ID="+id)
print(error)
print("")
################################################################################
################################################################################
if __name__ == "__main__" :
main()
################################################################################
......@@ -2,15 +2,11 @@
import sys
import random
import argparse
import Config
from Transition import Transition
################################################################################
def printUsageAndExit() :
print("USAGE : %s file.conllu"%sys.argv[0], file=sys.stderr)
exit(1)
################################################################################
################################################################################
def applyTransition(ts, strat, config, name) :
transition = [trans for trans in ts if trans.name == name][0]
......@@ -19,30 +15,38 @@ def applyTransition(ts, strat, config, name) :
config.moveWordIndex(movement)
################################################################################
################################################################################
def randomDecode(ts, strat, config) :
EOS = Transition("EOS")
config.moveWordIndex(0)
while config.wordIndex < len(config.lines) - 1 :
candidates = [trans for trans in transitionSet if trans.appliable(config)]
candidate = candidates[random.randint(0, 100) % len(candidates)]
applyTransition(transitionSet, strategy, config, candidate.name)
if args.debug :
print(candidate.name, file=sys.stderr)
config.printForDebug(sys.stderr)
EOS.apply(config)
################################################################################
################################################################################
if __name__ == "__main__" :
if len(sys.argv) != 2 :
printUsageAndExit()
parser = argparse.ArgumentParser()
parser.add_argument("trainCorpus", type=str,
help="Name of the CoNLL-U training file.")
parser.add_argument("--debug", "-d", default=False, action="store_true",
help="Print debug infos on stderr.")
args = parser.parse_args()
transitionSet = [Transition(elem) for elem in ["RIGHT", "LEFT", "SHIFT", "REDUCE"]]
EOS = Transition("EOS")
strategy = {"RIGHT" : 1, "SHIFT" : 1, "LEFT" : 0, "REDUCE" : 0}
sentences = Config.readConllu(sys.argv[1])
debug = True
first = True
for config in sentences :
config.moveWordIndex(0)
while config.wordIndex < len(config.lines) - 1 :
candidates = [trans for trans in transitionSet if trans.appliable(config)]
candidate = candidates[random.randint(0, 100) % len(candidates)]
applyTransition(transitionSet, strategy, config, candidate.name)
if debug :
print(candidate.name, file=sys.stderr)
config.printForDebug(sys.stderr)
EOS.apply(config)
config.print(sys.stdout)
randomDecode(transitionSet, strategy, config)
config.print(sys.stdout, header=first)
first = False
################################################################################
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment