Commit 7ea2bf6d authored by Carlos Ramisch

Update accuracy to evaluate NER with P/R/F

parent d0769a46
#!/usr/bin/env python3
import sys
import argparse
from collections import defaultdict, Counter
import pdb
from conllulib import CoNLLUReader, Util
@@ -23,10 +24,10 @@ parser.add_argument('-g', "--gold", metavar="FILENAME.conllu", required=True,\
parser.add_argument('-t', "--train", metavar="FILENAME.conllu", required=False,\
dest="train_filename", type=argparse.FileType('r', encoding='UTF-8'), \
help="""Training corpus in CoNLL-U, from which tagger was learnt.""")
parser.add_argument('-c', "--tagcolumn", metavar="NAME", dest="name_tag",
required=False, type=str, default="upos", help="""Column name of tags, \
as defined in header. Use lowercase""")
parser.add_argument('-f', "--featcolumn", metavar="NAME", dest="name_feat",
required=False, type=str, default="form", help="""Column name of input
feature, as defined in header. Use lowercase.""")
parser.add_argument('-u', "--upos-filter", metavar="NAME", dest="upos_filter",
@@ -34,13 +35,20 @@ parser.add_argument('-u', "--upos-filter", metavar="NAME", dest="upos_filter",
help="""Only calculate accuracy for words with UPOS in this list. \ help="""Only calculate accuracy for words with UPOS in this list. \
Empty list = no filter.""") Empty list = no filter.""")
######################################################################## ################################################################################
def process_args(parser): def process_args(parser):
"""
Show (in debug mode) and process all command line options. Checks tag and feat
columns appear in corpora. Create training corpus vocabulary if option present
for OOV status check. Input is an instance of `argparse.ArgumentParser`,
returns list of `args`, `gold_corpus` and `pred_corpus` as `CoNLLUReader`,
`train_vocab` dictionary.
"""
args = parser.parse_args()
Util.DEBUG_FLAG = args.DEBUG_FLAG
args.name_tag = args.name_tag.lower()
args.name_feat = args.name_feat.lower()
Util.debug("Command-line arguments and defaults:")
for (k,v) in vars(args).items():
Util.debug(" * {}: {}",k,v)
@@ -49,37 +57,21 @@ def process_args(parser):
train_vocab = None
if args.train_filename:
train_corpus = CoNLLUReader(args.train_filename)
ignoreme, train_vocab = train_corpus.to_int_and_vocab({args.name_feat:[]})
if args.name_tag not in gold_corpus.header or \
args.name_feat not in gold_corpus.header:
Util.error("-c and -f names must be valid conllu column among:\n{}",
gold_corpus.header)
return args, gold_corpus, pred_corpus, train_vocab
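# Sketch of the data layout (my reading, not from the original file):
# `train_vocab[args.name_feat]` appears to map each form seen in the training
# corpus to an integer id; the evaluation loop below only consults its keys()
# to decide whether a gold token is out-of-vocabulary (OOV).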
################################################################################
def tp_count_feats(tok_pred, tok_gold, prf):
"""
Increment the number of true positives, trues and positives for morphological
feature evaluation. Compares all features of `tok_pred` with those of
`tok_gold`. The result is a modification of the `prf` dict; the function does
not return anything.
"""
pred_feats = tok_pred['feats'] if tok_pred['feats'] else {}
gold_feats = tok_gold['feats'] if tok_gold['feats'] else {}
for key in pred_feats.keys():
@@ -93,34 +85,113 @@ if __name__ == "__main__":
t_inc = int(gold_feats.get(key,None) != None)
prf[key]['t'] = prf[key]['t'] + t_inc
prf['micro-average']['t'] = prf['micro-average']['t'] + t_inc
################################################################################
def parseme_cat_in(ent, ent_list):
"""
Verify if `ent` is present in `ent_list` by comparing both span AND category.
Default cuptlib implementation ignores category
"""
for ent_cand in ent_list:
if ent.span == ent_cand.span and ent.cat == ent_cand.cat :
return True
return False
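# For illustration: given one gold entity with span (3,4) and category LOC, a
# predicted entity with the same span but category PERS is accepted by the
# default membership test (`e_pred in ents_gold.values()`) used for the
# 'Exact-nocat' counts below, but rejected by parseme_cat_in, which also
# compares categories.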
################################################################################
def tp_count_parseme(s_pred, s_gold, name_tag, prf):
try :
import parseme.cupt as cupt
except ImportError:
print("""Please install cuptlib before running this script\n\n git clone \
https://gitlab.com/parseme/cuptlib.git\n cd cuptlib\n pip install .""")
sys.exit(-1)
ents_pred = cupt.retrieve_mwes(s_pred, column_name=name_tag)
ents_gold = cupt.retrieve_mwes(s_gold, column_name=name_tag)
prf['Exact-nocat']['p'] += len(ents_pred)
prf['Exact-nocat']['t'] += len(ents_gold)
for e_pred in ents_pred.values() :
if e_pred in ents_gold.values() :
#pdb.set_trace()
prf['Exact-nocat']['tp'] += 1
if parseme_cat_in(e_pred, ents_gold.values()) :
prf['Exact-'+e_pred.cat]['tp'] += 1
prf['Exact-'+e_pred.cat]['p'] += 1
for e_pred in ents_gold.values() :
prf['Exact-'+e_pred.cat]['t'] += 1
# Token-based evaluation - categories always ignored here
span_pred = sum([list(ep.int_span()) for ep in ents_pred.values()], start=[])
span_gold = sum([list(eg.int_span()) for eg in ents_gold.values()], start=[])
prf['Token-nocat']['p'] += len(span_pred)
prf['Token-nocat']['t'] += len(span_gold)
for e_pred in span_pred :
if e_pred in span_gold :
prf['Token-nocat']['tp'] += 1
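# Worked example (hypothetical sentence): if gold contains one MWE over tokens
# {2,3} and the prediction one MWE over {2,3,4}, both with the same category,
# then Exact-nocat gets p+=1, t+=1, tp+=0 (the spans differ), while
# Token-nocat, assuming int_span() yields the individual token positions, gets
# p+=3, t+=2, tp+=2.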
################################################################################
def print_results(pred_corpus_name, args, acc, prf):
"""
Calculate and print accuracies, precision, recall, f-score, etc.
"""
print("Predictions file: {}".format(pred_corpus_name))
if args.upos_filter :
print("Results concern only some UPOS: {}".format(" ".join(args.upos_filter)))
accuracy = (acc['correct_tokens'] / acc['total_tokens']) * 100
print("Accuracy on all {}: {:0.2f} ({:5}/{:5})".format(args.name_tag, accuracy,
acc['correct_tokens'], acc['total_tokens']))
if args.train_filename :
accuracy_oov = (acc['correct_oov'] / acc['total_oov']) * 100
print("Accuracy on OOV {}: {:0.2f} ({:5}/{:5})".format(args.name_tag, accuracy_oov,
acc['correct_oov'], acc['total_oov']))
if prf:
print("\nPrecision, recall, and F-score for {}:".format(args.name_tag))
macro = {"precis":0.0, "recall":0.0}
for key in sorted(prf): # max prevents zero-division in P and R
precis = (prf[key]['tp'] / max(1, prf[key]['p'])) * 100
recall = (prf[key]['tp'] / max(1, prf[key]['t'])) * 100
fscore = ((2 * precis * recall) / max(1, precis + recall))
if key != 'micro-average':
macro['precis'] = macro['precis'] + precis
macro['recall'] = macro['recall'] + recall
else:
print()
templ = "{:13}: P={:6.2f} ({:5}/{:5}) / R={:6.2f} ({:5}/{:5}) / F={:6.2f}"
print(templ.format(key, precis, prf[key]['tp'], prf[key]['p'], recall,
prf[key]['tp'], prf[key]['t'], fscore))
templ = "{:13}: P={:6.2f}" + " "*15 + "/ R={:6.2f}" + " "*15 + "/ F={:6.2f}"
if len(prf) > 1 : # Calculate macro-precision
nb_scores = len(prf)-1 if "micro-average" in prf else len(prf)
ma_precis = (macro['precis'] / (nb_scores))
ma_recall = (macro['recall'] / (nb_scores))
ma_fscore = ((2*ma_precis*ma_recall)/max(1,ma_precis+ma_recall))
print(templ.format("macro-average", ma_precis, ma_recall, ma_fscore))
################################################################################
if __name__ == "__main__":
args, gold_corpus, pred_corpus, train_vocab = process_args(parser)
prf = defaultdict(lambda:{'tp':0,'t':0, 'p':0}) # used for feats, NEs and MWEs
acc = Counter() # store correct and total for all and OOV
for (s_gold,s_pred) in zip(gold_corpus.readConllu(),pred_corpus.readConllu()):
if args.name_tag.startswith("parseme"):
tp_count_parseme(s_pred, s_gold, args.name_tag, prf)
for (tok_gold, tok_pred) in zip (s_gold, s_pred):
if not args.upos_filter or tok_gold['upos'] in args.upos_filter :
if train_vocab :
train_vocab_feat = train_vocab[args.name_feat].keys()
if tok_gold[args.name_feat] not in train_vocab_feat:
acc['total_oov'] += 1
oov = True
else:
oov = False
if tok_gold[args.name_tag] == tok_pred[args.name_tag]:
acc['correct_tokens'] += 1
if train_vocab and oov :
acc['correct_oov'] += 1
acc['total_tokens'] += 1
if args.name_tag == 'feats':
tp_count_feats(tok_pred, tok_gold, prf)
print_results(pred_corpus.name(), args, acc, prf)
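# Hypothetical invocation (sketch; the flag for the predictions file is defined
# outside this excerpt and its name, like the script name below, is only an
# assumption):
#   python3 this_script.py -g gold.conllu -p pred.conllu -c parseme:ne -t train.conllu
# With -c parseme:ne, tp_count_parseme adds MWE/NE P/R/F to the report; with
# -c feats, tp_count_feats adds per-feature P/R/F; token-level accuracy on the
# chosen column is always printed, with an OOV breakdown when -t is given.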
@@ -6,6 +6,7 @@ import collections
from torch.utils.data import TensorDataset, DataLoader
import torch
import random
import numpy as np
import pdb
########################################################################
@@ -15,6 +16,7 @@ import pdb
class Util(object):
DEBUG_FLAG = False
PSEUDO_INF = 9999.0
###############################
@@ -64,6 +66,23 @@ class Util(object):
random.seed(seed)
torch.manual_seed(seed)
###############################
@staticmethod
def log_cap(number):
"""Returns the base-10 logarithm of `number`.
If `number` is negative, stops the program with an error message.
If `number` is zero, returns -9999.0, representing negative pseudo-infinity.
This is more convenient than the -np.inf returned by np.log10 because
-inf + a == -inf (the sum is unchanged) whereas -9999.0 + a != -9999.0"""
if number < 0 :
Util.error("Cannot get logarithm of negative number {}".format(number))
elif number == 0:
return -Util.PSEUDO_INF
else :
return np.log10(number)
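# Illustrative values: Util.log_cap(1000) -> 3.0, Util.log_cap(0) -> -9999.0,
# and Util.log_cap(-1) aborts with an error. Capping at -PSEUDO_INF rather than
# -inf keeps sums of log-probabilities distinguishable: -9999.0 + (-2.0) is
# smaller than -9999.0, whereas any sum involving -inf collapses to -inf.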
########################################################################
# CONLLU FUNCTIONS
########################################################################
@@ -72,6 +91,8 @@ class CoNLLUReader(object):
###############################
start_tag = "<s>"
def __init__(self, infile):
self.infile = infile
DEFAULT_HEADER = "ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC " +\
@@ -129,14 +150,14 @@ class CoNLLUReader(object):
###############################
def to_int_from_vocab(self, col_names, unk_token, vocab={}):
int_list = {}
unk_toks = {}
for col_name in col_names:
int_list[col_name] = []
unk_toks[col_name] = vocab[col_name].get(unk_token,None)
for s in self.readConllu():
for col_name in col_names:
id_getter = lambda v,t: v[col_name].get(t[col_name],unk_toks[col_name])
int_list[col_name].append([id_getter(vocab,tok) for tok in s])
return int_list
@@ -144,11 +165,126 @@ class CoNLLUReader(object):
###############################
@staticmethod
def to_int_from_vocab_sent(sent, col_names, unk_token, vocab={},
lowercase=False):
int_list = {}
for col_name in col_names:
unk_tok_id = vocab[col_name].get(unk_token, None)
low_or_not = lambda w: w.lower() if lowercase else w
id_getter = lambda v,t: v[col_name].get(low_or_not(t[col_name]),unk_tok_id)
int_list[col_name]=[id_getter(vocab,tok) for tok in sent]
return int_list
###############################
@staticmethod
def to_bio(sent, bio_style='bio', name_tag='parseme:ne'):
"""Convert the parseme-style annotations in column `name_tag` of `sent` into a
list of BIO tags (or IO tags if `bio_style` is 'io'), one tag per token.
Assumes well-formed input, where entities start with "N:CAT" and continue with
"N"."""
bio_enc = []
neindex = 0
for tok in sent :
netag = tok[name_tag]
if netag == '*' :
cur_tag = 'O'
elif netag == neindex :
cur_tag = 'I' + necat
else :
neindex, necat = netag.split(":")
necat = '-' + necat
if bio_style == 'io' :
cur_tag = 'I' + necat
else:
cur_tag = 'B' + necat
bio_enc.append(cur_tag)
return bio_enc
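# Illustrative round trip, assuming a minimal sentence whose tokens are
# dict-like and expose the 'parseme:ne' column:
#   >>> sent = [{'parseme:ne': '1:PERS'}, {'parseme:ne': '1'}, {'parseme:ne': '*'}]
#   >>> CoNLLUReader.to_bio(sent)
#   ['B-PERS', 'I-PERS', 'O']
#   >>> CoNLLUReader.from_bio(['B-PERS', 'I-PERS', 'O'])
#   ['1:PERS', '1', '*']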
###############################
@staticmethod
def from_bio(bio_enc, bio_style='bio', stop_on_error=False):
"""Converst BIO-encoded annotations into Sequoia/parseme format.
Input `bio_enc` is a list of strings, each corresponding to one BIO tag.
`bio_style` can be "bio" (default) or "io". Will try to recover encoding
errors by replacing wrong tags when `stop_on_error` equals False (default),
otherwise stops execution and shows an error message.
Only works for BIO-cat & IO-cat, with -cat appended to both B and I tags.
Requires adaptations for BIOES, and for encoding schemes without "-cat".
Examples:
>>> from_bio(["B-PERS", "I-PERS", "I-PERS", "O", "B-LOC", "I-LOC"], bio_style='bio')
['1:PERS', '1', '1', '*', '2:LOC', '2']
>>> from_bio(["B-PERS", "I-PERS", "I-PERS", "O", "B-LOC", "I-LOC"],bio_style='io')
WARNING: Got B tag in spite of 'io' bio_style: interpreted as I
WARNING: Got B tag in spite of 'io' bio_style: interpreted as I
['1:PERS', '1', '1', '*', '2:LOC', '2']
>>> from_bio(["I-PERS", "B-PERS", "I-PERS", "O", "I-LOC"],bio_style='io')
WARNING: Got B tag in spite of 'io' bio_style: interpreted as I
['1:PERS', '1', '1', '*', '2:LOC']
>>> from_bio(["I-PERS", "I-PERS", "I-PERS", "O", "I-LOC"], bio_style='bio')
WARNING: Invalid I-initial tag I-PERS converted to B
WARNING: Invalid I-initial tag I-LOC converted to B
['1:PERS', '1', '1', '*', '2:LOC']
>>> from_bio(["I-PERS", "B-PERS", "I-PERS", "O", "I-LOC"], bio_style='bio')
WARNING: Invalid I-initial tag I-PERS converted to B
WARNING: Invalid I-initial tag I-LOC converted to B
['1:PERS', '2:PERS', '2', '*', '3:LOC']
>>> from_bio(["I-PERS", "B-PERS", "I-EVE", "O", "I-PERS"], bio_style='io')
['1:PERS', '2:PERS', '3:EVE', '*', '4:PERS']
>>> from_bio(["I-PERS", "B-PERS", "I-EVE", "O", "I-PERS"], bio_style='bio')
WARNING: Invalid I-initial tag I-PERS converted to B
WARNING: Invalid I-initial tag I-EVE converted to B
WARNING: Invalid I-initial tag I-PERS converted to B
['1:PERS', '2:PERS', '3:EVE', '*', '4:PERS']
"""
# TODO: warning if I-cat != previous I-cat or B-cat
result = []
neindex = 0
prev_bio_tag = 'O'
prev_cat = None
for bio_tag in bio_enc :
if bio_tag == 'O' :
seq_tag = '*'
elif bio_tag[0] in ['B', 'I'] and bio_tag[1] == '-':
necat = bio_tag.split("-")[1]
if bio_tag[0] == 'B' and bio_style == 'bio':
neindex += 1 # Beginning of an entity
seq_tag = str(neindex) + ":" + necat
elif bio_tag[0] == 'B' : # bio_style = 'io'
if stop_on_error:
Util.error("B tag not allowed with 'io'")
else:
bio_tag = bio_tag.replace("B-", "I-")
Util.warn("Got B tag in spite of 'io' bio_style: interpreted as I")
if bio_tag[0] == "I" and bio_style == "io" :
if necat != prev_cat:
neindex += 1 # Beginning of an entity
seq_tag = str(neindex) + ":" + necat
else:
seq_tag = str(neindex) # is a continuation
elif bio_tag[0] == "I" : # tag is "I" and bio_style is "bio"
if bio_style == 'bio' and prev_bio_tag != 'O' and necat == prev_cat :
seq_tag = str(neindex) # is a continuation
elif stop_on_error :
Util.error("Invalid I-initial tag in BIO format: {}".format(bio_tag))
else:
neindex += 1 # Beginning of an entity
seq_tag = str(neindex) + ":" + necat
Util.warn("Invalid I-initial tag {} converted to B".format(bio_tag))
prev_cat = necat
else:
if stop_on_error:
Util.error("Invalid BIO tag: {}".format(bio_tag))
else:
Util.warn("Invalid BIO tag {} converted to O".format(bio_tag))
result.append("*")
result.append(seq_tag)
prev_bio_tag = bio_tag
return result
################################################################################
@@ -35,7 +35,13 @@ import sys
import conllu
import re
import pdb
import subprocess
try :
import parseme.cupt as cupt
except ImportError:
print("""Please install cuptlib before running this script\n\n git clone \
https://gitlab.com/parseme/cuptlib.git\n cd cuptlib\n pip install .""")
sys.exit(-1)
#########################################
...