# NOTE(review): line breaks and indentation were reconstructed from a flattened
# copy of this patch. Confirm the indentation of the readTrace.py context lines
# and refresh the blob ids on the "index" lines of the two new files.
diff --git a/outputs/evalAndStats.py b/outputs/evalAndStats.py
new file mode 100755
index 0000000000000000000000000000000000000000..1ed65acea77a8356e07d2bf16df9fd68606d3911
--- /dev/null
+++ b/outputs/evalAndStats.py
@@ -0,0 +1,93 @@
+#! /usr/bin/env python3
+"""Build the gold and predicted corpora used for significance testing.
+
+Every ``{split}_{name}.conllu`` file in the current directory is taken as a
+system output for one data split.  Outputs sharing the same ``{name}`` are
+concatenated over all splits into ``pvalues/{group}/{name}/``, where
+``{group}`` is the first ``_``-separated field of ``{name}``; the matching
+gold test sets are concatenated into ``pvalues/gold/``.  ``readTrace.py`` is
+then run once per group and its output saved as ``stats_{group}.txt``.
+"""
+
+import os
+import shutil
+import subprocess
+import sys
+
+data = "../data/"
+evalScript = "../conll18_ud_eval.py"
+readTrace = "../readTrace.py"
+pvalues = "pvalues/"
+gold = "%sgold/" % pvalues
+
+
+def concatenate(sources, destination):
+    """Write the files in *sources*, in order, into *destination*.
+
+    Raises OSError if a source cannot be read, instead of silently leaving a
+    corpus that is missing one of its splits.
+    """
+    with open(destination, "wb") as out:
+        for source in sources:
+            with open(source, "rb") as chunk:
+                shutil.copyfileobj(chunk, out)
+
+
+def main():
+    """Scan the current directory and write the corpora and statistics."""
+    groups = {}
+    splits = set()
+
+    for filename in os.listdir():
+        if not filename.endswith(".conllu"):
+            continue
+        # A split prefix that is not an integer raises ValueError.
+        splits.add(int(filename.split("_")[0]))
+        basename = filename[:-len(".conllu")].split("_", 1)[1]
+        names = groups.setdefault(basename.split("_")[0], [])
+        # The same system shows up once per split; record it only once so it
+        # is neither concatenated nor passed to readTrace several times.
+        if basename not in names:
+            names.append(basename)
+
+    if not splits:
+        # Nothing to evaluate: stop instead of writing empty corpora.
+        sys.exit("no split output (N_name.conllu) found in the current directory")
+
+    # One fixed order, so every corpus is built and named consistently.
+    ordered = sorted(splits)
+    corpus = "%s_corpus.conllu" % ",".join(map(str, ordered))
+
+    os.makedirs(gold, exist_ok=True)
+    concatenate(
+        ["%sUD_French-GSD_%d/test.conllu" % (data, split) for split in ordered],
+        gold + corpus,
+    )
+
+    for group, names in groups.items():
+        groupdir = "%s%s/" % (pvalues, group)
+        os.makedirs(groupdir, exist_ok=True)
+        for elem in names:
+            elemdir = "%s%s/" % (groupdir, elem)
+            os.makedirs(elemdir, exist_ok=True)
+            concatenate(
+                ["%d_%s.conllu" % (split, elem) for split in ordered],
+                elemdir + corpus,
+            )
+
+        # Traces are read from a single split: the lowest-numbered one.
+        traces = ["%d_%s.trace" % (ordered[0], elem) for elem in names]
+        with open("stats_%s.txt" % group, "w") as stats:
+            status = subprocess.run(
+                [readTrace, *traces, "--stats"], stdout=stats
+            ).returncode
+        if status != 0:
+            print(
+                "warning: %s exited with status %d for group %s"
+                % (readTrace, status, group),
+                file=sys.stderr,
+            )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/outputs/pvalues/pvalues.sh b/outputs/pvalues/pvalues.sh
new file mode 100755
index 0000000000000000000000000000000000000000..efbe7133a9a5d4710044039936917e4ed24b991b
--- /dev/null
+++ b/outputs/pvalues/pvalues.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Score every system's corpora against gold with udapy, one metric at a time,
+# then run the conll18 module with $RUNS as its -r argument.
+# Usage: pvalues.sh [SYSTEM...]   (default: every entry of the $NAME directory)
+NAME='tagparser'
+SYSTEMS=$(ls "$NAME")
+METRICS="UAS UPOS"
+RUNS=10000
+
+# Systems named on the command line replace the default list.
+[[ $# -ne 0 ]] && SYSTEMS=$*
+# Trace commands and stop at the first failure; set once, not per metric.
+set -x
+set -e
+
+for metric in $METRICS; do
+  for sys in $SYSTEMS; do
+    mkdir -p "results/$sys"
+    for testset in $(ls "$NAME/$sys"); do
+      udapy read.Conllu zone=gold files="gold/$testset" \
+        read.Conllu zone=pred files="$NAME/$sys/$testset" ignore_sent_id=1 \
+        util.ResegmentGold \
+        eval.Conll18 print_results=0 print_raw="$metric" \
+        > "results/$sys/${testset%.conllu}"
+    done
+  done
+  echo "$metric"
+  python3 "$(python3 -c 'import conll18 as x; print(x.__file__)')" -r "$RUNS"
+  echo ""
+done
diff --git a/readTrace.py b/readTrace.py
index 4d1354ce2b13c99219f1fa82c04017cad0f06e77..2cb559704ad1b4be1334042b4ff10b68ddd68709 100755
--- a/readTrace.py
+++ b/readTrace.py
@@ -284,7 +284,7 @@ class History() :
       globalStats["arcsAccuracy"] = 100.0*(globalStats["nbArcs"]-globalStats["nbMissedArcs"])/globalStats["nbArcs"]
     if globalStats["nbErr"] > 0 :
       globalStats["avgErrCost"] /= globalStats["nbErr"]
-    if globalStats["nbErr"] > 0 :
+    if globalStats["nbErrParser"] > 0 :
       globalStats["avgErrCostParser"] /= globalStats["nbErrParser"]
     if globalStats["nbErr"] > 0 :
       globalStats["backRecall"] = 100.0*globalStats["nbErrFound"] / globalStats["nbErr"]