From e29d000f0d3289e977db386e73626bd680f4d8a8 Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Wed, 27 Oct 2021 11:44:23 +0200 Subject: [PATCH] Improved pvalue computation --- outputs/evalAndStats.py | 24 +++++++++------- outputs/pvalues/pvalues.sh | 58 +++++++++++++++++++++++++------------- 2 files changed, 52 insertions(+), 30 deletions(-) diff --git a/outputs/evalAndStats.py b/outputs/evalAndStats.py index 9708c37..1e2af78 100755 --- a/outputs/evalAndStats.py +++ b/outputs/evalAndStats.py @@ -14,7 +14,7 @@ groups = {} splits = set() for filename in os.listdir() : - if len(filename) < 7 or filename[-7:] != ".conllu" : + if len(filename) < 7 or filename[-7:] != ".conllu" or "bt2" in filename : continue splitNum = int(filename.split("_")[0]) splits.add(splitNum) @@ -26,16 +26,20 @@ for filename in os.listdir() : refs = " ".join(["%sUD_French-GSD_%d/test.conllu"%(data, split) for split in splits]) os.system("cat %s > %s%s_corpus.conllu"%(refs, gold, ",".join(list(map(str,splits))))) - + +btTraces = [] for group in groups : - groupdir = "%s%s/"%(pvalues, group) - os.makedirs(groupdir, exist_ok=True) - for elem in groups[group] : - ins = " ".join(["%d_%s.conllu"%(split, elem) for split in splits]) - os.makedirs("%s%s/"%(groupdir, elem), exist_ok=True) - os.system("cat %s > %s%s/%s_corpus.conllu"%(ins, groupdir, elem, ",".join(list(map(str,splits))))) + groupDir = "%s%s/"%(pvalues, group) + for model in groups[group] : + modeloutputs = " ".join(["%d_%s.conllu"%(split, model) for split in splits]) + modelDir = "%s%s/"%(groupDir, model) + os.makedirs("%s"%(modelDir), exist_ok=True) + os.system("cat %s > %s%s_corpus.conllu"%(modeloutputs, modelDir, ",".join(list(map(str,splits))))) + + btTraces.append(["%d_%s.trace"%(list(splits)[0], elem) for elem in groups[group] if "bt" in elem and "nobt" not in elem and "1" in elem][0]) traces = " ".join(["%d_%s.trace"%(list(splits)[0], elem) for elem in groups[group]]) - os.system("%s %s --stats > 
stats_%s.txt"%(readTrace, traces, group)) os.system("%s %s --steps > steps_%s.txt"%(readTrace, traces, group)) - + +os.system("%s %s --stats > stats_bt.txt"%(readTrace, " ".join(btTraces))) + diff --git a/outputs/pvalues/pvalues.sh b/outputs/pvalues/pvalues.sh index efbe713..4abead2 100755 --- a/outputs/pvalues/pvalues.sh +++ b/outputs/pvalues/pvalues.sh @@ -1,24 +1,42 @@ #!/bin/bash -NAME='tagparser' -SYSTEMS=`ls $NAME` -METRICS="UAS UPOS" -RUNS=10000 +RUNS=50000 +GRPS=('eager' + 'tagger' + 'tagparser') -for metric in $METRICS; do - [[ $# -ne 0 ]] && SYSTEMS=$@ - set -x - set -e - for sys in $SYSTEMS; do - mkdir -p results/$sys - for testset in `ls $NAME/$sys`; do - udapy read.Conllu zone=gold files=gold/$testset \ - read.Conllu zone=pred files=$NAME/$sys/$testset ignore_sent_id=1 \ - util.ResegmentGold \ - eval.Conll18 print_results=0 print_raw=$metric \ - > results/$sys/${testset%.conllu} - done +computePVal () { + rm -r results + NAME="$1" + OUTPUT="$1.res" + METRICS="UAS UPOS" + if [ "$OUTPUT" = "tagger.res" ]; then + METRICS="UPOS" + fi + if [ "$OUTPUT" = "eager.res" ]; then + METRICS="UAS" + fi + SYSTEMS=$(ls $NAME) + > $OUTPUT + for metric in $METRICS; do + set -x + set -e + for sys in $SYSTEMS; do + mkdir -p results/$sys + for testset in `ls $NAME/$sys`; do + udapy read.Conllu zone=gold files=gold/$testset \ + read.Conllu zone=pred files=$NAME/$sys/$testset ignore_sent_id=1 \ + util.ResegmentGold \ + eval.Conll18 print_results=0 print_raw=$metric \ + > results/$sys/${testset%.conllu} + done + done + echo $metric >> $OUTPUT + python3 `python3 -c 'import conll18 as x; print(x.__file__)'` -r $RUNS >> $OUTPUT + echo "" >> $OUTPUT done - echo $metric - python3 `python3 -c 'import conll18 as x; print(x.__file__)'` -r $RUNS - echo "" +} + +for grp in ${GRPS[@]}; do + computePVal $grp done + -- GitLab