diff --git a/outputs/evalAndStats.py b/outputs/evalAndStats.py index 9708c3788b92c19a89adb42b41b7aad0289a4fc5..1e2af78c21356b1e77abf0a64753274ce0400abf 100755 --- a/outputs/evalAndStats.py +++ b/outputs/evalAndStats.py @@ -14,7 +14,7 @@ groups = {} splits = set() for filename in os.listdir() : - if len(filename) < 7 or filename[-7:] != ".conllu" : + if len(filename) < 7 or filename[-7:] != ".conllu" or "bt2" in filename : continue splitNum = int(filename.split("_")[0]) splits.add(splitNum) @@ -26,16 +26,20 @@ for filename in os.listdir() : refs = " ".join(["%sUD_French-GSD_%d/test.conllu"%(data, split) for split in splits]) os.system("cat %s > %s%s_corpus.conllu"%(refs, gold, ",".join(list(map(str,splits))))) - + +btTraces = [] for group in groups : - groupdir = "%s%s/"%(pvalues, group) - os.makedirs(groupdir, exist_ok=True) - for elem in groups[group] : - ins = " ".join(["%d_%s.conllu"%(split, elem) for split in splits]) - os.makedirs("%s%s/"%(groupdir, elem), exist_ok=True) - os.system("cat %s > %s%s/%s_corpus.conllu"%(ins, groupdir, elem, ",".join(list(map(str,splits))))) + groupDir = "%s%s/"%(pvalues, group) + for model in groups[group] : + modeloutputs = " ".join(["%d_%s.conllu"%(split, model) for split in splits]) + modelDir = "%s%s/"%(groupDir, model) + os.makedirs("%s"%(modelDir), exist_ok=True) + os.system("cat %s > %s%s_corpus.conllu"%(modeloutputs, modelDir, ",".join(list(map(str,splits))))) + + btTraces.append(["%d_%s.trace"%(list(splits)[0], elem) for elem in groups[group] if "bt" in elem and "nobt" not in elem and "1" in elem][0]) traces = " ".join(["%d_%s.trace"%(list(splits)[0], elem) for elem in groups[group]]) - os.system("%s %s --stats > stats_%s.txt"%(readTrace, traces, group)) os.system("%s %s --steps > steps_%s.txt"%(readTrace, traces, group)) - + +os.system("%s %s --stats > stats_bt.txt"%(readTrace, " ".join(btTraces))) + diff --git a/outputs/pvalues/pvalues.sh b/outputs/pvalues/pvalues.sh index efbe7133a9a5d4710044039936917e4ed24b991b..4abead2d9784d40d47d87e347fe8611a8ea25d69 100755 --- a/outputs/pvalues/pvalues.sh +++ b/outputs/pvalues/pvalues.sh @@ -1,24 +1,42 @@ #!/bin/bash -NAME='tagparser' -SYSTEMS=`ls $NAME` -METRICS="UAS UPOS" -RUNS=10000 +RUNS=50000 +GRPS=('eager' + 'tagger' + 'tagparser') -for metric in $METRICS; do - [[ $# -ne 0 ]] && SYSTEMS=$@ - set -x - set -e - for sys in $SYSTEMS; do - mkdir -p results/$sys - for testset in `ls $NAME/$sys`; do - udapy read.Conllu zone=gold files=gold/$testset \ - read.Conllu zone=pred files=$NAME/$sys/$testset ignore_sent_id=1 \ - util.ResegmentGold \ - eval.Conll18 print_results=0 print_raw=$metric \ - > results/$sys/${testset%.conllu} - done +computePVal () { + rm -r results + NAME="$1" + OUTPUT="$1.res" + METRICS="UAS UPOS" + if [ "$OUTPUT" = "tagger.res" ]; then + METRICS="UPOS" + fi + if [ "$OUTPUT" = "eager.res" ]; then + METRICS="UAS" + fi + SYSTEMS=$(ls $NAME) + > $OUTPUT + for metric in $METRICS; do + set -x + set -e + for sys in $SYSTEMS; do + mkdir -p results/$sys + for testset in `ls $NAME/$sys`; do + udapy read.Conllu zone=gold files=gold/$testset \ + read.Conllu zone=pred files=$NAME/$sys/$testset ignore_sent_id=1 \ + util.ResegmentGold \ + eval.Conll18 print_results=0 print_raw=$metric \ + > results/$sys/${testset%.conllu} + done + done + echo $metric >> $OUTPUT + python3 `python3 -c 'import conll18 as x; print(x.__file__)'` -r $RUNS >> $OUTPUT + echo "" >> $OUTPUT done - echo $metric - python3 `python3 -c 'import conll18 as x; print(x.__file__)'` -r $RUNS - echo "" +} + +for grp in ${GRPS[@]}; do + computePVal $grp done +