From e29d000f0d3289e977db386e73626bd680f4d8a8 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Wed, 27 Oct 2021 11:44:23 +0200
Subject: [PATCH] Improved pvalue computation

---
 outputs/evalAndStats.py    | 24 +++++++++-------
 outputs/pvalues/pvalues.sh | 58 +++++++++++++++++++++++++-------------
 2 files changed, 52 insertions(+), 30 deletions(-)

diff --git a/outputs/evalAndStats.py b/outputs/evalAndStats.py
index 9708c37..1e2af78 100755
--- a/outputs/evalAndStats.py
+++ b/outputs/evalAndStats.py
@@ -14,7 +14,7 @@ groups = {}
 splits = set()
 
 for filename in os.listdir() :
-  if len(filename) < 7 or filename[-7:] != ".conllu" :
+  if len(filename) < 7 or filename[-7:] != ".conllu" or "bt2" in filename :
     continue
   splitNum = int(filename.split("_")[0])
   splits.add(splitNum)
@@ -26,16 +26,20 @@ for filename in os.listdir() :
 
 refs = " ".join(["%sUD_French-GSD_%d/test.conllu"%(data, split) for split in splits])
 os.system("cat %s > %s%s_corpus.conllu"%(refs, gold, ",".join(list(map(str,splits)))))
-  
+
+btTraces = [] # Collects at most one "bt" trace (not "nobt", with "1" in the model name) per group.
 for group in groups :
-  groupdir = "%s%s/"%(pvalues, group)
-  os.makedirs(groupdir, exist_ok=True)
-  for elem in groups[group] :
-    ins = " ".join(["%d_%s.conllu"%(split, elem) for split in splits])
-    os.makedirs("%s%s/"%(groupdir, elem), exist_ok=True)
-    os.system("cat %s > %s%s/%s_corpus.conllu"%(ins, groupdir, elem, ",".join(list(map(str,splits)))))
 
+  groupDir = "%s%s/"%(pvalues, group)
+  for model in groups[group] :
+    modeloutputs = " ".join(["%d_%s.conllu"%(split, model) for split in splits])
+    modelDir = "%s%s/"%(groupDir, model)
+    os.makedirs(modelDir, exist_ok=True)
+    os.system("cat %s > %s%s_corpus.conllu"%(modeloutputs, modelDir, ",".join(map(str,splits))))
+  # Keep the first matching trace only; [:1] adds nothing (instead of raising IndexError) when no model of the group matches.
+  btTraces.extend(["%d_%s.trace"%(list(splits)[0], elem) for elem in groups[group] if "bt" in elem and "nobt" not in elem and "1" in elem][:1])
   traces = " ".join(["%d_%s.trace"%(list(splits)[0], elem) for elem in groups[group]])
-  os.system("%s %s --stats > stats_%s.txt"%(readTrace, traces, group))
   os.system("%s %s --steps > steps_%s.txt"%(readTrace, traces, group))
-    
+if btTraces : # Without any collected trace, readTrace would be invoked with no input file.
+  os.system("%s %s --stats > stats_bt.txt"%(readTrace, " ".join(btTraces)))
+
diff --git a/outputs/pvalues/pvalues.sh b/outputs/pvalues/pvalues.sh
index efbe713..4abead2 100755
--- a/outputs/pvalues/pvalues.sh
+++ b/outputs/pvalues/pvalues.sh
@@ -1,24 +1,42 @@
 #!/bin/bash
-NAME='tagparser'
-SYSTEMS=`ls $NAME`
-METRICS="UAS UPOS"
-RUNS=10000
+RUNS=50000  # value handed to the conll18 script through its -r option
+GRPS=('eager'  # each entry is passed to computePVal as its first argument
+      'tagger'
+      'tagparser')
 
-for metric in $METRICS; do
-  [[ $# -ne 0 ]] && SYSTEMS=$@
-  set -x
-  set -e
-  for sys in $SYSTEMS; do
-      mkdir -p results/$sys
-      for testset in `ls $NAME/$sys`; do
-          udapy read.Conllu zone=gold files=gold/$testset \
-                read.Conllu zone=pred files=$NAME/$sys/$testset ignore_sent_id=1 \
-                util.ResegmentGold \
-                eval.Conll18 print_results=0 print_raw=$metric \
-                > results/$sys/${testset%.conllu}
-      done
+# computePVal GROUP: score every system found in GROUP/ against gold/ with
+# udapy, then append the conll18 script output for each metric to GROUP.res.
+computePVal () {
+  rm -rf results  # -f: a missing results/ directory must not be an error
+  NAME="$1"
+  OUTPUT="$1.res"
+  case "$NAME" in  # tagger is scored on UPOS only, eager on UAS only
+    tagger) METRICS="UPOS" ;;
+    eager)  METRICS="UAS" ;;
+    *)      METRICS="UAS UPOS" ;;
+  esac
+  SYSTEMS=$(ls "$NAME")
+  : > "$OUTPUT"  # start from an empty output file
+  for metric in $METRICS; do
+    set -x
+    set -e
+    for sys in $SYSTEMS; do
+        mkdir -p "results/$sys"
+        for testset in $(ls "$NAME/$sys"); do
+            udapy read.Conllu zone=gold files="gold/$testset" \
+                  read.Conllu zone=pred files="$NAME/$sys/$testset" ignore_sent_id=1 \
+                  util.ResegmentGold \
+                  eval.Conll18 print_results=0 print_raw="$metric" \
+                  > "results/$sys/${testset%.conllu}"
+        done
+    done
+    echo "$metric" >> "$OUTPUT"
+    python3 "$(python3 -c 'import conll18 as x; print(x.__file__)')" -r "$RUNS" >> "$OUTPUT"
+    echo "" >> "$OUTPUT"
   done
-  echo $metric
-  python3 `python3 -c 'import conll18 as x; print(x.__file__)'` -r $RUNS
-  echo ""
+}
+# Run the evaluation once per group; each call writes its results to <group>.res.
+for grp in "${GRPS[@]}"; do
+  computePVal "$grp"
 done
+
-- 
GitLab