From da2529d0e00a42ca499bc9d8ff9445653177db7c Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Sun, 4 Nov 2018 19:35:16 +0100
Subject: [PATCH] Added a script called average.py that is called from
 average.sh in eval folder. It is able to compute the standard deviation of
 experiments trained with the nbTrain argument

---
 FQB/eval/average.sh       |  6 ++++
 UD_fr/eval/average.sh     |  6 ++++
 en/eval/average.sh        |  6 ++++
 fr/eval/average.sh        |  6 ++++
 fr_orpheo/eval/average.sh |  6 ++++
 scripts/average.py        | 66 +++++++++++++++++++++++++++++++++++++++
 scripts/eval.py           | 20 ++++++++----
 scripts/train.sh          | 23 --------------
 8 files changed, 110 insertions(+), 29 deletions(-)
 create mode 100755 FQB/eval/average.sh
 create mode 100755 UD_fr/eval/average.sh
 create mode 100755 en/eval/average.sh
 create mode 100755 fr/eval/average.sh
 create mode 100755 fr_orpheo/eval/average.sh
 create mode 100755 scripts/average.py

diff --git a/FQB/eval/average.sh b/FQB/eval/average.sh
new file mode 100755
index 0000000..93eb09e
--- /dev/null
+++ b/FQB/eval/average.sh
@@ -0,0 +1,6 @@
+#! /bin/bash
+
+LANG=fr
+RES=$LANG.res
+
+exec ../../scripts/average.py $RES
diff --git a/UD_fr/eval/average.sh b/UD_fr/eval/average.sh
new file mode 100755
index 0000000..93eb09e
--- /dev/null
+++ b/UD_fr/eval/average.sh
@@ -0,0 +1,6 @@
+#! /bin/bash
+
+LANG=fr
+RES=$LANG.res
+
+exec ../../scripts/average.py $RES
diff --git a/en/eval/average.sh b/en/eval/average.sh
new file mode 100755
index 0000000..93eb09e
--- /dev/null
+++ b/en/eval/average.sh
@@ -0,0 +1,6 @@
+#! /bin/bash
+
+LANG=fr
+RES=$LANG.res
+
+exec ../../scripts/average.py $RES
diff --git a/fr/eval/average.sh b/fr/eval/average.sh
new file mode 100755
index 0000000..93eb09e
--- /dev/null
+++ b/fr/eval/average.sh
@@ -0,0 +1,6 @@
+#! /bin/bash
+
+LANG=fr
+RES=$LANG.res
+
+exec ../../scripts/average.py $RES
diff --git a/fr_orpheo/eval/average.sh b/fr_orpheo/eval/average.sh
new file mode 100755
index 0000000..93eb09e
--- /dev/null
+++ b/fr_orpheo/eval/average.sh
@@ -0,0 +1,6 @@
+#! /bin/bash
+
+LANG=fr
+RES=$LANG.res
+
+exec ../../scripts/average.py $RES
diff --git a/scripts/average.py b/scripts/average.py
new file mode 100755
index 0000000..2022190
--- /dev/null
+++ b/scripts/average.py
@@ -0,0 +1,66 @@
+#! /usr/bin/python3
+
+import sys
+
+COLSIZE1 = 21
+COLSIZE = 16
+nbCols = 0
+
+resByExpName = {}
+header = ""
+indexesToIgnore = [i for i in range(100)]
+for line in open(sys.argv[1], 'r') :
+  if len(header) == 0 :
+    header = line
+    continue
+  cols = line.split()
+  name = cols[0]
+  while len(name) > 0 and not name[-1] == '_' :
+    if name[-1].isdigit() :
+      name = name[:-1]
+    else :
+      break
+  if name[-1] == '_' :
+    name = name[:-1]
+  else : 
+    name = cols[0]
+  if name not in resByExpName :
+    resByExpName[name] = [[0.0,[]] for _ in cols]
+  resByExpName[name][0][0] += 1
+  nbCols = max(nbCols, len(cols)-1)
+  for i in range(1,len(cols)) :
+    col = cols[i]
+    while not col[-1].isdigit() :
+      col = col[:-1]
+    col = float(col)
+    if not col == 100.0 and i in indexesToIgnore :
+      indexesToIgnore.remove(i)
+    resByExpName[name][i][0] += col
+    resByExpName[name][i][1].append(col)
+
+print(header.split(" ")[0],end=" "*(COLSIZE1-len(header.split(" ")[0])))
+colCounter = 0
+for col in header.split(" ")[1:-1] :
+  if len(col) == 0 :
+    continue
+  colCounter += 1
+  if colCounter in indexesToIgnore :
+    continue
+  print(col,end=" "*(COLSIZE-len(col)))
+for experience in resByExpName :
+  print("\n"+experience,end=" "*(COLSIZE1-len(experience)))
+  nbSamples = resByExpName[experience][0][0]
+  for i in range(1,len(resByExpName[experience])) :
+    if i in indexesToIgnore :
+      continue
+    col = resByExpName[experience][i]
+    avg = col[0] / nbSamples
+    variance = 0.0
+    for val in col[1] :
+      variance += (avg-val)**2
+    variance /= nbSamples
+    deviation = (variance)**0.5
+    toPrint = "%.2f%%"%avg
+    toPrint += "["+u"\u00B1"+"%.2f%%]"%deviation
+    print(toPrint,end=" "*(COLSIZE-len(toPrint)))
+print()
diff --git a/scripts/eval.py b/scripts/eval.py
index 283d38b..c26f02e 100755
--- a/scripts/eval.py
+++ b/scripts/eval.py
@@ -18,6 +18,12 @@ experiences = []
 debug = ""
 evalArgs = " "
 
+binpath = os.environ["MACAON_DIR"] + "/" + lang + "/bin"
+eval_mcf = "../../tools/eval_mcf.py"
+result_file = lang + ".res"
+output = "output.txt"
+err = "stderr.log"
+
 for i in range(4, len(sys.argv)) :
   arg = sys.argv[i]
   if arg == "-d" or arg == "--debug" :
@@ -29,11 +35,14 @@ for i in range(4, len(sys.argv)) :
       break
     experiences += [arg.split('+')]
 
-binpath = os.environ["MACAON_DIR"] + "/" + lang + "/bin"
-eval_mcf = "../../tools/eval_mcf.py"
-result_file = lang + ".res"
-output = "output.txt"
-err = "stderr.log"
+for experience in experiences :
+  if experience[0][-1] == '*' :
+    name = experience[0]
+    experiences.remove(experience)
+    i = 0
+    while os.path.isdir(binpath + "/" + name[:-1] + str(i)) :
+      experiences.append([name[:-1] + str(i)])
+      i += 1
 
 firstWrite = True
 
@@ -54,7 +63,6 @@ for experience in experiences :
   error_occured = False
 
   for exp in experience :
-    
     process = subprocess.Popen(binpath + "/maca_tm_" + exp + " " + input_file + " " + mcd + " " + debug + " > " + output + " 2> " + err, shell=True) 
     process.wait()
     subprocess.Popen("cp " + output + " tmp_input", shell=True).wait()
diff --git a/scripts/train.sh b/scripts/train.sh
index 4bd3afc..cf186fb 100755
--- a/scripts/train.sh
+++ b/scripts/train.sh
@@ -41,28 +41,5 @@ if [ ! -d "$TEMPLATEPATH" ]; then
  exit
 fi
 
-# Here we create the decode script
-echo "\
-#! /bin/bash
-
-if [ \"\$#\" -lt 2 ]; then
- echo \"Usage : \$0 input mcd\"
- exit
-fi
-
-INPUT=\$1
-MCD=\$2
-
-shift
-shift
-ARGS=\"\"
-for arg in \"\$@\"
-do
-  ARGS=\"\$ARGS \$arg\"
-done
-
-macaon_decode --lang $LANG --tm machine.tm --bd test.bd -I \$INPUT --mcd \$MCD --expName $EXPNAME \$ARGS\
-" > $TEMPLATEPATH/decode.sh
-
 # We start the training
 macaon_train --tm machine.tm --bd train.bd --mcd ../../data/wpmlgfs.mcd -T ../../data/train.mcf --dev ../../data/dev.mcf --expName $EXPNAME --lang $LANG $ARGS --templateName $TEMPLATENAME
-- 
GitLab