From da2529d0e00a42ca499bc9d8ff9445653177db7c Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Sun, 4 Nov 2018 19:35:16 +0100
Subject: [PATCH] Added a script called average.py that is called from
 average.sh in eval folder. It is able to compute the standard deviation of
 experiments trained with the nbTrain argument

---
Review notes. This free-text area after the three-dash separator is not part
of the commit message and lies outside every hunk.

* Formatting: the line structure of this patch was restored from a copy whose
  newlines and indentation had been collapsed to single spaces. Tokens are
  unchanged, but the indentation width (4 spaces) and the position of blank
  context lines are assumptions; TODO confirm against the target files
  before applying.
* scripts/eval.py, third hunk: the collapsed copy does not show whether the
  removed line is an empty line or the "process = subprocess.Popen(...)"
  line. It is reconstructed as an empty line because the context that
  follows still calls process.wait(); TODO confirm.
* scripts/eval.py, wildcard expansion: the new loop calls
  experiences.remove() and experiences.append() on the list it is iterating.
  Removing the current element shifts the remaining ones left, so the
  element that followed a "name*" entry is skipped by the for loop (two
  consecutive wildcard arguments leave the second one unexpanded). Iterating
  over a copy, or building a new list, avoids this. experience[0][-1] also
  raises IndexError when the first component of an argument is empty.
* scripts/average.py: the results file is opened inline in the for statement
  and never closed explicitly; sys.argv[1] is read without an argument check.
* scripts/average.py: a first column made only of digits is stripped to an
  empty string by the while loop, after which name[-1] raises IndexError.
  A blank line in the results file fails the same way at cols[0], and a cell
  without any digit fails at col[-1].
* scripts/average.py: the per-experiment accumulator is sized from the first
  row seen for that experiment; a later row with more columns indexes past
  its end.
* scripts/average.py: indexesToIgnore starts as range(100) and an index is
  dropped from it as soon as one value in that column differs from 100.0, so
  a column with index below 100 is printed only if some row is not exactly
  100.0.
* scripts/average.py: the variance is divided by nbSamples, i.e. the figure
  printed after the plus-minus sign is the population standard deviation.
  nbCols is assigned but never read.
* average.sh: the five new files share blob 93eb09e, so en/eval/average.sh
  also sets LANG=fr and passes fr.res; presumably it should be "en", verify
  against en/eval. NOTE(review): LANG is also the locale environment
  variable; reassigning it may change the locale seen by average.py, which
  prints a non-ASCII character; confirm.

 FQB/eval/average.sh       |  6 ++++
 UD_fr/eval/average.sh     |  6 ++++
 en/eval/average.sh        |  6 ++++
 fr/eval/average.sh        |  6 ++++
 fr_orpheo/eval/average.sh |  6 ++++
 scripts/average.py        | 66 +++++++++++++++++++++++++++++++++++++++
 scripts/eval.py           | 20 ++++++++----
 scripts/train.sh          | 23 --------------
 8 files changed, 110 insertions(+), 29 deletions(-)
 create mode 100755 FQB/eval/average.sh
 create mode 100755 UD_fr/eval/average.sh
 create mode 100755 en/eval/average.sh
 create mode 100755 fr/eval/average.sh
 create mode 100755 fr_orpheo/eval/average.sh
 create mode 100755 scripts/average.py

diff --git a/FQB/eval/average.sh b/FQB/eval/average.sh
new file mode 100755
index 0000000..93eb09e
--- /dev/null
+++ b/FQB/eval/average.sh
@@ -0,0 +1,6 @@
+#! /bin/bash
+
+LANG=fr
+RES=$LANG.res
+
+exec ../../scripts/average.py $RES
diff --git a/UD_fr/eval/average.sh b/UD_fr/eval/average.sh
new file mode 100755
index 0000000..93eb09e
--- /dev/null
+++ b/UD_fr/eval/average.sh
@@ -0,0 +1,6 @@
+#! /bin/bash
+
+LANG=fr
+RES=$LANG.res
+
+exec ../../scripts/average.py $RES
diff --git a/en/eval/average.sh b/en/eval/average.sh
new file mode 100755
index 0000000..93eb09e
--- /dev/null
+++ b/en/eval/average.sh
@@ -0,0 +1,6 @@
+#! /bin/bash
+
+LANG=fr
+RES=$LANG.res
+
+exec ../../scripts/average.py $RES
diff --git a/fr/eval/average.sh b/fr/eval/average.sh
new file mode 100755
index 0000000..93eb09e
--- /dev/null
+++ b/fr/eval/average.sh
@@ -0,0 +1,6 @@
+#! /bin/bash
+
+LANG=fr
+RES=$LANG.res
+
+exec ../../scripts/average.py $RES
diff --git a/fr_orpheo/eval/average.sh b/fr_orpheo/eval/average.sh
new file mode 100755
index 0000000..93eb09e
--- /dev/null
+++ b/fr_orpheo/eval/average.sh
@@ -0,0 +1,6 @@
+#! /bin/bash
+
+LANG=fr
+RES=$LANG.res
+
+exec ../../scripts/average.py $RES
diff --git a/scripts/average.py b/scripts/average.py
new file mode 100755
index 0000000..2022190
--- /dev/null
+++ b/scripts/average.py
@@ -0,0 +1,66 @@
+#! /usr/bin/python3
+
+import sys
+
+COLSIZE1 = 21
+COLSIZE = 16
+nbCols = 0
+
+resByExpName = {}
+header = ""
+indexesToIgnore = [i for i in range(100)]
+for line in open(sys.argv[1], 'r') :
+    if len(header) == 0 :
+        header = line
+        continue
+    cols = line.split()
+    name = cols[0]
+    while len(name) > 0 and not name[-1] == '_' :
+        if name[-1].isdigit() :
+            name = name[:-1]
+        else :
+            break
+    if name[-1] == '_' :
+        name = name[:-1]
+    else :
+        name = cols[0]
+    if name not in resByExpName :
+        resByExpName[name] = [[0.0,[]] for _ in cols]
+    resByExpName[name][0][0] += 1
+    nbCols = max(nbCols, len(cols)-1)
+    for i in range(1,len(cols)) :
+        col = cols[i]
+        while not col[-1].isdigit() :
+            col = col[:-1]
+        col = float(col)
+        if not col == 100.0 and i in indexesToIgnore :
+            indexesToIgnore.remove(i)
+        resByExpName[name][i][0] += col
+        resByExpName[name][i][1].append(col)
+
+print(header.split(" ")[0],end=" "*(COLSIZE1-len(header.split(" ")[0])))
+colCounter = 0
+for col in header.split(" ")[1:-1] :
+    if len(col) == 0 :
+        continue
+    colCounter += 1
+    if colCounter in indexesToIgnore :
+        continue
+    print(col,end=" "*(COLSIZE-len(col)))
+for experience in resByExpName :
+    print("\n"+experience,end=" "*(COLSIZE1-len(experience)))
+    nbSamples = resByExpName[experience][0][0]
+    for i in range(1,len(resByExpName[experience])) :
+        if i in indexesToIgnore :
+            continue
+        col = resByExpName[experience][i]
+        avg = col[0] / nbSamples
+        variance = 0.0
+        for val in col[1] :
+            variance += (avg-val)**2
+        variance /= nbSamples
+        deviation = (variance)**0.5
+        toPrint = "%.2f%%"%avg
+        toPrint += "["+u"\u00B1"+"%.2f%%]"%deviation
+        print(toPrint,end=" "*(COLSIZE-len(toPrint)))
+print()
diff --git a/scripts/eval.py b/scripts/eval.py
index 283d38b..c26f02e 100755
--- a/scripts/eval.py
+++ b/scripts/eval.py
@@ -18,6 +18,12 @@ experiences = []
 debug = ""
 evalArgs = " "
 
+binpath = os.environ["MACAON_DIR"] + "/" + lang + "/bin"
+eval_mcf = "../../tools/eval_mcf.py"
+result_file = lang + ".res"
+output = "output.txt"
+err = "stderr.log"
+
 for i in range(4, len(sys.argv)) :
     arg = sys.argv[i]
     if arg == "-d" or arg == "--debug" :
@@ -29,11 +35,14 @@ for i in range(4, len(sys.argv)) :
         break
     experiences += [arg.split('+')]
 
-binpath = os.environ["MACAON_DIR"] + "/" + lang + "/bin"
-eval_mcf = "../../tools/eval_mcf.py"
-result_file = lang + ".res"
-output = "output.txt"
-err = "stderr.log"
+for experience in experiences :
+    if experience[0][-1] == '*' :
+        name = experience[0]
+        experiences.remove(experience)
+        i = 0
+        while os.path.isdir(binpath + "/" + name[:-1] + str(i)) :
+            experiences.append([name[:-1] + str(i)])
+            i += 1
 
 firstWrite = True
 
@@ -54,7 +63,6 @@ for experience in experiences :
     error_occured = False
 
     for exp in experience :
-
         process = subprocess.Popen(binpath + "/maca_tm_" + exp + " " + input_file + " " + mcd + " " + debug + " > " + output + " 2> " + err, shell=True)
         process.wait()
         subprocess.Popen("cp " + output + " tmp_input", shell=True).wait()
diff --git a/scripts/train.sh b/scripts/train.sh
index 4bd3afc..cf186fb 100755
--- a/scripts/train.sh
+++ b/scripts/train.sh
@@ -41,28 +41,5 @@ if [ ! -d "$TEMPLATEPATH" ]; then
     exit
 fi
 
-# Here we create the decode script
-echo "\
-#! /bin/bash
-
-if [ \"\$#\" -lt 2 ]; then
-    echo \"Usage : \$0 input mcd\"
-    exit
-fi
-
-INPUT=\$1
-MCD=\$2
-
-shift
-shift
-ARGS=\"\"
-for arg in \"\$@\"
-do
-    ARGS=\"\$ARGS \$arg\"
-done
-
-macaon_decode --lang $LANG --tm machine.tm --bd test.bd -I \$INPUT --mcd \$MCD --expName $EXPNAME \$ARGS\
-" > $TEMPLATEPATH/decode.sh
-
 # We start the training
 macaon_train --tm machine.tm --bd train.bd --mcd ../../data/wpmlgfs.mcd -T ../../data/train.mcf --dev ../../data/dev.mcf --expName $EXPNAME --lang $LANG $ARGS --templateName $TEMPLATENAME
-- 
GitLab