From 5af5e5c04978a18c3c6590fe33854d101b5a92c2 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Tue, 16 Feb 2021 15:34:18 +0100
Subject: [PATCH] updated scripts

Makefile: replace the single writescore_TIME.ts target with one target per
WRITESCORE name (NFIX, FFD, GPT, TRT, FIXPROP). all_text depends on all five
targets, all_lines only on writescore_FFD.ts.

conll18_ud_eval.py: accept word IDs that start at 0. Once an ID of 0 has been
parsed, id_starts_at_zero stays set and later IDs are checked against a
0-based position instead of a 1-based one.

conllu2splits.py: break out of the loop when the ID or FORM column is not
present in col2index.

launchSlurmArray.py: drop the hard-coded "#SBATCH --exclude=..." line.
---
Review notes (this section is ignored by git am):
 * writescore_TRT.ts used to write "WRITESCORE b.0 RTR"; every other target
   writes the name that appears in its own file name, so it now writes TRT.
 * The "Incorrect word ID" error reported a 1-based expected ID even when
   0-based IDs were in effect; it now uses the same offset as the check.
 * Line breaks and leading whitespace were lost in the flattened copy of this
   patch and are reconstructed here. Verify the context lines against the
   target files, or apply with --ignore-whitespace. The blob ids on the
   "index" lines are those of the original commit.

 UD_any/data/Makefile        | 20 ++++++++++++++++----
 scripts/conll18_ud_eval.py  |  7 +++++--
 scripts/conllu2splits.py    |  3 +++
 scripts/launchSlurmArray.py |  1 -
 4 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/UD_any/data/Makefile b/UD_any/data/Makefile
index 7f76b65..22240cf 100644
--- a/UD_any/data/Makefile
+++ b/UD_any/data/Makefile
@@ -10,9 +10,9 @@ TEST_FILES=$(shell find . -type f -name '*test*.conllu')
 THRESHOLD=10
 FPLM_FILENAME=fplm
 
-all_text: writescore_TIME.ts tokenizer.ts segmenter.ts texts all_no_test.conllu transitions pretrain
+all_text: writescore_NFIX.ts writescore_FFD.ts writescore_GPT.ts writescore_TRT.ts writescore_FIXPROP.ts tokenizer.ts segmenter.ts texts all_no_test.conllu transitions pretrain
 	rm -f all_no_test.conllu
-all_lines: writescore_TIME.ts tokenizer.ts segmenter.ts texts_lines all_no_test.conllu transitions pretrain
+all_lines: writescore_FFD.ts tokenizer.ts segmenter.ts texts_lines all_no_test.conllu transitions pretrain
 	rm -f all_no_test.conllu
 
 all_no_test.conllu:
@@ -44,8 +44,20 @@ segmenter.ts:
 	echo "NOTEOS b.0" >> $@
 	sed -i -e 's/^/<segmenter> /' $@
 
-writescore_TIME.ts:
-	echo "WRITESCORE b.0 TOTAL_FIXATION_DURATION" > $@
+writescore_NFIX.ts:
+	echo "WRITESCORE b.0 NFIX" > $@
+
+writescore_FFD.ts:
+	echo "WRITESCORE b.0 FFD" > $@
+
+writescore_GPT.ts:
+	echo "WRITESCORE b.0 GPT" > $@
+
+writescore_TRT.ts:
+	echo "WRITESCORE b.0 TRT" > $@
+
+writescore_FIXPROP.ts:
+	echo "WRITESCORE b.0 FIXPROP" > $@
 
 transitions: all_no_test.conllu
 	./getTransitionSets.py $<
diff --git a/scripts/conll18_ud_eval.py b/scripts/conll18_ud_eval.py
index 2b760bd..ee553ac 100755
--- a/scripts/conll18_ud_eval.py
+++ b/scripts/conll18_ud_eval.py
@@ -246,6 +246,7 @@ def load_conllu(file) :
 
   # Load the CoNLL-U file
   index, sentence_start = 0, None
+  id_starts_at_zero = False
   while True :
     line = file.readline()
     if not line :
@@ -337,9 +338,11 @@ def load_conllu(file) :
     else :
       try :
         word_id = int(columns[col2index["ID"]]) if "ID" in col2index else "_"
+        if word_id == 0 :
+          id_starts_at_zero = True
       except :
         raise UDError("Cannot parse word ID '{}'".format(_encode(columns[col2index["ID"]])))
-      if word_id != len(ud.words) - sentence_start + 1 :
+      if word_id != len(ud.words) - sentence_start + (0 if id_starts_at_zero else 1) :
         raise UDError("Incorrect word ID '{}' for word '{}', expected '{}'".format(
-          _encode(columns[col2index["ID"]]), _encode(columns[col2index["FORM"]]), len(ud.words) - sentence_start + 1))
+          _encode(columns[col2index["ID"]]), _encode(columns[col2index["FORM"]]), len(ud.words) - sentence_start + (0 if id_starts_at_zero else 1)))
 
diff --git a/scripts/conllu2splits.py b/scripts/conllu2splits.py
index d4d939c..76a8cec 100755
--- a/scripts/conllu2splits.py
+++ b/scripts/conllu2splits.py
@@ -53,6 +53,9 @@ def main() :
       sentence = []
       continue
 
+    if "ID" not in col2index or "FORM" not in col2index :
+      break
+
     idId = int(col2index["ID"])
     idForm = int(col2index["FORM"])
 
diff --git a/scripts/launchSlurmArray.py b/scripts/launchSlurmArray.py
index ef6ac5d..69d9eb1 100644
--- a/scripts/launchSlurmArray.py
+++ b/scripts/launchSlurmArray.py
@@ -69,7 +69,6 @@ eval "${{commands[$SLURM_ARRAY_TASK_ID]}}"
 #SBATCH --cpus-per-task=1
 #SBATCH --hint=nomultithread
 #SBATCH --partition={}
-#SBATCH --exclude=sensei1,lifnode1,asfalda1
 #SBATCH --time={}:00:00
 
 module purge
-- 
GitLab