Skip to content
Snippets Groups Projects
Commit 5af5e5c0 authored by Franck Dary's avatar Franck Dary
Browse files

updated scripts

parent 35dba2dc
No related branches found
No related tags found
No related merge requests found
......@@ -10,9 +10,9 @@ TEST_FILES=$(shell find . -type f -name '*test*.conllu')
THRESHOLD=10
FPLM_FILENAME=fplm
all_text: writescore_TIME.ts tokenizer.ts segmenter.ts texts all_no_test.conllu transitions pretrain
all_text: writescore_NFIX.ts writescore_FFD.ts writescore_GPT.ts writescore_TRT.ts writescore_FIXPROP.ts tokenizer.ts segmenter.ts texts all_no_test.conllu transitions pretrain
rm -f all_no_test.conllu
all_lines: writescore_TIME.ts tokenizer.ts segmenter.ts texts_lines all_no_test.conllu transitions pretrain
all_lines: writescore_FFD.ts tokenizer.ts segmenter.ts texts_lines all_no_test.conllu transitions pretrain
rm -f all_no_test.conllu
all_no_test.conllu:
......@@ -44,8 +44,20 @@ segmenter.ts:
echo "NOTEOS b.0" >> $@
sed -i -e 's/^/<segmenter> /' $@
# writescore_<MEASURE>.ts rules.
# Each rule (re)creates its target file with exactly one line of the form
#   WRITESCORE b.0 <MEASURE>
# where <MEASURE> is the same name that appears in the target's file name
# (presumably the name of the score column to write -- confirm against the
# consumer of these .ts files).
writescore_TIME.ts:
	echo "WRITESCORE b.0 TOTAL_FIXATION_DURATION" > $@

writescore_NFIX.ts:
	echo "WRITESCORE b.0 NFIX" > $@

writescore_FFD.ts:
	echo "WRITESCORE b.0 FFD" > $@

writescore_GPT.ts:
	echo "WRITESCORE b.0 GPT" > $@

# The measure written here was misspelled "RTR"; it must match the target
# name (TRT) like every sibling rule does.
writescore_TRT.ts:
	echo "WRITESCORE b.0 TRT" > $@

writescore_FIXPROP.ts:
	echo "WRITESCORE b.0 FIXPROP" > $@
transitions: all_no_test.conllu
./getTransitionSets.py $<
......
......@@ -246,6 +246,7 @@ def load_conllu(file) :
# Load the CoNLL-U file
index, sentence_start = 0, None
id_starts_at_zero = False
while True :
line = file.readline()
if not line :
......@@ -337,9 +338,11 @@ def load_conllu(file) :
else :
try :
word_id = int(columns[col2index["ID"]]) if "ID" in col2index else "_"
if word_id == 0 :
id_starts_at_zero = True
except :
raise UDError("Cannot parse word ID '{}'".format(_encode(columns[col2index["ID"]])))
if word_id != len(ud.words) - sentence_start + 1 :
if word_id != len(ud.words) - sentence_start + (0 if id_starts_at_zero else 1) :
raise UDError("Incorrect word ID '{}' for word '{}', expected '{}'".format(
_encode(columns[col2index["ID"]]), _encode(columns[col2index["FORM"]]), len(ud.words) - sentence_start + 1))
......
......@@ -53,6 +53,9 @@ def main() :
sentence = []
continue
if "ID" not in col2index or "FORM" not in col2index :
break
idId = int(col2index["ID"])
idForm = int(col2index["FORM"])
......
......@@ -69,7 +69,6 @@ eval "${{commands[$SLURM_ARRAY_TASK_ID]}}"
#SBATCH --cpus-per-task=1
#SBATCH --hint=nomultithread
#SBATCH --partition={}
#SBATCH --exclude=sensei1,lifnode1,asfalda1
#SBATCH --time={}:00:00
module purge
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment