From 07795c9df9be2ca64732247d0ac21522cb605e15 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Fri, 3 Apr 2020 18:59:45 +0200
Subject: [PATCH] Added more SPLIT transitions because arabic needs it

---
 UD_any/data/Makefile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/UD_any/data/Makefile b/UD_any/data/Makefile
index 904e3fa..25963db 100644
--- a/UD_any/data/Makefile
+++ b/UD_any/data/Makefile
@@ -27,6 +27,11 @@ tokenizer.ts: all_no_test.conllu $(MCD)
 	echo "SPLIT 0" >> $@
 	echo "SPLIT 1" >> $@
 	echo "SPLIT 2" >> $@
+	echo "SPLIT 3" >> $@
+	echo "SPLIT 4" >> $@
+	echo "SPLIT 5" >> $@
+	echo "SPLIT 6" >> $@
+	echo "SPLIT 7" >> $@
 	echo "ADDCHARTOWORD" >> $@
 	echo "IGNORECHAR" >> $@
 	sed -i -e 's/^/<tokenizer> /' $@
-- 
GitLab