Skip to content
Snippets Groups Projects
Commit ac4b5d0a authored by Franck Dary
Browse files

Adapted to new split transition

parent 46445f01
No related branches found
No related tags found
No related merge requests found
...@@ -23,10 +23,14 @@ all_no_test.conllu: ...@@ -23,10 +23,14 @@ all_no_test.conllu:
tokenizer.ts: all_no_test.conllu $(MCD) tokenizer.ts: all_no_test.conllu $(MCD)
echo "ENDWORD" > $@ echo "ENDWORD" > $@
$(SCRIPTS)/conllu2splits.py $< $(MCD) >> $@ 2> ambiguities.txt $(SCRIPTS)/conllu2splits.py $< $(MCD) > splitwords.ts 2> ambiguities.txt
echo "SPLIT 0" >> $@
echo "SPLIT 1" >> $@
echo "SPLIT 2" >> $@
echo "ADDCHARTOWORD" >> $@ echo "ADDCHARTOWORD" >> $@
echo "IGNORECHAR" >> $@ echo "IGNORECHAR" >> $@
sed -i -e 's/^/<tokenizer> /' $@ sed -i -e 's/^/<tokenizer> /' $@
sed -i -e 's/^/<tokenizer> /' splitwords.ts
segmenter.ts: segmenter.ts:
echo "EOS b.0" > $@ echo "EOS b.0" > $@
......
Name : Tokenizer Machine Name : Tokenizer Machine
Classifier : tokenizer LSTM(-1,{-3,-2,-1},{},{FORM},{-1,0},{},{FORM},{10},5,5) data/tokenizer.ts Classifier : tokenizer LSTM(-1,{-3,-2,-1},{},{FORM},{-1,0},{},{ID,FORM},{1,10},5,5) data/tokenizer.ts
Splitwords : data/splitwords.ts
Predictions : ID FORM EOS Predictions : ID FORM EOS
Strategy : sequential Strategy : sequential
tokenizer tokenizer ENDWORD 1 tokenizer tokenizer ENDWORD 1
tokenizer tokenizer SPLITWORD 1 tokenizer tokenizer SPLIT 1
tokenizer tokenizer 0 tokenizer tokenizer 0
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment