Skip to content
Snippets Groups Projects
Commit f1f18db2 authored by Franck Dary
Browse files

Updated for tokenizer

parent 4ed5c4e0
Branches
Tags
No related merge requests found
......@@ -22,9 +22,9 @@ all_no_test.conllu:
cat $(TRAIN_FILES) > $@
tokenizer.ts: all_no_test.conllu $(MCD)
echo "IGNORECHAR" > $@
echo "ENDWORD" > $@
echo "IGNORECHAR" >> $@
$(SCRIPTS)/conllu2splits.py $< $(MCD) >> $@ 2> ambiguities.txt
echo "ENDWORD" >> $@
echo "ADDCHARTOWORD" >> $@
sed -i -e 's/^/<tokenizer> /' $@
......@@ -40,7 +40,7 @@ columns: all_no_test.conllu $(MCD)
cat tagger.ts parser.ts > taggerparser.ts
texts:
./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILEs) $(TEST_FILES)
./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILES) $(TEST_FILES)
$(FPLM_FILENAME): all_no_test.conllu $(MCD)
$(SCRIPTS)/conllu2fplm.py $< $(MCD) > $@
......
Name : Tokenizer Machine
Classifier : tokenizer CNN(4,0,0,{FORM},{-1,0},{},{FORM},{10}) data/tokenizer.ts
Predictions : FORM
Predictions : ID FORM EOS
Strategy : sequential
tokenizer tokenizer ENDWORD 1
tokenizer tokenizer SPLITWORD 1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment