Skip to content
Snippets Groups Projects
Commit f1f18db2 authored by Franck Dary
Browse files

Updated for tokenizer

parent 4ed5c4e0
No related branches found
No related tags found
No related merge requests found
......@@ -22,9 +22,9 @@ all_no_test.conllu:
cat $(TRAIN_FILES) > $@
tokenizer.ts: all_no_test.conllu $(MCD)
echo "IGNORECHAR" > $@
echo "ENDWORD" > $@
echo "IGNORECHAR" >> $@
$(SCRIPTS)/conllu2splits.py $< $(MCD) >> $@ 2> ambiguities.txt
echo "ENDWORD" >> $@
echo "ADDCHARTOWORD" >> $@
sed -i -e 's/^/<tokenizer> /' $@
......@@ -40,7 +40,7 @@ columns: all_no_test.conllu $(MCD)
cat tagger.ts parser.ts > taggerparser.ts
texts:
./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILEs) $(TEST_FILES)
./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILES) $(TEST_FILES)
$(FPLM_FILENAME): all_no_test.conllu $(MCD)
$(SCRIPTS)/conllu2fplm.py $< $(MCD) > $@
......
Name : Tokenizer Machine
Classifier : tokenizer CNN(4,0,0,{FORM},{-1,0},{},{FORM},{10}) data/tokenizer.ts
Predictions : FORM
Predictions : ID FORM EOS
Strategy : sequential
tokenizer tokenizer ENDWORD 1
tokenizer tokenizer SPLITWORD 1
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment