Skip to content
Snippets Groups Projects
Commit f1f18db2 authored by Franck Dary
Browse files

Updated for tokenizer

parent 4ed5c4e0
No related branches found
No related tags found
No related merge requests found
...@@ -22,9 +22,9 @@ all_no_test.conllu: ...@@ -22,9 +22,9 @@ all_no_test.conllu:
cat $(TRAIN_FILES) > $@ cat $(TRAIN_FILES) > $@
tokenizer.ts: all_no_test.conllu $(MCD) tokenizer.ts: all_no_test.conllu $(MCD)
echo "IGNORECHAR" > $@ echo "ENDWORD" > $@
echo "IGNORECHAR" >> $@
$(SCRIPTS)/conllu2splits.py $< $(MCD) >> $@ 2> ambiguities.txt $(SCRIPTS)/conllu2splits.py $< $(MCD) >> $@ 2> ambiguities.txt
echo "ENDWORD" >> $@
echo "ADDCHARTOWORD" >> $@ echo "ADDCHARTOWORD" >> $@
sed -i -e 's/^/<tokenizer> /' $@ sed -i -e 's/^/<tokenizer> /' $@
...@@ -40,7 +40,7 @@ columns: all_no_test.conllu $(MCD) ...@@ -40,7 +40,7 @@ columns: all_no_test.conllu $(MCD)
cat tagger.ts parser.ts > taggerparser.ts cat tagger.ts parser.ts > taggerparser.ts
texts: texts:
./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILEs) $(TEST_FILES) ./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILES) $(TEST_FILES)
$(FPLM_FILENAME): all_no_test.conllu $(MCD) $(FPLM_FILENAME): all_no_test.conllu $(MCD)
$(SCRIPTS)/conllu2fplm.py $< $(MCD) > $@ $(SCRIPTS)/conllu2fplm.py $< $(MCD) > $@
......
Name : Tokenizer Machine Name : Tokenizer Machine
Classifier : tokenizer CNN(4,0,0,{FORM},{-1,0},{},{FORM},{10}) data/tokenizer.ts Classifier : tokenizer CNN(4,0,0,{FORM},{-1,0},{},{FORM},{10}) data/tokenizer.ts
Predictions : FORM Predictions : ID FORM EOS
Strategy : sequential Strategy : sequential
tokenizer tokenizer ENDWORD 1 tokenizer tokenizer ENDWORD 1
tokenizer tokenizer SPLITWORD 1 tokenizer tokenizer SPLITWORD 1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment