# Builds the transition-set files (*.ts), the raw texts, the pretrained
# embeddings and the fplm file from the CoNLL-U corpora found below this
# directory.
#
# Variables expected from the command line or the environment (they are not
# given a value in this file):
#   MCD              forwarded as the second argument to conllu2splits.py and
#                    getTransitionSets.py.
#   PRETRAINED_COLS  whitespace-separated list; the `pretrain` target runs
#                    pretrainEmbeddings.sh once per entry.
#
# TODO give mcd argument to all scripts

# Delete a target whose recipe failed, so a truncated file is never mistaken
# for an up-to-date one on the next run.
.DELETE_ON_ERROR:

# all_text is also the first rule below; stated explicitly so that reordering
# the file cannot silently change what a bare `make` builds.
.DEFAULT_GOAL := all_text

# These targets are commands, not files named after the target.
# NOTE(review): if getTransitionSets.py or getRawText.py happen to create a
# file or directory called `transitions` / `texts` / `texts_lines`, those
# rules used to be skipped once it existed; as phony targets they always run.
.PHONY: all_text all_lines transitions texts texts_lines pretrain clean

# Empty defaults only document the external inputs; a value given on the
# command line or in the environment still wins.
MCD ?=
PRETRAINED_COLS ?=

SCRIPTS=../../../../scripts
CONLL2TXT=$(SCRIPTS)/conllu_to_text.pl
CONLL2LINES=$(SCRIPTS)/conllu_to_lines.sh

# Kept recursively expanded (=) on purpose: `find` then runs only when a recipe
# that uses the list is expanded, i.e. it sees the directory as it is at that
# moment and is not run at all for goals such as `clean`.
TRAIN_FILES=$(shell find . -name '*train*.conllu')
DEV_FILES=$(shell find . -name '*dev*.conllu')
TEST_FILES=$(shell find . -name '*test*.conllu')

# This part is for lemmatizer rules and exceptions computation
THRESHOLD=10
FPLM_FILENAME=fplm

# One writescore_<KIND>.ts file is generated per entry.
WRITESCORE_KINDS := NFIX FFD GPT TRT FIXPROP
WRITESCORE_FILES := $(WRITESCORE_KINDS:%=writescore_%.ts)

# Prerequisites shared by all_text and all_lines.
COMMON_TS := $(WRITESCORE_FILES) tokenizer.ts segmenter.ts

# Build everything, extracting raw text with $(CONLL2TXT).
all_text: $(COMMON_TS) texts transitions pretrain
	rm -f all_no_test.conllu

# Same as all_text, but extracting raw text with $(CONLL2LINES).
all_lines: $(COMMON_TS) texts_lines transitions pretrain
	rm -f all_no_test.conllu

# Produces tokenizer.ts and, as side products, splitwords.ts and
# ambiguities.txt (stderr of conllu2splits.py). Every line of both .ts files
# is prefixed with "<tokenizer> ".
# The script runs first and $@ is written last in a single command, so a
# failing script does not leave a half-built tokenizer.ts behind.
# A temporary file is used instead of `sed -i`, whose syntax differs between
# GNU and BSD sed.
tokenizer.ts: train.conllu
	$(SCRIPTS)/conllu2splits.py $< $(MCD) > splitwords.ts.tmp 2> ambiguities.txt
	sed -e 's/^/<tokenizer> /' splitwords.ts.tmp > splitwords.ts
	rm -f splitwords.ts.tmp
	printf '<tokenizer> %s\n' \
		'ENDWORD' \
		'SPLIT 0' 'SPLIT 1' 'SPLIT 2' 'SPLIT 3' \
		'SPLIT 4' 'SPLIT 5' 'SPLIT 6' 'SPLIT 7' \
		'ADDCHARTOWORD 1' 'ADDCHARTOWORD 2' 'ADDCHARTOWORD 3' \
		'ADDCHARTOWORD 4' 'ADDCHARTOWORD 5' 'ADDCHARTOWORD 6' \
		'IGNORECHAR' > $@

# Two fixed lines, each prefixed with "<segmenter> ".
segmenter.ts:
	printf '<segmenter> %s\n' 'EOS b.0' 'NOTEOS b.0' > $@

# Static pattern rule: $* is the KIND part of writescore_<KIND>.ts.
$(WRITESCORE_FILES): writescore_%.ts:
	echo "WRITESCORE b.0 $*" > $@

transitions: train.conllu
	./getTransitionSets.py $< $(MCD)

texts:
	./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILES) $(TEST_FILES)

texts_lines:
	./getRawText.py $(CONLL2LINES) $(TRAIN_FILES) $(DEV_FILES) $(TEST_FILES)

# Runs pretrainEmbeddings.sh once per column, writing <col>.w2v.
# stderr of each run goes to pretrain_log.err and is printed on failure.
# The failure handler is a brace group, not a subshell: `exit 1` inside
# ( ... ) would only leave the subshell, the loop would continue, and the
# recipe would report the status of the last column only.
pretrain:
	for col in $(PRETRAINED_COLS) ; do \
		./pretrainEmbeddings.sh $(TRAIN_FILES) "$$col" 128 "$$col.w2v" 2> pretrain_log.err \
			|| { cat pretrain_log.err ; exit 1 ; } ; \
	done

$(FPLM_FILENAME): train.conllu
	$(SCRIPTS)/conllu2fplm.py $< > $@

# Best-effort removal of generated files; the leading `-` keeps `make clean`
# from failing if rm reports an error.
clean:
	-rm -f *.ts splitwords.ts.tmp ambiguities.txt $(FPLM_FILENAME)