Skip to content
Snippets Groups Projects
Commit b7aefa89 authored by Franck Dary
Browse files

Added support for option lineByLine

parent 453519ed
No related branches found
No related tags found
No related merge requests found
# Directory holding the shared helper scripts, and the two converters that the
# `texts` / `texts_lines` rules hand to getRawText.py.
SCRIPTS=../../../../scripts
CONLL2TXT=$(SCRIPTS)/conllu_to_text.pl
CONLL2LINES=$(SCRIPTS)/conllu_to_lines.sh

# CoNLL-U files located anywhere below the current directory, selected by
# file name ('*train*.conllu', '*dev*.conllu').
# Simply expanded (:=) so that `find` runs once, when the Makefile is parsed,
# instead of once for every reference to the variable.
# NOTE(review): this snapshots the file list at parse time; confirm that no
# rule is expected to pick up train/dev .conllu files created during the build.
TRAIN_FILES:=$(shell find . -type f -name '*train*.conllu')
DEV_FILES:=$(shell find . -type f -name '*dev*.conllu')
......@@ -9,7 +10,9 @@ TEST_FILES=$(shell find . -type f -name '*test*.conllu')
THRESHOLD=10
FPLM_FILENAME=fplm
# Aggregate entry points. No recipe below creates a file named after its
# target, so they are declared phony and always run when requested.
.PHONY: all all_text all_lines

# Builds every prerequisite and keeps all_no_test.conllu in place.
all: writescore_TIME.ts tokenizer.ts segmenter.ts texts all_no_test.conllu transitions pretrain

# Same prerequisites as `all`, then removes all_no_test.conllu once everything
# that depends on it has been built.
all_text: writescore_TIME.ts tokenizer.ts segmenter.ts texts all_no_test.conllu transitions pretrain
	rm -f all_no_test.conllu

# Variant of all_text that depends on `texts_lines` instead of `texts`.
all_lines: writescore_TIME.ts tokenizer.ts segmenter.ts texts_lines all_no_test.conllu transitions pretrain
	rm -f all_no_test.conllu
all_no_test.conllu:
......@@ -50,6 +53,9 @@ transitions: all_no_test.conllu
# Both rules run getRawText.py over the train, dev and test CoNLL-U files;
# they differ only in the converter script passed as first argument
# ($(CONLL2TXT) versus $(CONLL2LINES)).
# NOTE(review): declared phony on the assumption that getRawText.py never
# writes a file literally named `texts` or `texts_lines` — confirm.
.PHONY: texts texts_lines

texts:
	./getRawText.py $(CONLL2TXT) $(TRAIN_FILES) $(DEV_FILES) $(TEST_FILES)

texts_lines:
	./getRawText.py $(CONLL2LINES) $(TRAIN_FILES) $(DEV_FILES) $(TEST_FILES)
pretrain:
for col in FORM UPOS LEMMA FEATS DEPREL LETTERS ; do \
./pretrainEmbeddings.sh $(TRAIN_FILES) $$col 64 $$col.w2v 2> pretrain_log.err || ( cat pretrain_log.err && exit 1 ) ; \
......
......@@ -31,8 +31,14 @@ if [ ! -d "$EXPPATH" ]; then
print_usage_and_exit
fi
# Choose which target of the data Makefile to build: all_text by default,
# all_lines as soon as --lineByLine appears anywhere in the script arguments.
TARGET="all_text"
if [[ "$*" == *--lineByLine* ]]; then
    TARGET="all_lines"
fi

# Remember the starting directory so it can be restored after the build.
CURDIR=$(pwd)

# Clean and rebuild the experiment data exactly once, for the chosen target.
# A second `cd "$EXPPATH/data"` issued from inside the data directory fails
# when EXPPATH is relative, which would silently skip the build, and an extra
# default build would be thrown away by the following `make clean` anyway.
# Expansions are quoted so paths containing spaces survive word splitting.
cd "$EXPPATH/data" && make -s clean && make "$TARGET" -s
cd "$CURDIR"

TRAIN="$EXPPATH/data/train.conllu"
......
#! /usr/bin/env bash
# Print the content of every "# text =" line of a CoNLL-U file, one per line.
#
# Usage: conllu_to_lines.sh [FILE]
#   FILE  file to read; when it is missing or empty, standard input is read.
#
# The pattern is anchored at the start of the line because `cut -c 10-`
# assumes the 9-character "# text = " prefix begins in column 1.
# ${1:+"$1"} passes the path as a single word (safe with spaces) while still
# expanding to nothing when no path was given.
grep '^# text =' ${1:+"$1"} | cut -c '10-'
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment