Skip to content
Snippets Groups Projects
Commit 9074e66f authored by Franck Dary's avatar Franck Dary
Browse files

Changed the way we train and evaluate

parent d0acd064
No related branches found
No related tags found
No related merge requests found
include ../config
SCRIPTS=../../scripts
SCRIPTS=../../../../scripts
CONLL2TXT=$(SCRIPTS)/conllu_to_text.pl
MCD=conllu.mcd
TRAIN_FILES=$(shell find $(UD_ROOT) -type f -name '*train*.conllu')
DEV_FILES=$(shell find $(UD_ROOT) -type f -name '*dev*.conllu')
TEST_FILES=$(shell find $(UD_ROOT) -type f -name '*test*.conllu')
TRAIN_FILES=$(shell find $(CORPUS) -type f -name '*train*.conllu')
DEV_FILES=$(shell find $(CORPUS) -type f -name '*dev*.conllu')
TEST_FILES=$(shell find $(CORPUS) -type f -name '*test*.conllu')
#This part is for lemmatizer rules and exceptions computation
THRESHOLD=10
......@@ -15,8 +15,8 @@ RULES_FILENAME=lemmatizer_rules.ts
EXCEPTIONS_FPLM_FILENAME=maca_trans_lemmatizer_exceptions.fplm
all: tokenizer.ts segmenter.ts texts all_no_test.conllu columns $(FPLM_FILENAME) $(RULES_FILENAME)
rm col_*\.txt
rm all_no_test.conllu
rm -f col_*\.txt
rm -f all_no_test.conllu
all_no_test.conllu:
cat $(TRAIN_FILES) > $@
......@@ -47,14 +47,14 @@ $(FPLM_FILENAME): all_no_test.conllu $(MCD)
$(RULES_FILENAME): $(FPLM_FILENAME)
macaon_compute_l_rules -f $(FPLM_FILENAME) -e $(EXCEPTIONS_FPLM_FILENAME) -r tmp.txt -t $(THRESHOLD)
rm tmp.txt
rm -f tmp.txt
echo -e "Default : NOTHING\nTOLOWER b.0 LEMMA\nTOUPPER b.0 LEMMA" > lemmatizer_case.ts
clean:
- rm *\.txt
- rm *\.conll*
- rm *\.ts
- rm $(RULES_FILENAME)
- rm $(EXCEPTIONS_FPLM_FILENAME)
- rm $(FPLM_FILENAME)
- rm -f *\.txt
- rm -f *\.conll*
- rm -f *\.ts
- rm -f $(RULES_FILENAME)
- rm -f $(EXCEPTIONS_FPLM_FILENAME)
- rm -f $(FPLM_FILENAME)
......@@ -2,7 +2,7 @@
import sys
sys.path.insert(1, '../../scripts')
sys.path.insert(1, '../../../../scripts')
from readMCD import readMCD
......
#! /bin/bash
source config
# Succeed (status 0) when the first argument contains at least one whitespace
# character (space, tab, newline, ...); return status 1 otherwise.
function has_space {
case "$1" in
*[[:space:]]*) return 0 ;;
*) return 1 ;;
esac
}
# Print the command-line usage on stderr, then terminate the script with
# exit status 1.
# NOTE(review): two USAGE forms are printed; they look like the previous and
# the current signature of the script — confirm which one should remain.
function print_usage_and_exit {
>&2 echo "USAGE : (tsv | txt) language_keyword templateName expName [arguments]"
>&2 echo "USAGE : (tsv | txt) expPath [arguments]"
exit 1
}
MCD=data/conllu.mcd
MODE=$1
KEYWORD=$2
EXPNAME=$3
EXPPATH=$2
if [ -z "$KEYWORD" ];
if [ -z "$MODE" ];
then
>&2 echo "ERROR : missing argument 1 (keyword)"
>&2 echo "ERROR : missing argument 1 (mode)"
print_usage_and_exit
fi
if [ -z "$EXPNAME" ];
if [ -z "$EXPPATH" ];
then
>&2 echo "ERROR : missing argument 2 (expName)"
>&2 echo "ERROR : missing argument 2 (expPath)"
print_usage_and_exit
fi
shift
shift
shift
if [ "$KEYWORD" = "." ]
then
KEYWORD=""
if [ ! -d "$EXPPATH" ]; then
>&2 echo "ERROR : directory $EXPPATH doesn't exist"
print_usage_and_exit
fi
TEST=$(find $UD_ROOT*$KEYWORD -type f -name '*test*.conllu')
TESTRAW=$(find $UD_ROOT*$KEYWORD -type f -name '*test*.txt')
DEV=$(find $UD_ROOT*$KEYWORD -type f -name '*dev*.conllu')
DEVRAW=$(find $UD_ROOT*$KEYWORD -type f -name '*dev*.txt')
TRAIN=$(find $UD_ROOT*$KEYWORD -type f -name '*train*.conllu')
TRAINRAW=$(find $UD_ROOT*$KEYWORD -type f -name '*train*.txt')
source $EXPPATH"/config"
EVALTARGET=$TEST
EVALTARGETRAW=$TESTRAW
TRAIN=$(find $CORPUS -type f -name '*train*.conllu')
TRAINRAW=$(find $CORPUS -type f -name '*train*.txt')
DEV=$(find $CORPUS -type f -name '*dev*.conllu')
DEVRAW=$(find $CORPUS -type f -name '*dev*.txt')
TEST=$(find $CORPUS -type f -name '*test*.conllu')
TESTRAW=$(find $CORPUS -type f -name '*test*.txt')
if has_space "$EVALTARGET";
REF=$TEST
REFRAW=$TESTRAW
if has_space "$REF" || has_space "$REFRAW";
then
>&2 echo "ERROR : more than 1 match with keyword" $KEYWORD
>&2 echo "TEST : " $EVALTARGET
>&2 echo "ERROR : more than 1 match"
>&2 echo "REF : " $REF
>&2 echo "REFRAW : " $REFRAW
print_usage_and_exit
fi
if test ! -f $EVALTARGET;
if test ! -f $REF;
then
>&2 echo "ERROR : no ref file found in" $CORPUS
>&2 echo "$REF"
print_usage_and_exit
fi
if test ! -f $REFRAW;
then
>&2 echo "ERROR : no target file found with keyword" $KEYWORD
>&2 echo "$EVALTARGET"
>&2 echo "ERROR : no ref file found in" $CORPUS
>&2 echo "$REFRAW"
print_usage_and_exit
fi
MCD=$EXPPATH"/data/*\.mcd"
EVALCONLL="../scripts/conll18_ud_eval.py"
OUTPUT=$EXPNAME"/predicted_eval.tsv"
OUTPUT=$EXPPATH"/predicted_eval.tsv"
if [ "$MODE" = "tsv" ]; then
macaon decode --model $EXPNAME --mcd $MCD --inputTSV $EVALTARGET $@ > $OUTPUT && $EVALCONLL $EVALTARGET $OUTPUT -v || exit 1
macaon decode --model $EXPPATH --mcd $MCD --inputTSV $REF $@ > $OUTPUT && $EVALCONLL $REF $OUTPUT -v || exit 1
exit 0
fi
if [ "$MODE" = "txt" ]; then
macaon decode --model $EXPNAME --mcd $MCD --inputTXT $EVALTARGETRAW $@ > $OUTPUT && $EVALCONLL $EVALTARGET $OUTPUT -v || exit 1
macaon decode --model $EXPPATH --mcd $MCD --inputTXT $REFRAW $@ > $OUTPUT && $EVALCONLL $REF $OUTPUT -v || exit 1
exit 0
fi
......
#! /bin/bash

# Create a fresh experiment directory bin/<expName> from a template.
#
# Arguments:
#   $1  language     : name appended to $UD_ROOT to form the corpus path
#   $2  templateName : existing directory, copied to bin/<expName>
#   $3  expName      : name of the experiment directory created under bin/
#
# Effects:
#   - any existing bin/<expName> is removed first;
#   - the template and the local "data" directory are copied into it;
#   - bin/<expName>/config is (over)written with CORPUS=<UD_ROOT>/<language>.
#
# UD_ROOT is not defined in this script; presumably it comes from the sourced
# "config" file — verify.

source config

# Print the command-line usage on stderr and exit with status 1.
function print_usage_and_exit {
>&2 echo "USAGE : language templateName expName"
exit 1
}

# Do NOT store the language in LANG: that name is the locale environment
# variable, and overwriting it changes the locale of every command run below.
LANGNAME=$1
TEMPLATENAME=$2
EXPNAME=$3

if [ -z "$LANGNAME" ];
then
>&2 echo "ERROR : missing argument 1 (lang)"
print_usage_and_exit
fi

if [ -z "$TEMPLATENAME" ];
then
>&2 echo "ERROR : missing argument 2 (templateName)"
print_usage_and_exit
fi

if [ -z "$EXPNAME" ];
then
>&2 echo "ERROR : missing argument 3 (expName)"
print_usage_and_exit
fi

if [ ! -d "$TEMPLATENAME" ]; then
>&2 echo "ERROR : directory $TEMPLATENAME doesn't exist"
print_usage_and_exit
fi

# All expansions are quoted so that names containing spaces or glob
# characters cannot make rm/cp operate on unintended paths.
mkdir -p bin
rm -rf "bin/$EXPNAME"
cp -r "$TEMPLATENAME" "bin/$EXPNAME"
cp -r "data" "bin/$EXPNAME/."
echo "CORPUS=$UD_ROOT/$LANGNAME" > "bin/$EXPNAME/config"
#! /bin/bash
source config
# Succeed (status 0) when the first argument contains at least one whitespace
# character (space, tab, newline, ...); return status 1 otherwise.
function has_space {
case "$1" in
*[[:space:]]*) return 0 ;;
*) return 1 ;;
esac
}
# Print the command-line usage on stderr, then terminate the script with
# exit status 1.
# NOTE(review): two USAGE forms are printed; they look like the previous and
# the current signature of the script — confirm which one should remain.
function print_usage_and_exit {
>&2 echo "USAGE : (tsv | txt) language_keyword templateName expName [arguments]"
>&2 echo "USAGE : (tsv | txt) expPath [arguments]"
exit 1
}
MCD=data/conllu.mcd
MODE=$1
KEYWORD=$2
TEMPLATENAME=$3
EXPNAME=$4
if [ -z "$KEYWORD" ];
then
>&2 echo "ERROR : missing argument 1 (keyword)"
print_usage_and_exit
fi
EXPPATH=$2
if [ -z "$TEMPLATENAME" ];
if [ -z "$MODE" ];
then
>&2 echo "ERROR : missing argument 2 (templateName)"
>&2 echo "ERROR : missing argument 1 (mode)"
print_usage_and_exit
fi
if [ -z "$EXPNAME" ];
if [ -z "$EXPPATH" ];
then
>&2 echo "ERROR : missing argument 3 (expName)"
>&2 echo "ERROR : missing argument 2 (expPath)"
print_usage_and_exit
fi
shift
shift
shift
shift
if [ "$KEYWORD" = "." ]
then
KEYWORD=""
if [ ! -d "$EXPPATH" ]; then
>&2 echo "ERROR : directory $EXPPATH doesn't exist"
print_usage_and_exit
fi
TRAIN=$(find $UD_ROOT*$KEYWORD -type f -name '*train*.conllu')
TRAINRAW=$(find $UD_ROOT*$KEYWORD -type f -name '*train*.txt')
DEV=$(find $UD_ROOT*$KEYWORD -type f -name '*dev*.conllu')
DEVRAW=$(find $UD_ROOT*$KEYWORD -type f -name '*dev*.txt')
TEST=$(find $UD_ROOT*$KEYWORD -type f -name '*test*.conllu')
TESTRAW=$(find $UD_ROOT*$KEYWORD -type f -name '*test*.txt')
source $EXPPATH"/config"
TRAIN=$(find $CORPUS -type f -name '*train*.conllu')
TRAINRAW=$(find $CORPUS -type f -name '*train*.txt')
DEV=$(find $CORPUS -type f -name '*dev*.conllu')
DEVRAW=$(find $CORPUS -type f -name '*dev*.txt')
TEST=$(find $CORPUS -type f -name '*test*.conllu')
TESTRAW=$(find $CORPUS -type f -name '*test*.txt')
if has_space "$TRAIN" || has_space "$DEV" || has_space "$TEST";
then
......@@ -63,32 +52,23 @@ fi
if test ! -f $TRAIN;
then
>&2 echo "ERROR : no train file found with keyword" $KEYWORD
>&2 echo "ERROR : no train file found in" $CORPUS
>&2 echo "$TRAIN"
print_usage_and_exit
fi
mkdir -p bin
if [ ! -d "$TEMPLATENAME" ]; then
>&2 echo "ERROR : directory $TEMPLATENAME doesn't exist"
print_usage_and_exit
fi
rm -rf bin/$EXPNAME
cp -r $TEMPLATENAME bin/$EXPNAME
cp -r "data" bin/$EXPNAME/.
EVALCONLL="../scripts/conll18_ud_eval.py"
CURDIR=$(pwd)
cd $EXPPATH"/"data && make -s clean && make -s && cd $CURDIR
MCD=$EXPPATH"/data/*\.mcd"
if [ "$MODE" = "tsv" ]; then
macaon train --model bin/$EXPNAME --mcd $MCD --trainTSV $TRAIN --devTSV $DEV $@ || exit 1
macaon train --model $EXPPATH --mcd $MCD --trainTSV $TRAIN --devTSV $DEV $@ || exit 1
exit 0
fi
if [ "$MODE" = "txt" ]; then
macaon train --model bin/$EXPNAME --mcd $MCD --trainTSV $TRAIN --trainTXT $TRAINRAW --devTSV $DEV --devTXT $DEVRAW $@ || exit 1
macaon train --model $EXPPATH --mcd $MCD --trainTSV $TRAIN --trainTXT $TRAINRAW --devTSV $DEV --devTXT $DEVRAW $@ || exit 1
exit 0
fi
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment