Skip to content
Snippets Groups Projects
Commit d9968e9f authored by Franck Dary's avatar Franck Dary
Browse files

Added argument pretrained= to train.sh, to choose which columns to apply GloVe to

parent 6155a966
No related branches found
No related tags found
No related merge requests found
......@@ -69,7 +69,7 @@ texts_lines:
./getRawText.py $(CONLL2LINES) $(TRAIN_FILES) $(DEV_FILES) $(TEST_FILES)
# Pretrain word embeddings for every requested CoNLL column.
# PRETRAINED_COLS is passed in by the caller (train.sh sets it from the
# pretrained=col1,col2,... argument), e.g. "FORM UPOS".
# On failure the pretraining log is dumped to stderr and make aborts.
.PHONY: pretrain
pretrain:
	for col in $(PRETRAINED_COLS) ; do \
	./pretrainEmbeddings.sh $(TRAIN_FILES) $$col 128 $$col.w2v 2> pretrain_log.err || ( cat pretrain_log.err && exit 1 ) ; \
	done
......
......@@ -8,7 +8,7 @@ from launchSlurmArray import launchSlurmArray
###############################################################################
def printUsageAndExit() :
    """Print the command-line usage string to stderr and terminate with exit code 1."""
    print("USAGE : %s (train | eval) (gpu | cpu) nbCPU batchesDescription.py nbHours jobName maxNbSimultaneousJobs"%sys.argv[0], file=sys.stderr)
    exit(1)
###############################################################################
......@@ -19,31 +19,32 @@ def prepareExperiment(lang, template, expName) :
###############################################################################
###############################################################################
def addNamesAndCommandsTrain(names, commands, mode, expName, arguments, seed, pretrained) :
    """Register one training job: append its name and its train.sh command line.

    names, commands -- lists mutated in place (parallel arrays).
    pretrained      -- comma-separated column list, forwarded as pretrained=... to train.sh.
    seed            -- RNG seed forwarded via --seed.
    """
    names.append(expName)
    commands.append("./train.sh {} bin/{} pretrained={} {} --silent --seed {}".format(mode, expName, pretrained, arguments, seed))
###############################################################################
###############################################################################
def addNamesAndCommandsDecode(names, commands, mode, expName, arguments, pretrained) :
    """Register one evaluation job: append its name and its evaluate.sh command line.

    names, commands -- lists mutated in place (parallel arrays).
    pretrained      -- comma-separated column list, forwarded as pretrained=... to evaluate.sh.
    """
    names.append(expName)
    commands.append("./evaluate.sh {} bin/{} pretrained={} --silent {}".format(mode, expName, pretrained, arguments))
###############################################################################
###############################################################################
if __name__ == "__main__" :
if len(sys.argv) != 7 :
if len(sys.argv) != 8 :
printUsageAndExit()
mode = sys.argv[1]
device = sys.argv[2]
batchesDescription = sys.argv[3]
nbHours = sys.argv[4]
name = sys.argv[5]
limit = sys.argv[6]
nbCPU = sys.argv[3]
batchesDescription = sys.argv[4]
nbHours = sys.argv[5]
name = sys.argv[6]
limit = sys.argv[7]
if mode not in ["train","eval"] or device not in ["cpu","gpu"] :
printUsageAndExit()
......@@ -55,15 +56,16 @@ if __name__ == "__main__" :
for lang in desc.langs :
for xp in desc.templatesExperiments :
pretrained = xp['pretrained'] if "pretrained" in xp else ""
for i in desc.repRange :
xp['lang'] = lang
xp['expName'] = xp['expName'].split('.')[0]+"."+lang+"."+str(i)
if mode == "train" :
prepareExperiment(xp['lang'],xp['template'],xp['expName'])
addNamesAndCommandsTrain(names, commands, xp['mode'],xp['expName'],xp['arguments'],seed=100+i)
addNamesAndCommandsTrain(names, commands, xp['mode'],xp['expName'],xp['arguments'],seed=100+i, pretrained=pretrained)
else :
addNamesAndCommandsDecode(names, commands, xp['mode'],xp['expName'],xp['evalArguments'])
addNamesAndCommandsDecode(names, commands, xp['mode'],xp['expName'],xp['evalArguments'], pretrained=pretrained)
launchSlurmArray(names, commands, name, device, nbHours, limit)
launchSlurmArray(names, commands, name, device, nbHours, limit, nbCPU)
###############################################################################
......@@ -4,9 +4,9 @@ Classifier : tagger
Transitions : {tagger,data/tagger.ts}
LossMultiplier : {}
Network type : Modular
Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{64} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2}
Contextual : Window{-10 10} Columns{FORM} LSTM{1 1 0 1} In{128} Out{128} w2v{FORM,data/FORM.w2v} Targets{b.-2 b.-1 b.0 b.1 b.2}
Context : Targets{b.-2 b.-1 b.0 b.1 b.2} Columns{EOS ID} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Context : Targets{b.-3 b.-2 b.-1} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{UPOS,data/UPOS.w2v}
Context : Targets{b.-3 b.-2 b.-1} Columns{UPOS} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Focused : Column{prefix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
Focused : Column{suffix3:FORM} NbElem{3} Buffer{0} Stack{} LSTM{1 1 0 1} In{64} Out{64} w2v{}
InputDropout : 0.3
......
#! /usr/bin/env bash

# Print usage to stderr and abort the script.
function print_usage_and_exit {
	>&2 echo "USAGE : (tsv | txt) expPath pretrained=col1,col2... [arguments]"
	exit 1
}

MODE=$1        # tsv | txt : input data format
EXPPATH=$2     # experiment directory (e.g. bin/<expName>)
PRETRAINED=$3  # pretrained=col1,col2,... : columns to pretrain embeddings for
>&2 echo "********************************************************************************"
>&2 echo "Training : "$EXPPATH
......@@ -23,6 +24,15 @@ then
print_usage_and_exit
fi
# Argument 3 (pretrained=...) is mandatory: refuse to run without it.
if [ -z "$PRETRAINED" ];
then
>&2 echo "ERROR : missing argument 3 (pretrained)"
print_usage_and_exit
fi
# Convert "pretrained=col1,col2,..." into the space-separated list "col1 col2 ..."
# expected by the data Makefile (see the PRETRAINED_COLS variable used below).
PRETRAINED=$(python3 -c 'import sys; print(" ".join(sys.argv[-1].split("=")[-1].split(",")))' $PRETRAINED)
# Drop MODE, EXPPATH and PRETRAINED so "$@" holds only the extra arguments.
shift
shift
shift
......@@ -38,7 +48,7 @@ then
fi
CURDIR=$(pwd)
# Regenerate the experiment's data from scratch; PRETRAINED_COLS tells the
# data Makefile's pretrain rule which columns get pretrained embeddings.
cd $EXPPATH"/"data && make -s clean && PRETRAINED_COLS=$PRETRAINED make $TARGET -s
cd $CURDIR
TRAIN=$EXPPATH"/data/train.conllu"
......
import os
import subprocess
def launchSlurmArray(names, commands, jobName, device, nbHours, limit) :
def launchSlurmArray(names, commands, jobName, device, nbHours, limit, nbCPU) :
commands = ["'%s'"%s for s in commands]
names = ["'%s'"%s for s in names]
......@@ -21,7 +21,7 @@ def launchSlurmArray(names, commands, jobName, device, nbHours, limit) :
#SBATCH --error=%A_%a.err
#SBATCH --open-mode=append
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=4
#SBATCH --cpus-per-task={}
#SBATCH --partition=skylake
#SBATCH -A b250
#SBATCH --time={}:00:00
......@@ -55,7 +55,7 @@ cat $tmpFile >> $newErr
rm $tmpFile
eval "${{commands[$SLURM_ARRAY_TASK_ID]}}"
""".format(len(names)-1, limit, jobName, nbHours, " ".join(names), " ".join(commands)), file=sFile)
""".format(len(names)-1, limit, jobName, nbCPU, nbHours, " ".join(names), " ".join(commands)), file=sFile)
sFile.close()
elif "jean-zay" in hostname :
print("""#! /usr/bin/env bash
......@@ -111,7 +111,7 @@ eval "${{commands[$SLURM_ARRAY_TASK_ID]}}"
#SBATCH --error=%A_%a.err
#SBATCH --open-mode=append
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=4
#SBATCH --cpus-per-task={}
#SBATCH --partition={}
#SBATCH --time={}:00:00
......@@ -141,7 +141,7 @@ cat $tmpFile >> $newErr
rm $tmpFile
eval "${{commands[$SLURM_ARRAY_TASK_ID]}}"
'''.format(len(names)-1, limit, jobName, "cpu" if device == "cpu" else "gpu\n#SBATCH --gres=gpu", nbHours, " ".join(names), commandList), file=sFile)
'''.format(len(names)-1, limit, jobName, nbCPU, "cpu" if device == "cpu" else "gpu\n#SBATCH --gres=gpu", nbHours, " ".join(names), commandList), file=sFile)
sFile.close()
else :
print("ERROR : Unknown hostname \'%s\'"%hostname)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment