diff --git a/UD_X/.gitignore b/UD_X/.gitignore
index 261e016b0e5343b08fc5ab162bdc189c879b69ec..abcaa7fa04cc112bfdd0ffcd0224ec76a84dadf1 100644
--- a/UD_X/.gitignore
+++ b/UD_X/.gitignore
@@ -14,7 +14,6 @@ parser/test.bd
 eval/*\.res
 eval/stderr.log
 parser/*.bd
-launch*
 outputCluster/
 launchFiles/
 predictions/
diff --git a/UD_X/launchFTB.sh b/UD_X/launchFTB.sh
new file mode 100755
index 0000000000000000000000000000000000000000..6fd7a02198f1f5efe4344f95e56a945d536fa5f7
--- /dev/null
+++ b/UD_X/launchFTB.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+#
+# Builds the French FTB data set (100% of the training data) through
+# data/Makefile, files the generated data/ and parser/ artefacts under a
+# per-model name and submits one macaon_train job with oargen.
+#
+# Every path is relative, so run this from the directory that holds data/ and
+# parser/. data/Makefile is edited in place; the last three sed calls set
+# TRAIN/TEST/DEV to the $(CONLL_DIR) files (presumably the usual defaults).
+
+set -e
+set -u
+set -o pipefail
+
+# Percentages iterated by the disabled sweep below.
+listPercent="01 03 05 10 20 50 100"
+
+
+sed -i -E "s/^TEST=.+/TEST=..\/..\/data\/UD_French-FTB-full-2.3\/fr_ftb-ud-test.conllu/" data/Makefile
+sed -i -E "s/^DEV=.+/DEV=..\/..\/data\/UD_French-FTB-full-2.3\/fr_ftb-ud-dev.conllu/" data/Makefile
+
+
+# for curPercent in $listPercent
+# do
+#     sed -i -E "s/^TRAIN=.+/TRAIN=..\/..\/data\/UD_French-FTB-full-2.3\/ftb.train.${curPercent}percent.conllu/" data/Makefile
+#     NAMEMODEL=FTB_${curPercent}_lex100
+#     cd data
+#     make clean && make without_w
+#     sed -i -E "s/Parser_form.+$/Parser_form 100 Embeddings ..\/..\/..\/data\/embeddings\/monolingual\/dicts\/fr.dict/" ../parser/parser.dicts
+#     echo "b.0.FORM" >> ../parser/parser.fm
+
+#     mv train.mcf MCFfiles/train/train_$NAMEMODEL.mcf
+#     mv dev.mcf MCFfiles/dev/dev_$NAMEMODEL.mcf
+#     mv test.mcf MCFfiles/test/test_$NAMEMODEL.mcf
+#     mv wpmlgfs.mcd MCDfiles/$NAMEMODEL.mcd
+
+#     cd ../parser
+
+#     mv train.bd dir_bd/train_$NAMEMODEL.bd
+#     mv test.bd dir_bd/test_$NAMEMODEL.bd
+#     mv parser.dicts dicts/$NAMEMODEL.dicts
+
+#     mv parser.fm fm/$NAMEMODEL.fm
+
+#     oargen -H see4c1 -n ${NAMEMODEL} -d ../outputCluster -t 96 -b --run macaon_train --tm machine.tm --bd dir_bd/train_$NAMEMODEL.bd --mcd ../../data/MCDfiles/$NAMEMODEL.mcd -T ../../data/MCFfiles/train/train_$NAMEMODEL.mcf --dev ../../data/MCFfiles/dev/dev_$NAMEMODEL.mcf --dicts dicts/$NAMEMODEL.dicts --featureModels Parser=fm/$NAMEMODEL.fm --expName parser_$NAMEMODEL --lang UD_X -n 10 --templateName parser
+
+#     sleep 10
+
+#     cd ../
+#     NAMEMODEL=FTB_${curPercent}
+#     cd data
+#     make clean && make without_w
+#     mv train.mcf MCFfiles/train/train_$NAMEMODEL.mcf
+#     mv dev.mcf MCFfiles/dev/dev_$NAMEMODEL.mcf
+#     mv test.mcf MCFfiles/test/test_$NAMEMODEL.mcf
+#     mv wpmlgfs.mcd MCDfiles/$NAMEMODEL.mcd
+
+#     cd ../parser
+
+#     mv train.bd dir_bd/train_$NAMEMODEL.bd
+#     mv test.bd dir_bd/test_$NAMEMODEL.bd
+#     mv parser.dicts dicts/$NAMEMODEL.dicts
+
+#     mv parser.fm fm/$NAMEMODEL.fm
+
+#     oargen -H see4c1 -n ${NAMEMODEL} -d ../outputCluster -t 96 -b --run macaon_train --tm machine.tm --bd dir_bd/train_$NAMEMODEL.bd --mcd ../../data/MCDfiles/$NAMEMODEL.mcd -T ../../data/MCFfiles/train/train_$NAMEMODEL.mcf --dev ../../data/MCFfiles/dev/dev_$NAMEMODEL.mcf --dicts dicts/$NAMEMODEL.dicts --featureModels Parser=fm/$NAMEMODEL.fm --expName parser_$NAMEMODEL --lang UD_X -n 10 --templateName parser
+#     sleep 10
+
+
+#     cd ../
+# done
+curPercent="100"
+
+sed -i -E "s/^TRAIN=.+/TRAIN=..\/..\/data\/UD_French-FTB-full-2.3\/ftb.train.${curPercent}percent.conllu/" data/Makefile
+NAMEMODEL=FTB_${curPercent}_newParam
+cd data
+# "&&", not "&": a single "&" would run the clean in the background, racing
+# with the build and hiding a failed clean from "set -e".
+make clean && make without_w
+sed -i -E "s/Parser_form.+$/Parser_form 100 Embeddings ..\/..\/..\/data\/embeddings\/monolingual\/dicts\/fr.dict/" ../parser/parser.dicts
+
+mv train.mcf "MCFfiles/train/train_${NAMEMODEL}.mcf"
+mv dev.mcf "MCFfiles/dev/dev_${NAMEMODEL}.mcf"
+mv test.mcf "MCFfiles/test/test_${NAMEMODEL}.mcf"
+mv wpmlgfs.mcd "MCDfiles/${NAMEMODEL}.mcd"
+
+cd ../parser
+
+mv train.bd "dir_bd/train_${NAMEMODEL}.bd"
+mv test.bd "dir_bd/test_${NAMEMODEL}.bd"
+mv parser.dicts "dicts/${NAMEMODEL}.dicts"
+
+mv parser.fm "fm/${NAMEMODEL}.fm"
+
+oargen -H see4c1 -n "${NAMEMODEL}" -d ../outputCluster -t 96 -b \
+    --run macaon_train --tm machine.tm \
+    --bd "dir_bd/train_${NAMEMODEL}.bd" \
+    --mcd "../../data/MCDfiles/${NAMEMODEL}.mcd" \
+    -T "../../data/MCFfiles/train/train_${NAMEMODEL}.mcf" \
+    --dev "../../data/MCFfiles/dev/dev_${NAMEMODEL}.mcf" \
+    --dicts "dicts/${NAMEMODEL}.dicts" \
+    --featureModels "Parser=fm/${NAMEMODEL}.fm" \
+    --expName "parser_${NAMEMODEL}" \
+    --lang UD_X -n 10 --templateName parser
+
+cd ../
+
+sed -i -E "s/^TRAIN=.+/TRAIN=\$(CONLL_DIR)\/train.eqTokens.conllu/" data/Makefile
+sed -i -E "s/^TEST=.+/TEST=\$(CONLL_DIR)\/test.conllu/" data/Makefile
+sed -i -E "s/^DEV=.+/DEV=\$(CONLL_DIR)\/dev.eqTokens.conllu/" data/Makefile
diff --git a/UD_X/launchFamily.sh b/UD_X/launchFamily.sh
new file mode 100755
index 0000000000000000000000000000000000000000..d9694e276bad7bfe513cd5935bd4dcc348a1c64a
--- /dev/null
+++ b/UD_X/launchFamily.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+#
+# Submits the per-family ./trainNewVersion.sh job(s) with oargen; only the
+# Germanic run is enabled, the Romance and Slavic ones are commented out.
+
+set -e
+set -u
+set -o pipefail
+
+# oargen -n Romance -d outputCluster -t 48 -b --run ./trainNewVersion.sh -t Family -l Romance
+# sleep 300
+oargen -n Germanic -d outputCluster -t 48 -b --run ./trainNewVersion.sh -t Family -l Germanic
+sleep 300
+# oargen -n Slavic -d outputCluster -t 48 -b --run ./trainNewVersion.sh -t Family -l Slavic
+# sleep 300
diff --git a/UD_X/launchID.sh b/UD_X/launchID.sh
new file mode 100755
index 0000000000000000000000000000000000000000..48cb310c70506f53719d85e349f514a5e8a369dd
--- /dev/null
+++ b/UD_X/launchID.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# Submits two SIGMA_ID jobs with oargen: one running ./train.sh and one calling
+# macaon_train directly on the SIGMA_ID data files.
+
+set -e
+set -u
+set -o pipefail
+
+# The first job used to be named Lbar_${curLang}, but curLang is never set in
+# this script, so "set -u" aborted it before anything was submitted. It runs
+# "./train.sh -t SIGMA -i", hence SIGMA_ID (assumed name; adjust if needed).
+oargen -n SIGMA_ID -d outputCluster -t 72 -H see4c1 --run ./train.sh -t SIGMA -i
+oargen -n SIGMA_ID_bis -d outputCluster -t 72 -H see4c1 --run macaon_train --tm machine.tm --bd dir_bd/train_SIGMA_ID.bd --mcd ../../data/MCDfiles/SIGMA_ID.mcd -T ../../data/MCFfiles/train/train_SIGMA_ID.mcf --dev ../../data/MCFfiles/dev/dev_SIGMA_ID.mcf --dicts dicts/SIGMA_ID.dicts --featureModels Parser=fm/SIGMA_ID.fm --expName parser_SIGMA_ID_bis --lang UD_X -n 10 --templateName parser
diff --git a/UD_X/launchLbar.sh b/UD_X/launchLbar.sh
new file mode 100755
index 0000000000000000000000000000000000000000..cd0464520483afb6b8be872926770a23761bb434
--- /dev/null
+++ b/UD_X/launchLbar.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+#
+# Submits one "Lbar + W80" ./train.sh job per language in listLang with
+# oargen, pausing 180 seconds after each submission.
+
+set -e
+set -u
+set -o pipefail
+
+#listLang="ar bg bxr ca cs da de el en es et eu fa fi fr ga he hi hr hu id it ja kmr ko lv nl no pl pt ro ru sl sme sv tr uk ur vi zh"
+listLang="nl no pl pt ro ru sl sv tr uk ur vi zh"
+
+# # Lbar
+# for curLang in $listLang
+# do
+#     oargen -n Lbar_${curLang} -d outputCluster -t 72 -H see4c1 lifnode1 asfalda1 --run ./train.sh -t Lbar -l ${curLang}
+#     sleep 180
+# done
+
+
+# Lbar+W80
+for curLang in $listLang
+do
+    oargen -n Lbar_${curLang}_W80 -d outputCluster -t 72 -H see4c1 lifnode1 asfalda1 --run ./train.sh -t Lbar -l ${curLang} -w 80
+    sleep 180
+done
diff --git a/UD_X/launchTestsLex.sh b/UD_X/launchTestsLex.sh
new file mode 100755
index 0000000000000000000000000000000000000000..0512adf174fae05ce8dd0f92aa5e01b4dc125fc5
--- /dev/null
+++ b/UD_X/launchTestsLex.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Submits four ./trainNewVersion.sh jobs per language with oargen (L and Lbar,
+# each with and without "-e 300"), pausing 1000 seconds after each submission.
+
+set -e
+set -u
+set -o pipefail
+
+# listlang="ar bg bxr ca cu cs da de el es et eu fa fi fr ga gl got he hi hr hu id it ja kk kmr ko lv nl no nob nno pl pt ro ru sk sl sme sv tr ug uk ur vi zh"
+# listLang="fr ga he hi hr hu id it ja kmr ko lv nl no pl pt ro ru sl sme sv tr uk ur vi zh"
+
+listLang="ar bg bxr ca cs da de el en es et eu fa fi fr ga he hi hr hu id it ja kmr ko lv nl nno nob pl pt ro ru sl sme sv tr uk ur vi zh"
+
+# listLang="he hi hr hu id it ja kmr ko lv nl nno nob pl pt ro ru sl sme sv tr uk ur vi zh"
+
+
+for curLang in $listLang
+do
+    CUREXPE=L_${curLang}
+    oargen -n ${CUREXPE}_lex300align -d outputCluster/LEX -t 24 -b --run ./trainNewVersion.sh -t L -l $curLang -e 300
+    sleep 1000
+    oargen -n ${CUREXPE} -d outputCluster/LEX -t 24 -b --run ./trainNewVersion.sh -t L -l $curLang
+    sleep 1000
+    CUREXPE=Lbar_${curLang}
+    oargen -n ${CUREXPE} -d outputCluster/LEX -t 96 -b --run ./trainNewVersion.sh -t Lbar -l $curLang
+    sleep 1000
+    CUREXPE=Lbar_${curLang}_lex300
+    oargen -n ${CUREXPE} -d outputCluster/LEX -t 96 -b --run ./trainNewVersion.sh -t Lbar -l $curLang -e 300
+    sleep 1000
+done
+
+
+# sleep 300
+# oargen -n SIGMA -d outputCluster -t 96 -b --run ./trainNewVersion.sh -t SIGMA
+# sleep 300
+# oargen -n SIGMA_W80 -d outputCluster -t 96 -b --run ./trainNewVersion.sh -t SIGMA -w 80
+# sleep 300
+# oargen -n SIGMA_ID -d outputCluster -t 96 -b --run ./trainNewVersion.sh -t SIGMA -i
+# sleep 300
+# oargen -H see4c1 -n SIGMA_Wd -d outputCluster -t 96 -b --run ./trainNewVersion.sh -t SIGMA -w d
+# sleep 300
+# oargen -H see4c1 -n SIGMA_Wdf -d outputCluster -t 96 -b --run ./trainNewVersion.sh -t SIGMA -w df
diff --git a/UD_X/launchWd_Wdf.sh b/UD_X/launchWd_Wdf.sh
new file mode 100755
index 0000000000000000000000000000000000000000..b9e2b08ad1873981cb83dc179ffed0f4639c8c8c
--- /dev/null
+++ b/UD_X/launchWd_Wdf.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# Submits the SIGMA+Wd and SIGMA+Wdf ./train.sh jobs with oargen, 500 seconds
+# apart.
+
+set -e
+set -u
+set -o pipefail
+
+# SIGMA+Wd
+oargen -n SIGMA_Wd -d outputCluster -t 96 --run ./train.sh -w d
+sleep 500
+
+# SIGMA+Wdf
+oargen -n SIGMA_Wdf -d outputCluster -t 96 --run ./train.sh -w df
+