From b2f5a650eb11ae109eb05c1a217f6f5e3799a2a3 Mon Sep 17 00:00:00 2001 From: Benoit Favre <benoit.favre@gmail.com> Date: Thu, 31 Jul 2014 15:32:26 +0200 Subject: [PATCH] fix paths, add step 4 --- 00-install-deps.sh | 6 +++-- 02-create-more-data-structures.sh | 4 +++ 03-train-frame-identification.sh | 12 ++++++--- 04-train-argument-identification.sh | 38 +++++++++++++++++++++++++++++ config.sh | 6 ++--- 5 files changed, 58 insertions(+), 8 deletions(-) create mode 100755 04-train-argument-identification.sh diff --git a/00-install-deps.sh b/00-install-deps.sh index cfac195..2d8a28f 100755 --- a/00-install-deps.sh +++ b/00-install-deps.sh @@ -4,7 +4,9 @@ rm -rf deps/semafor -git clone https://github.com/sammthomson/semafor.git deps/semafor +#git clone https://github.com/sammthomson/semafor.git deps/semafor +wget https://semafor-semantic-parser.googlecode.com/files/SEMAFOR-2.1.tgz -O SEMAFOR-2.1.tgz +tar -C deps -xf SEMAFOR-2.1.tgz -javac -cp ${classpath} deps/semafor/edu/cmu/cs/lti/ark/fn/identification/*.java +javac -cp ${classpath} $SEMAFOR_HOME/edu/cmu/cs/lti/ark/fn/identification/*.java diff --git a/02-create-more-data-structures.sh b/02-create-more-data-structures.sh index 59550e6..75c39e2 100755 --- a/02-create-more-data-structures.sh +++ b/02-create-more-data-structures.sh @@ -2,6 +2,8 @@ . config.sh +pushd $SEMAFOR_HOME + ${jhome}/java -classpath ${classpath}:. -Xms2g -Xmx2g edu.cmu.cs.lti.ark.fn.identification.RequiredDataCreation \ stopwords-file:${SEMAFOR_HOME}/stopwords.txt \ wordnet-configfile:${SEMAFOR_HOME}/file_properties.xml \ @@ -14,3 +16,5 @@ ${jhome}/java -classpath ${classpath}:. -Xms2g -Xmx2g edu.cmu.cs.lti.ark.fn.iden revisedmapfile:${datadir}/revisedrelmap.ser \ lemmacachefile:${datadir}/hvlemmas.ser \ fnidreqdatafile:${datadir}/reqData.jobj + +popd diff --git a/03-train-frame-identification.sh b/03-train-frame-identification.sh index 60e21c7..77853b9 100755 --- a/03-train-frame-identification.sh +++ b/03-train-frame-identification.sh @@ -22,7 +22,10 @@ $jhome/java -classpath ${classpath} -Xms8G -Xmx8G \ numthreads:4 # step 2: combine alphabets? -$jhome/java -classpath ${classpath} -Xms8G -Xms8G edu.cmu.cs.lti.ark.fn.identification.CombineAlphabets ${datadir} ${datadir}/alphabet.dat +$jhome/java -classpath ${classpath} -Xms8G -Xms8G \ + edu.cmu.cs.lti.ark.fn.identification.CombineAlphabets \ + ${datadir} \ + ${datadir}/alphabet.dat # step 3: creating feature events $jhome/java -classpath ${classpath} -Xms8G -Xmx8G \ @@ -40,7 +43,7 @@ $jhome/java -classpath ${classpath} -Xms8G -Xmx8G \ numthreads:4 # step 4: traning the frame identification model -mkdir ${datadir}/models_0.0 +mkdir -p ${datadir}/models_0.0 $jhome/java -classpath ${classpath} -Xms8G -Xmx8G \ edu.cmu.cs.lti.ark.fn.identification.TrainBatchModelDerThreaded \ alphabetfile:${datadir}/alphabet.dat \ @@ -53,5 +56,8 @@ $jhome/java -classpath ${classpath} -Xms8G -Xmx8G \ numthreads:8 # step 5: convert alphabet files -$jhome/java -classpath ${classpath} -Xms8G -Xms8G edu.cmu.cs.lti.ark.fn.identification.ConvertAlphabetFile ${datadir}/alphabet.dat ${datadir}/models_0.0/idmodel.dat ${datadir}/idmodel.dat +$jhome/java -classpath ${classpath} -Xms8G -Xms8G edu.cmu.cs.lti.ark.fn.identification.ConvertAlphabetFile \ + ${datadir}/alphabet.dat \ + ${datadir}/models_0.0/idmodel.dat \ + ${datadir}/idmodel.dat diff --git a/04-train-argument-identification.sh b/04-train-argument-identification.sh new file mode 100755 index 0000000..c49788a --- /dev/null +++ b/04-train-argument-identification.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +. config.sh + +mkdir ${datadir}/scan + +# step 1: Alphabet Creation +$jhome/java -classpath ${classpath} -Xms4000m -Xmx4000m edu.cmu.cs.lti.ark.fn.parsing.CreateAlphabet \ + ${fefile}.frame.elements \ + ${fefile}.all.lemma.tags \ + ${datadir}/scan/cv.train.events.bin \ + ${datadir}/scan/parser.conf.unlabeled \ + ${datadir}/scan/cv.train.sentences.frame.elements.spans \ + true \ + false \ + 1 \ + null \ + ${datadir}/framenet.frame.element.map + + +# step 2: Caching Feature Vectors +$jhome/java -classpath ${classpath} -Xms4000m -Xmx4000m edu.cmu.cs.lti.ark.fn.parsing.FrameFeaturesCache \ + eventsfile:${datadir}/scan/cv.train.events.bin \ + spansfile:${datadir}/scan/cv.train.sentences.frame.elements.spans \ + train-framefile:${fefile}.frame.elements \ + localfeaturescache:${datadir}/scan/featurecache.jobj + +# step 3: training +$jhome/java -classpath ${classpath} -Xms8000m -Xmx8000m edu.cmu.cs.lti.ark.fn.parsing.TrainingBatchMain \ + model:${datadir}/argmodel.dat \ + alphabetfile:${datadir}/scan/parser.conf.unlabeled \ + localfeaturescache:${datadir}/scan/featurecache.jobj \ + train-framefile:${fefile}.frame.elements \ + regularization:reg \ + lambda:0.1 \ + numthreads:4 \ + binaryoverlapfactor:false + diff --git a/config.sh b/config.sh index 341d1d8..405579a 100644 --- a/config.sh +++ b/config.sh @@ -2,9 +2,9 @@ set -u -e -o pipefail -SEMAFOR_HOME=$PWD/deps/semafor -classpath=${SEMAFOR_HOME}/lib/semafor-deps.jar -datadir=data/training +SEMAFOR_HOME=$PWD/deps/semafor-semantic-parser +classpath=${SEMAFOR_HOME}/lib/semafor-deps.jar:${SEMAFOR_HOME} +datadir=$PWD/data framenet=/storage/raid1/corpora/fndata-1.5 luxmldir=$framenet/lu jhome=/usr/bin/ -- GitLab