diff --git a/00-install-deps.sh b/00-install-deps.sh index cfac19544efd5c4293461d8cd5ecbb524eb137ec..2d8a28fc0c22e504acd1fd24759cf07d38a5ca86 100755 --- a/00-install-deps.sh +++ b/00-install-deps.sh @@ -4,7 +4,9 @@ rm -rf deps/semafor -git clone https://github.com/sammthomson/semafor.git deps/semafor +#git clone https://github.com/sammthomson/semafor.git deps/semafor +wget https://semafor-semantic-parser.googlecode.com/files/SEMAFOR-2.1.tgz -O SEMAFOR-2.1.tgz +tar -C deps -xf SEMAFOR-2.1.tgz -javac -cp ${classpath} deps/semafor/edu/cmu/cs/lti/ark/fn/identification/*.java +javac -cp ${classpath} $SEMAFOR_HOME/edu/cmu/cs/lti/ark/fn/identification/*.java diff --git a/02-create-more-data-structures.sh b/02-create-more-data-structures.sh index 59550e6c6e1959760ba321ce915478c2680cb22b..75c39e2c7453b46c395717d9d92ba92263291301 100755 --- a/02-create-more-data-structures.sh +++ b/02-create-more-data-structures.sh @@ -2,6 +2,8 @@ . config.sh +pushd $SEMAFOR_HOME + ${jhome}/java -classpath ${classpath}:. -Xms2g -Xmx2g edu.cmu.cs.lti.ark.fn.identification.RequiredDataCreation \ stopwords-file:${SEMAFOR_HOME}/stopwords.txt \ wordnet-configfile:${SEMAFOR_HOME}/file_properties.xml \ @@ -14,3 +16,5 @@ ${jhome}/java -classpath ${classpath}:. -Xms2g -Xmx2g edu.cmu.cs.lti.ark.fn.iden revisedmapfile:${datadir}/revisedrelmap.ser \ lemmacachefile:${datadir}/hvlemmas.ser \ fnidreqdatafile:${datadir}/reqData.jobj + +popd diff --git a/03-train-frame-identification.sh b/03-train-frame-identification.sh index 60e21c727739817fceb863901fe38711e4430fc4..77853b9ba800f30c5ecd06ba7193ed5098708210 100755 --- a/03-train-frame-identification.sh +++ b/03-train-frame-identification.sh @@ -22,7 +22,10 @@ $jhome/java -classpath ${classpath} -Xms8G -Xmx8G \ numthreads:4 # step 2: combine alphabets? -$jhome/java -classpath ${classpath} -Xms8G -Xms8G edu.cmu.cs.lti.ark.fn.identification.CombineAlphabets ${datadir} ${datadir}/alphabet.dat +$jhome/java -classpath ${classpath} -Xms8G -Xms8G \ + edu.cmu.cs.lti.ark.fn.identification.CombineAlphabets \ + ${datadir} \ + ${datadir}/alphabet.dat # step 3: creating feature events $jhome/java -classpath ${classpath} -Xms8G -Xmx8G \ @@ -40,7 +43,7 @@ $jhome/java -classpath ${classpath} -Xms8G -Xmx8G \ numthreads:4 # step 4: traning the frame identification model -mkdir ${datadir}/models_0.0 +mkdir -p ${datadir}/models_0.0 $jhome/java -classpath ${classpath} -Xms8G -Xmx8G \ edu.cmu.cs.lti.ark.fn.identification.TrainBatchModelDerThreaded \ alphabetfile:${datadir}/alphabet.dat \ @@ -53,5 +56,8 @@ $jhome/java -classpath ${classpath} -Xms8G -Xmx8G \ numthreads:8 # step 5: convert alphabet files -$jhome/java -classpath ${classpath} -Xms8G -Xms8G edu.cmu.cs.lti.ark.fn.identification.ConvertAlphabetFile ${datadir}/alphabet.dat ${datadir}/models_0.0/idmodel.dat ${datadir}/idmodel.dat +$jhome/java -classpath ${classpath} -Xms8G -Xms8G edu.cmu.cs.lti.ark.fn.identification.ConvertAlphabetFile \ + ${datadir}/alphabet.dat \ + ${datadir}/models_0.0/idmodel.dat \ + ${datadir}/idmodel.dat diff --git a/04-train-argument-identification.sh b/04-train-argument-identification.sh new file mode 100755 index 0000000000000000000000000000000000000000..c49788a0af66defbc0235a6899b1202d4848ac45 --- /dev/null +++ b/04-train-argument-identification.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +. config.sh + +mkdir ${datadir}/scan + +# step 1: Alphabet Creation +$jhome/java -classpath ${classpath} -Xms4000m -Xmx4000m edu.cmu.cs.lti.ark.fn.parsing.CreateAlphabet \ + ${fefile}.frame.elements \ + ${fefile}.all.lemma.tags \ + ${datadir}/scan/cv.train.events.bin \ + ${datadir}/scan/parser.conf.unlabeled \ + ${datadir}/scan/cv.train.sentences.frame.elements.spans \ + true \ + false \ + 1 \ + null \ + ${datadir}/framenet.frame.element.map + + +# step 2: Caching Feature Vectors +$jhome/java -classpath ${classpath} -Xms4000m -Xmx4000m edu.cmu.cs.lti.ark.fn.parsing.FrameFeaturesCache \ + eventsfile:${datadir}/scan/cv.train.events.bin \ + spansfile:${datadir}/scan/cv.train.sentences.frame.elements.spans \ + train-framefile:${fefile}.frame.elements \ + localfeaturescache:${datadir}/scan/featurecache.jobj + +# step 3: training +$jhome/java -classpath ${classpath} -Xms8000m -Xmx8000m edu.cmu.cs.lti.ark.fn.parsing.TrainingBatchMain \ + model:${datadir}/argmodel.dat \ + alphabetfile:${datadir}/scan/parser.conf.unlabeled \ + localfeaturescache:${datadir}/scan/featurecache.jobj \ + train-framefile:${fefile}.frame.elements \ + regularization:reg \ + lambda:0.1 \ + numthreads:4 \ + binaryoverlapfactor:false + diff --git a/config.sh b/config.sh index 341d1d8b369cfc3e35fc3f57d011f1285d7a7faf..405579a4ac5a968e32a64dc40aec8d34302a11bf 100644 --- a/config.sh +++ b/config.sh @@ -2,9 +2,9 @@ set -u -e -o pipefail -SEMAFOR_HOME=$PWD/deps/semafor -classpath=${SEMAFOR_HOME}/lib/semafor-deps.jar -datadir=data/training +SEMAFOR_HOME=$PWD/deps/semafor-semantic-parser +classpath=${SEMAFOR_HOME}/lib/semafor-deps.jar:${SEMAFOR_HOME} +datadir=$PWD/data framenet=/storage/raid1/corpora/fndata-1.5 luxmldir=$framenet/lu jhome=/usr/bin/