Skip to content
Snippets Groups Projects
Commit d1337fbf authored by Benoit Favre
Browse files

fix steps 3 and 4

parent b2f5a650
No related branches found
No related tags found
No related merge requests found
......@@ -8,5 +8,5 @@ rm -rf deps/semafor
wget https://semafor-semantic-parser.googlecode.com/files/SEMAFOR-2.1.tgz -O SEMAFOR-2.1.tgz
tar -C deps -xf SEMAFOR-2.1.tgz
javac -cp ${classpath} $SEMAFOR_HOME/edu/cmu/cs/lti/ark/fn/identification/*.java
javac -cp ${classpath} $SEMAFOR_HOME/edu/cmu/cs/lti/ark/fn/{identification,parsing}/*.java
......@@ -27,6 +27,7 @@ $jhome/java -classpath ${classpath} -Xms8G -Xms8G \
${datadir} \
${datadir}/alphabet.dat
rm -rf ${datadir}/log
# step 3: creating feature events
$jhome/java -classpath ${classpath} -Xms8G -Xmx8G \
edu.cmu.cs.lti.ark.fn.identification.CreateEventsUnsupported \
......@@ -42,6 +43,7 @@ $jhome/java -classpath ${classpath} -Xms8G -Xmx8G \
endindex:${end} \
numthreads:4
rm -rf ${datadir}/log
# step 4: training the frame identification model
mkdir -p ${datadir}/models_0.0
$jhome/java -classpath ${classpath} -Xms8G -Xmx8G \
......@@ -56,8 +58,9 @@ $jhome/java -classpath ${classpath} -Xms8G -Xmx8G \
numthreads:8
# step 5: convert alphabet files
model=`ls ${datadir}/models_0.0/idmodel.dat_* | sort -n -k3 -t_ | tail -1`
$jhome/java -classpath ${classpath} -Xms8G -Xms8G edu.cmu.cs.lti.ark.fn.identification.ConvertAlphabetFile \
${datadir}/alphabet.dat \
${datadir}/models_0.0/idmodel.dat \
$model \
${datadir}/idmodel.dat
......@@ -2,28 +2,32 @@
. config.sh
mkdir ${datadir}/scan
mkdir -p lrdata
ln -sf ${SEMAFOR_HOME}/file_properties.xml .
ln -sf ${SEMAFOR_HOME}/dict .
ln -sf ${SEMAFOR_HOME}/stopwords.txt lrdata
mkdir -p ${datadir}/scan
# step 1: Alphabet Creation
$jhome/java -classpath ${classpath} -Xms4000m -Xmx4000m edu.cmu.cs.lti.ark.fn.parsing.CreateAlphabet \
${fefile}.frame.elements \
${fefile}.all.lemma.tags \
${datadir}/scan/cv.train.events.bin \
${datadir}/scan/parser.conf.unlabeled \
${datadir}/scan/cv.train.sentences.frame.elements.spans \
true \
false \
1 \
null \
${datadir}/framenet.frame.element.map
#$jhome/java -classpath ${classpath} -Xms4000m -Xmx4000m edu.cmu.cs.lti.ark.fn.parsing.CreateAlphabet \
# ${fefile}.frame.elements \
# ${fefile}.all.lemma.tags \
# ${datadir}/scan/cv.train.events.bin \
# ${datadir}/scan/parser.conf.unlabeled \
# ${datadir}/scan/cv.train.sentences.frame.elements.spans \
# true \
# false \
# 1 \
# null \
# ${datadir}/framenet.frame.element.map
# step 2: Caching Feature Vectors
$jhome/java -classpath ${classpath} -Xms4000m -Xmx4000m edu.cmu.cs.lti.ark.fn.parsing.FrameFeaturesCache \
eventsfile:${datadir}/scan/cv.train.events.bin \
spansfile:${datadir}/scan/cv.train.sentences.frame.elements.spans \
train-framefile:${fefile}.frame.elements \
localfeaturescache:${datadir}/scan/featurecache.jobj
#$jhome/java -classpath ${classpath} -Xms4000m -Xmx4000m edu.cmu.cs.lti.ark.fn.parsing.FrameFeaturesCache \
# eventsfile:${datadir}/scan/cv.train.events.bin \
# spansfile:${datadir}/scan/cv.train.sentences.frame.elements.spans \
# train-framefile:${fefile}.frame.elements \
# localfeaturescache:${datadir}/scan/featurecache.jobj
# step 3: training
$jhome/java -classpath ${classpath} -Xms8000m -Xmx8000m edu.cmu.cs.lti.ark.fn.parsing.TrainingBatchMain \
......
......@@ -60,6 +60,10 @@ def process_fulltext_xml(filename):
continue
target_start, target_end = int(target_label_node.attrib['start']), int(target_label_node.attrib['end'])
target = text[target_start: target_end + 1]
while index[target_start] == -1:
target_start += 1
while index[target_end] == -1:
target_end -=1
target_token_number = '_'.join([str(x) for x in range(index[target_start], index[target_end] + 1)])
#print ' ', frame_name, lexical_unit_name, target, target_token_number
......@@ -68,6 +72,10 @@ def process_fulltext_xml(filename):
frame_element = frame_element_node.attrib['name']
frame_element_start, frame_element_end = int(frame_element_node.attrib['start']), int(frame_element_node.attrib['end'])
#print ' ', frame_element, index[frame_element_start], index[frame_element_end], text[frame_element_start: frame_element_end]
while index[frame_element_start] == -1:
frame_element_start += 1
while index[frame_element_end] == -1:
frame_element_end -= 1
frame_elements.append(frame_element)
frame_elements.append('%d:%d' % (index[frame_element_start], index[frame_element_end]))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment