diff --git a/d6/data/treebank/Makefile b/d6/data/treebank/Makefile index c10a48efe2c8db9d68b88c09f86ec9854fa5d994..5c9026d6b92a90f9f5509be6452d202d5549ec1e 100644 --- a/d6/data/treebank/Makefile +++ b/d6/data/treebank/Makefile @@ -1,20 +1,19 @@ -TOOLS=../../tools/ -D6_DIR=../../../data/ptb_d6 -TRAIN=$(D6_DIR)/d6.train.conll07 -#TEST=$(D6_DIR)/ftb.test.conll07 -DEV=$(D6_DIR)/d6.dev.conll07 +TOOLS=../../../tools +PTB_DIR=../../../data/ptb_d6 +TRAIN=$(PTB_DIR)/d6.train.conll07 +TEST=$(PTB_DIR)/ptb.test.conll07 +DEV=$(PTB_DIR)/d6.dev.conll07 -compile: train.conll07 dev.conll07 +compile: dev.mcf train.mcf #test.mcf -dev.conll07: $(DEV) - cp $< $@ +dev.mcf: $(DEV) + $(TOOLS)/conll2mcf -f $< -1W -2C -3P -4L -5H -6D > $@ -train.conll07: $(TRAIN) - cp $< $@ +test.mcf: $(TEST) + $(TOOLS)/conll2mcf -f $< -1W -2C -3P -4L -5H -6D > $@ -test.conll07: $(TEST) - cp $< $@ +train.mcf: $(TRAIN) + $(TOOLS)/conll2mcf -f $< -1W -2C -3P -4L -5H -6D > $@ clean: - - rm -f train.conll07 - - rm -f dev.conll07 + - rm test.mcf train.mcf dev.mcf diff --git a/d6/maca_trans_parser/Makefile b/d6/maca_trans_parser/Makefile index 0488c8403a24a273dccaf4b874236d5b7fe6804e..86b9cbf7e7ceebaa0c8664a8603322797494b8f2 100644 --- a/d6/maca_trans_parser/Makefile +++ b/d6/maca_trans_parser/Makefile @@ -1,52 +1,17 @@ -CONLL07TRAIN=../data/treebank/train.conll07 -CONLL07DEV=../data/treebank/dev.conll07 -CONLL07TEST=../data/treebank/test.conll07 +MCF_TRAIN=../data/treebank/train.mcf +MCF_DEV=../data/treebank/dev.mcf +MCF_TEST=../data/treebank/test.mcf CFF_TRAIN=train.cff CFF_CUTOFF_TRAIN=train.cutoff.cff PERCEPTRON_ITERATIONS=5 -CFF_CUTOFF=1 +CFF_CUTOFF=3 FEATURES_MODEL_FILENAME=maca_trans_parser.fm -MCD_FILENAME=maca_trans_parser.mcd VOCABS_FILENAME=maca_trans_parser.vocab MODEL_FILENAME=maca_trans_parser.model NUMBER_OF_SENTENCES=10000000 -STREAM_MODE=0 - - -##----------------------------------------------------------------------- -## compile 
-##----------------------------------------------------------------------- - -compile: $(MODEL_FILENAME) - -$(CFF_TRAIN): $(CONLL07TRAIN) - maca_trans_parser_conll2cff --conll $< --mode TRAIN --mcd $(MCD_FILENAME) --feat_model $(FEATURES_MODEL_FILENAME) --vocabs $(VOCABS_FILENAME) --cff $@ -s $(NUMBER_OF_SENTENCES) -T $(STREAM_MODE) - -$(CFF_CUTOFF_TRAIN): $(CFF_TRAIN) - maca_trans_parser_cff_cutoff --cff $< --vocabs $(VOCABS_FILENAME) --cutoff $(CFF_CUTOFF) > $@ - -#$(MODEL_FILENAME): $(CFF_CUTOFF_TRAIN) -$(MODEL_FILENAME): $(CFF_TRAIN) - maca_trans_parser_train_from_cff --cff $< --model $(MODEL_FILENAME) -n $(PERCEPTRON_ITERATIONS) - -##----------------------------------------------------------------------- -## install -##----------------------------------------------------------------------- - -install: - - cp $(FEATURES_MODEL_FILENAME) ../bin - - cp $(VOCABS_FILENAME) ../bin - - cp $(MODEL_FILENAME) ../bin - -##----------------------------------------------------------------------- -## clean -##----------------------------------------------------------------------- - -clean: - - rm -f $(VOCABS_FILENAME) - - rm -f $(MODEL_FILENAME) - - rm -f $(CFF_TRAIN) - - rm -f $(CFF_CUTOFF_TRAIN) - +#NUMBER_OF_SENTENCES=1000 +MCD_FILENAME=wplgfs.mcd +STREAM_MODE= -S +include ../../makefiles/maca_trans_parser.makefile diff --git a/en/data/morpho-lexicon/add_to_fplm.txt b/en/data/morpho-lexicon/add_to_fplm.txt index 68a9ede5723dcd7eae509ef08a8b1f3fdc0962e0..b25eb3a2ab5680fcd5c1d7fb81b52d255cd823e4 100644 --- a/en/data/morpho-lexicon/add_to_fplm.txt +++ b/en/data/morpho-lexicon/add_to_fplm.txt @@ -1,62 +1,306 @@ n't RB not ##### -according to IN according to ##### -ahead of IN ahead of ##### -along with IN along with ##### -apart from IN apart from ##### -as for IN as for ##### -as well as IN as well as ##### -aside from IN aside from ##### -at the bottom of IN at the bottom of ##### -at the top of IN at the top of ##### -away from IN away from ##### -because of IN because of 
##### -but for IN but for ##### -by means of IN by means of ##### -by virtue of IN by virtue of ##### -by way of IN by way of ##### -close to IN close to ##### -contrary to IN contrary to ##### -due to IN due to ##### -except for IN except for ##### -far from IN far from ##### -for lack of IN for lack of ##### -in accordance with IN in accordance with ##### -in addition to IN in addition to ##### -in back of IN in back of ##### -in between IN in between ##### -in the case of IN in the case of ##### -in case of IN in case of ##### -in charge of IN in charge of ##### -in exchange for IN in exchange for ##### -in front of IN in front of ##### -in light of IN in light of ##### -in line with IN in line with ##### -in place of IN in place of ##### -in process of IN in process of ##### -in the process of IN in the process of ##### -in regard to IN in regard to ##### -inside of IN inside of ##### -in spite of IN in spite of ##### -instead of IN instead of ##### -in view of IN in view of ##### -near to IN near to ##### -next to IN next to ##### -on account of IN on account of ##### -on behalf of IN on behalf of ##### -on top of IN on top of ##### -on the top of IN on the top of ##### -on the bottom of IN on the bottom of ##### -out of IN out of ##### -outside of IN outside of ##### -owing to IN owing to ##### -prior to IN prior to ##### -subsequent to IN subsequent to ##### -such as IN such as ##### -thanks to IN thanks to ##### -to the right of IN to the right of ##### -to the left of IN to the right of ##### -together with IN together with ##### -up against IN up against ##### -up to IN up to ##### -up until IN up until ##### -with respect to IN with respect to ##### +according to IN $lemma ##### +according to JJ according to ##### +according to NN according to ##### +according to RB according to ##### +according to RP according to ##### +ahead of IN $lemma ##### +ahead of JJ ahead of ##### +ahead of NN ahead of ##### +ahead of RB ahead of ##### +ahead of RP ahead of 
##### +along with IN $lemma ##### +along with JJ along with ##### +along with NN along with ##### +along with RB along with ##### +along with RP along with ##### +apart from IN $lemma ##### +apart from JJ apart from ##### +apart from NN apart from ##### +apart from RB apart from ##### +apart from RP apart from ##### +as for IN $lemma ##### +as for JJ as for ##### +as for NN as for ##### +as for RB as for ##### +as for RP as for ##### +as well as IN $lemma ##### +as well as JJ as well as ##### +as well as NN as well as ##### +as well as RB as well as ##### +as well as RP as well as ##### +aside from IN $lemma ##### +aside from JJ aside from ##### +aside from NN aside from ##### +aside from RB aside from ##### +aside from RP aside from ##### +at the bottom of IN $lemma ##### +at the bottom of JJ at the bottom of ##### +at the bottom of NN at the bottom of ##### +at the bottom of RB at the bottom of ##### +at the bottom of RP at the bottom of ##### +at the top of IN $lemma ##### +at the top of JJ at the top of ##### +at the top of NN at the top of ##### +at the top of RB at the top of ##### +at the top of RP at the top of ##### +away from IN $lemma ##### +away from JJ away from ##### +away from NN away from ##### +away from RB away from ##### +away from RP away from ##### +because of IN $lemma ##### +because of JJ because of ##### +because of NN because of ##### +because of RB because of ##### +because of RP because of ##### +but for IN $lemma ##### +but for JJ but for ##### +but for NN but for ##### +but for RB but for ##### +but for RP but for ##### +by means of IN $lemma ##### +by means of JJ by means of ##### +by means of NN by means of ##### +by means of RB by means of ##### +by means of RP by means of ##### +by virtue of IN $lemma ##### +by virtue of JJ by virtue of ##### +by virtue of NN by virtue of ##### +by virtue of RB by virtue of ##### +by virtue of RP by virtue of ##### +by way of IN $lemma ##### +by way of JJ by way of ##### +by way of NN by way of 
##### +by way of RB by way of ##### +by way of RP by way of ##### +close to IN $lemma ##### +close to JJ close to ##### +close to NN close to ##### +close to RB close to ##### +close to RP close to ##### +contrary to IN $lemma ##### +contrary to JJ contrary to ##### +contrary to NN contrary to ##### +contrary to RB contrary to ##### +contrary to RP contrary to ##### +due to IN $lemma ##### +due to JJ due to ##### +due to NN due to ##### +due to RB due to ##### +due to RP due to ##### +except for IN $lemma ##### +except for JJ except for ##### +except for NN except for ##### +except for RB except for ##### +except for RP except for ##### +far from IN $lemma ##### +far from JJ far from ##### +far from NN far from ##### +far from RB far from ##### +far from RP far from ##### +for lack of IN $lemma ##### +for lack of JJ for lack of ##### +for lack of NN for lack of ##### +for lack of RB for lack of ##### +for lack of RP for lack of ##### +in accordance with IN $lemma ##### +in accordance with JJ in accordance with ##### +in accordance with NN in accordance with ##### +in accordance with RB in accordance with ##### +in accordance with RP in accordance with ##### +in addition to IN $lemma ##### +in addition to JJ in addition to ##### +in addition to NN in addition to ##### +in addition to RB in addition to ##### +in addition to RP in addition to ##### +in back of IN $lemma ##### +in back of JJ in back of ##### +in back of NN in back of ##### +in back of RB in back of ##### +in back of RP in back of ##### +in between IN $lemma ##### +in between JJ in between ##### +in between NN in between ##### +in between RB in between ##### +in between RP in between ##### +in the case of IN $lemma ##### +in the case of JJ in the case of ##### +in the case of NN in the case of ##### +in the case of RB in the case of ##### +in the case of RP in the case of ##### +in case of IN $lemma ##### +in case of JJ in case of ##### +in case of NN in case of ##### +in case of RB in case of ##### +in 
case of RP in case of ##### +in charge of IN $lemma ##### +in charge of JJ in charge of ##### +in charge of NN in charge of ##### +in charge of RB in charge of ##### +in charge of RP in charge of ##### +in exchange for IN $lemma ##### +in exchange for JJ in exchange for ##### +in exchange for NN in exchange for ##### +in exchange for RB in exchange for ##### +in exchange for RP in exchange for ##### +in front of IN $lemma ##### +in front of JJ in front of ##### +in front of NN in front of ##### +in front of RB in front of ##### +in front of RP in front of ##### +in light of IN $lemma ##### +in light of JJ in light of ##### +in light of NN in light of ##### +in light of RB in light of ##### +in light of RP in light of ##### +in line with IN $lemma ##### +in line with JJ in line with ##### +in line with NN in line with ##### +in line with RB in line with ##### +in line with RP in line with ##### +in place of IN $lemma ##### +in place of JJ in place of ##### +in place of NN in place of ##### +in place of RB in place of ##### +in place of RP in place of ##### +in process of IN $lemma ##### +in process of JJ in process of ##### +in process of NN in process of ##### +in process of RB in process of ##### +in process of RP in process of ##### +in the process of IN $lemma ##### +in the process of JJ in the process of ##### +in the process of NN in the process of ##### +in the process of RB in the process of ##### +in the process of RP in the process of ##### +in regard to IN $lemma ##### +in regard to JJ in regard to ##### +in regard to NN in regard to ##### +in regard to RB in regard to ##### +in regard to RP in regard to ##### +inside of IN $lemma ##### +inside of JJ inside of ##### +inside of NN inside of ##### +inside of RB inside of ##### +inside of RP inside of ##### +in spite of IN $lemma ##### +in spite of JJ in spite of ##### +in spite of NN in spite of ##### +in spite of RB in spite of ##### +in spite of RP in spite of ##### +instead of IN $lemma ##### +instead of 
JJ instead of ##### +instead of NN instead of ##### +instead of RB instead of ##### +instead of RP instead of ##### +in view of IN $lemma ##### +in view of JJ in view of ##### +in view of NN in view of ##### +in view of RB in view of ##### +in view of RP in view of ##### +near to IN $lemma ##### +near to JJ near to ##### +near to NN near to ##### +near to RB near to ##### +near to RP near to ##### +next to IN $lemma ##### +next to JJ next to ##### +next to NN next to ##### +next to RB next to ##### +next to RP next to ##### +on account of IN $lemma ##### +on account of JJ on account of ##### +on account of NN on account of ##### +on account of RB on account of ##### +on account of RP on account of ##### +on behalf of IN $lemma ##### +on behalf of JJ on behalf of ##### +on behalf of NN on behalf of ##### +on behalf of RB on behalf of ##### +on behalf of RP on behalf of ##### +on top of IN $lemma ##### +on top of JJ on top of ##### +on top of NN on top of ##### +on top of RB on top of ##### +on top of RP on top of ##### +on the top of IN $lemma ##### +on the top of JJ on the top of ##### +on the top of NN on the top of ##### +on the top of RB on the top of ##### +on the top of RP on the top of ##### +on the bottom of IN $lemma ##### +on the bottom of JJ on the bottom of ##### +on the bottom of NN on the bottom of ##### +on the bottom of RB on the bottom of ##### +on the bottom of RP on the bottom of ##### +out of IN $lemma ##### +out of JJ out of ##### +out of NN out of ##### +out of RB out of ##### +out of RP out of ##### +outside of IN $lemma ##### +outside of JJ outside of ##### +outside of NN outside of ##### +outside of RB outside of ##### +outside of RP outside of ##### +owing to IN $lemma ##### +owing to JJ owing to ##### +owing to NN owing to ##### +owing to RB owing to ##### +owing to RP owing to ##### +prior to IN $lemma ##### +prior to JJ prior to ##### +prior to NN prior to ##### +prior to RB prior to ##### +prior to RP prior to ##### +subsequent to IN 
$lemma ##### +subsequent to JJ subsequent to ##### +subsequent to NN subsequent to ##### +subsequent to RB subsequent to ##### +subsequent to RP subsequent to ##### +such as IN $lemma ##### +such as JJ such as ##### +such as NN such as ##### +such as RB such as ##### +such as RP such as ##### +thanks to IN $lemma ##### +thanks to JJ thanks to ##### +thanks to NN thanks to ##### +thanks to RB thanks to ##### +thanks to RP thanks to ##### +to the right of IN $lemma ##### +to the right of JJ to the right of ##### +to the right of NN to the right of ##### +to the right of RB to the right of ##### +to the right of RP to the right of ##### +to the left of IN $lemma ##### +to the left of JJ to the left of ##### +to the left of NN to the left of ##### +to the left of RB to the left of ##### +to the left of RP to the left of ##### +together with IN $lemma ##### +together with JJ together with ##### +together with NN together with ##### +together with RB together with ##### +together with RP together with ##### +up against IN $lemma ##### +up against JJ up against ##### +up against NN up against ##### +up against RB up against ##### +up against RP up against ##### +up to IN $lemma ##### +up to JJ up to ##### +up to NN up to ##### +up to RB up to ##### +up to RP up to ##### +up until IN $lemma ##### +up until JJ up until ##### +up until NN up until ##### +up until RB up until ##### +up until RP up until ##### +with respect to IN $lemma ##### +with respect to JJ with respect to ##### +with respect to NN with respect to ##### +with respect to RB with respect to ##### +with respect to RP with respect to ##### diff --git a/en/maca_trans_tagger/maca_trans_tagger.fm b/en/maca_trans_tagger/maca_trans_tagger.fm index 9738c25b96db8bac9f4852ad35a95110218f9dbb..56b65edcf1d3d7ed5f5602fac24121d2e0ea297a 100644 --- a/en/maca_trans_tagger/maca_trans_tagger.fm +++ b/en/maca_trans_tagger/maca_trans_tagger.fm @@ -1,14 +1,27 @@ b0U1 b0sgn -b1sgn b0f -b1f -b2f -s0f -s1f -s0p -s1p -s2p -s0p
s1p -s0p s1p s2p -s1p s2p +b0len +bm1f +bm2f +bm1p +bm2p +bm3p +bm2p bm1p +bm2p bm3p +bm1p b0sgn + +#b0U1 +#b0sgn +#b1sgn +#b0f +#b1f +#b2f +#s0f +#s1f +#s0p +#s1p +#s2p +#s0p s1p +#s0p s1p s2p +#s1p s2p