diff --git a/maca_export/src/maca_lemmatizer_export.cc b/maca_export/src/maca_lemmatizer_export.cc index 508ab7cfe30c90ebe3d26e81bf5eaca68f01458a..af4ee502b7fa7448bd08d05829a0045de8663d32 100644 --- a/maca_export/src/maca_lemmatizer_export.cc +++ b/maca_export/src/maca_lemmatizer_export.cc @@ -27,7 +27,6 @@ MacaonTransLemmatizer::MacaonTransLemmatizer(char *lg, char *mcd) { ctx = context_read_options(5, argv); form_pos_ht = hash_new(1000000); - //maca_lemmatizer_check_options(ctx); maca_lemmatizer_set_linguistic_resources_filenames(ctx); @@ -56,9 +55,9 @@ const char *MacaonTransLemmatizer::lemmatizemcf(const char *mcfString) { FILE *f = fmemopen ((void *)mcfString, strlen(mcfString), "r"); config *c = config_new(f, ctx->mcd_struct, 5); - char lemma[200]; - char form[200]; - char pos[200]; + //char lemma[200]; + //char form[200]; + //char pos[200]; size_t size; if (resultstring != NULL) { @@ -86,7 +85,8 @@ const char *MacaonTransLemmatizer::lemmatizemcf(const char *mcfString) { fclose(outstream); return resultstring; } - +/** taken from maca_trans_lemmatizer and added FILE * outstream to write the result to + */ void MacaonTransLemmatizer::print_word(word *w, mcd *mcd_struct, char *lemma, FILE *outstream) { char *buffer = NULL; char *token = NULL; @@ -116,7 +116,7 @@ void MacaonTransLemmatizer::print_word(word *w, mcd *mcd_struct, char *lemma, FI } -// taken as is from maca_lemmatizer.c +/** taken as is from maca_lemmatizer.c since it is not included in libtransparse.a */ void MacaonTransLemmatizer::maca_lemmatizer_set_linguistic_resources_filenames(context *ctx) { char absolute_filename[500]; @@ -131,6 +131,7 @@ void MacaonTransLemmatizer::maca_lemmatizer_set_linguistic_resources_filenames(c } } +/** taken as is from maca_trans_lemmatizer.c since it is not included in libtransparse.a */ char **MacaonTransLemmatizer::read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode, int *lemma_array_size) { char form[1000]; char pos[1000]; @@ -182,9 +183,9 @@ 
char **MacaonTransLemmatizer::read_fplm_file(char *fplm_filename, hash *form_pos fclose(f); return lemma_array; } - +/** taken as is from maca_trans_lemmatizer.c since it is not included in libtransparse.a */ char *MacaonTransLemmatizer::lookup_lemma(char *form, char *pos, hash *form_pos_ht, char **lemma_array, int verbose) { - char form_pos[1000]; + //char form_pos[1000]; int index_form_pos; strcpy(form_pos, form); diff --git a/maca_export/src/maca_lemmatizer_export.h b/maca_export/src/maca_lemmatizer_export.h index fd676eea6f743d3f617db92371ad29a6de39b1bb..afa63bc1071a69a8e52df08ba1985cb540ef83c7 100644 --- a/maca_export/src/maca_lemmatizer_export.h +++ b/maca_export/src/maca_lemmatizer_export.h @@ -4,15 +4,17 @@ class MacaonTransLemmatizer { public: /** creates instance, assumes that the environment variable MACAON_DIR - is defined + is defined. One instance for each thread has to be created (this means loading resources for each thread). @param lg language to be used (in the sense of sub-dir in MACAON_DIR) @param mcd the filename of the mcd definitions */ MacaonTransLemmatizer(char *lg, char *mcd); + ~MacaonTransLemmatizer(); + /** call lemmatizer @param mcfString a string containing the sentence to be analysed in mcf format - (at least the columns form, pos, lemma must be present + (at least the columns form, pos must be present) @return the parser output */ const char *lemmatizemcf(const char *mcfString); @@ -25,6 +27,13 @@ class MacaonTransLemmatizer { /// keeps last result (or NULL) char *resultstring; + /// variables used during lemmatization + char form_pos[1000]; + char lemma[200]; + char form[200]; + char pos[200]; + + /// variables to store data hash *form_pos_ht = NULL; char **lemma_array = NULL; int lemma_array_size; diff --git a/maca_export/src/maca_trans_tagger_export.cc b/maca_export/src/maca_trans_tagger_export.cc index 33fe8e81352a08cd744b93b892999724dd8bfb0b..a791372ddf7d6977ed6b34418e0a7fa84514ac5b 100644 --- 
a/maca_export/src/maca_trans_tagger_export.cc +++ b/maca_export/src/maca_trans_tagger_export.cc @@ -7,11 +7,9 @@ extern "C"{ #endif #include "context.h" -#include "feat_fct.h" #include "config2feat_vec.h" -#include "feature_table.h" -#include "dico.h" #include "movement_tagger.h" +#include "simple_decoder_tagger.h" #ifdef __cplusplus } #endif @@ -57,7 +55,7 @@ const char *MacaonTransTagger::tagmcf(const char *mcfString) { } -// taken as is +/** taken as is from maca_trans_tagger.c */ void MacaonTransTagger::decode_tagger_set_linguistic_resources_filenames(context *ctx) { char absolute_filename[500]; @@ -101,7 +99,7 @@ void MacaonTransTagger::decode_tagger_set_linguistic_resources_filenames(context } } -// taken from simple_decoder_tagger.c and modified to read from string +/** taken from simple_decoder_tagger.c and modified to read from string and write to string */ void MacaonTransTagger::simple_decoder_tagger(context *ctx, const char *mcfString) { config *c; feat_vec *fv = feat_vec_new(feature_types_nb); @@ -167,6 +165,8 @@ void MacaonTransTagger::simple_decoder_tagger(context *ctx, const char *mcfStrin fclose(f); } +/** taken from simple_decoder_tagger.c and modified (parameter FILE *outstream) + */ void MacaonTransTagger::print_word(word *w, mcd *mcd_struct, dico *dico_pos, int postag, FILE *outstream) { char *buffer = NULL; char *token = NULL; @@ -193,20 +193,3 @@ void MacaonTransTagger::print_word(word *w, mcd *mcd_struct, dico *dico_pos, int free(buffer); } } - -void MacaonTransTagger::add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p) { - int i; - word *w; - char lower_form[100]; - - for(i = word_buffer_get_nbelem(bf) - 1; i >=0 ; i--){ - w = word_buffer_get_word_n(bf, i); - if(word_get_signature(w) != -1) break; - w->signature = form2pos_get_signature(f2p, w->form); - if(w->signature == -1){ - strcpy(lower_form, w->form); - to_lower_string(lower_form); - w->signature = form2pos_get_signature(f2p, lower_form); - } - } -} diff --git 
a/maca_export/src/maca_trans_tagger_export.h b/maca_export/src/maca_trans_tagger_export.h index 62663dd69d066d28f0b262097aeec8d87b443c23..1d3aefe500aa178d27b5e7c6c543de92dde1714e 100644 --- a/maca_export/src/maca_trans_tagger_export.h +++ b/maca_export/src/maca_trans_tagger_export.h @@ -4,15 +4,17 @@ class MacaonTransTagger { public: /** creates instance, assumes that the environment variable MACAON_DIR - is defined + is defined. One instance for each thread has to be created (this means loading resources for each thread). @param lg language to be used (in the sense of sub-dir in MACAON_DIR) @param mcd the filename of the mcd definitions */ MacaonTransTagger(char *lg, char *mcd); + ~MacaonTransTagger(); + /** call tagger - @param mcfString a string containing the sentence to be analysed in mcf format - (at least the columns form, pos, lemma must be present + @param mcfString a string containing the sentence to be analysed in mcf format. + At least the column form must be present @return the parser output */ const char *tagmcf(const char *mcfString); @@ -24,13 +26,12 @@ class MacaonTransTagger { context *ctx; /// keeps last result (or NULL) char *resultstring; - void decode_tagger_set_linguistic_resources_filenames(context *ctx); + // import functions which are not available in libtransparse.a or are modified + void decode_tagger_set_linguistic_resources_filenames(context *ctx); void simple_decoder_tagger(context *ctx, const char *mcf); void print_word(word *w, mcd *mcd_struct, dico *dico_pos, int postag, FILE *stream); - void add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p); }; -//extern void add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p); diff --git a/maca_trans_parser/src/simple_decoder_tagger.h b/maca_trans_parser/src/simple_decoder_tagger.h index 28f4f1091d01c97638b0a891c76176c68042992a..b5d5b2aa019629a560011a9b7f575228a7cb3353 100644 --- a/maca_trans_parser/src/simple_decoder_tagger.h +++ 
b/maca_trans_parser/src/simple_decoder_tagger.h @@ -1,6 +1,6 @@ #ifndef __SIMPLE_DECODER_TAGGER__ #define __SIMPLE_DECODER_TAGGER__ - +void add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p); void simple_decoder_tagger(context *ctx); #endif