Skip to content
Snippets Groups Projects
Commit ac439517 authored by Johannes Heinecke's avatar Johannes Heinecke
Browse files

adding some comments

parent 11029e07
Branches
No related tags found
1 merge request: !5 Johannes: wrapping to use macaon_trans_tagger, macaon_trans_lemmatizer and macaon_trans_parser within Python or Java programs
......@@ -28,7 +28,6 @@ MacaonTransLemmatizer::MacaonTransLemmatizer(char *lg, char *mcd) {
ctx = context_read_options(5, argv);
form_pos_ht = hash_new(1000000);
//maca_lemmatizer_check_options(ctx);
maca_lemmatizer_set_linguistic_resources_filenames(ctx);
......@@ -56,9 +55,9 @@ const char *MacaonTransLemmatizer::lemmatizemcf(const char *mcfString) {
FILE *f = fmemopen ((void *)mcfString, strlen(mcfString), "r");
config *c = config_new(f, ctx->mcd_struct, 5);
char lemma[200];
char form[200];
char pos[200];
//char lemma[200];
//char form[200];
//char pos[200];
size_t size;
if (resultstring != NULL) {
......@@ -86,7 +85,8 @@ const char *MacaonTransLemmatizer::lemmatizemcf(const char *mcfString) {
fclose(outstream);
return resultstring;
}
/** taken from maca_trans_lemmatizer and added FILE * outstream to write the result to
*/
void MacaonTransLemmatizer::print_word(word *w, mcd *mcd_struct, char *lemma, FILE *outstream) {
char *buffer = NULL;
char *token = NULL;
......@@ -116,7 +116,7 @@ void MacaonTransLemmatizer::print_word(word *w, mcd *mcd_struct, char *lemma, FI
}
// taken as is from maca_lemmatizer.c
/** taken as is from maca_lemmatizer.c since it is not included in libtransparse.a */
void MacaonTransLemmatizer::maca_lemmatizer_set_linguistic_resources_filenames(context *ctx) {
char absolute_filename[500];
......@@ -131,6 +131,7 @@ void MacaonTransLemmatizer::maca_lemmatizer_set_linguistic_resources_filenames(c
}
}
/** taken as is from maca_trans_lemmatizer.c since it is not included in libtransparse.a */
char **MacaonTransLemmatizer::read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode, int *lemma_array_size) {
char form[1000];
char pos[1000];
......@@ -182,9 +183,9 @@ char **MacaonTransLemmatizer::read_fplm_file(char *fplm_filename, hash *form_pos
fclose(f);
return lemma_array;
}
/** taken as is from maca_trans_lemmatizer.c since it is not included in libtransparse.a */
char *MacaonTransLemmatizer::lookup_lemma(char *form, char *pos, hash *form_pos_ht, char **lemma_array, int verbose) {
char form_pos[1000];
//char form_pos[1000];
int index_form_pos;
strcpy(form_pos, form);
......
......@@ -4,15 +4,17 @@ class MacaonTransLemmatizer {
public:
/**
creates instance, assumes that the environment variable MACAON_DIR
is defined
is defined. One instance for each thread has to be created (this means loading resources for each thread).
@param lg language to be used (in the sense of sub-dir in MACAON_DIR)
@param mcd the filename of the mcd definitions
*/
MacaonTransLemmatizer(char *lg, char *mcd);
~MacaonTransLemmatizer();
/** call lemmatizer
@param mcfString a string containing the sentence to be analysed in mcf format
(at least the columns form, pos, lemma must be present
(at least the columns form, pos must be present)
@return the lemmatizer output
*/
const char *lemmatizemcf(const char *mcfString);
......@@ -25,6 +27,13 @@ class MacaonTransLemmatizer {
/// keeps last result (or NULL)
char *resultstring;
/// variables used during lemmatization
char form_pos[1000];
char lemma[200];
char form[200];
char pos[200];
/// variables to store data
hash *form_pos_ht = NULL;
char **lemma_array = NULL;
int lemma_array_size;
......
......@@ -7,11 +7,9 @@ extern "C"{
#endif
#include "context.h"
#include "feat_fct.h"
#include "config2feat_vec.h"
#include "feature_table.h"
#include "dico.h"
#include "movement_tagger.h"
#include "simple_decoder_tagger.h"
#ifdef __cplusplus
}
#endif
......@@ -57,7 +55,7 @@ const char *MacaonTransTagger::tagmcf(const char *mcfString) {
}
// taken as is
/** taken as is from maca_trans_tagger.c */
void MacaonTransTagger::decode_tagger_set_linguistic_resources_filenames(context *ctx) {
char absolute_filename[500];
......@@ -101,7 +99,7 @@ void MacaonTransTagger::decode_tagger_set_linguistic_resources_filenames(context
}
}
// taken from simple_decoder_tagger.c and modified to read from string
/** taken from simple_decoder_tagger.c and modified to read from string and write to string */
void MacaonTransTagger::simple_decoder_tagger(context *ctx, const char *mcfString) {
config *c;
feat_vec *fv = feat_vec_new(feature_types_nb);
......@@ -167,6 +165,8 @@ void MacaonTransTagger::simple_decoder_tagger(context *ctx, const char *mcfStrin
fclose(f);
}
/** taken from simple_decoder_tagger.c and modified (parameter FILE *outstream)
*/
void MacaonTransTagger::print_word(word *w, mcd *mcd_struct, dico *dico_pos, int postag, FILE *outstream) {
char *buffer = NULL;
char *token = NULL;
......@@ -193,20 +193,3 @@ void MacaonTransTagger::print_word(word *w, mcd *mcd_struct, dico *dico_pos, int
free(buffer);
}
}
/**
 * Fill in the signature of the most recent words of a word buffer.
 *
 * The buffer is walked from its last element towards its first; the walk
 * stops at the first word whose signature is already set (!= -1). For each
 * visited word the signature is looked up from its form; if that lookup
 * yields -1, the lookup is retried with a copy of the form passed through
 * to_lower_string() (presumably an in-place lowercasing helper).
 *
 * @param bf  word buffer whose words are updated in place
 * @param f2p form-to-pos resource used for the signature lookup
 */
void MacaonTransTagger::add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p) {
    char lower_form[100];

    for (int i = word_buffer_get_nbelem(bf) - 1; i >= 0; i--) {
        word *w = word_buffer_get_word_n(bf, i);
        if (word_get_signature(w) != -1) break;

        w->signature = form2pos_get_signature(f2p, w->form);
        if (w->signature == -1) {
            // Bounded copy: an unchecked strcpy() would overflow lower_form
            // for forms of 100 bytes or more. Over-long forms are truncated,
            // which at worst makes the fallback lookup miss.
            strncpy(lower_form, w->form, sizeof(lower_form) - 1);
            lower_form[sizeof(lower_form) - 1] = '\0';
            to_lower_string(lower_form);
            w->signature = form2pos_get_signature(f2p, lower_form);
        }
    }
}
......@@ -4,15 +4,17 @@ class MacaonTransTagger {
public:
/**
creates instance, assumes that the environment variable MACAON_DIR
is defined
is defined. One instance for each thread has to be created (this means loading resources for each thread).
@param lg language to be used (in the sense of sub-dir in MACAON_DIR)
@param mcd the filename of the mcd definitions
*/
MacaonTransTagger(char *lg, char *mcd);
~MacaonTransTagger();
/** call tagger
@param mcfString a string containing the sentence to be analysed in mcf format
(at least the columns form, pos, lemma must be present
@param mcfString a string containing the sentence to be analysed in mcf format.
At least the column form must be present
@return the tagger output
*/
const char *tagmcf(const char *mcfString);
......@@ -24,13 +26,12 @@ class MacaonTransTagger {
context *ctx;
/// keeps last result (or NULL)
char *resultstring;
void decode_tagger_set_linguistic_resources_filenames(context *ctx);
// import functions which are not available in libtransparse.a or are modified
void decode_tagger_set_linguistic_resources_filenames(context *ctx);
void simple_decoder_tagger(context *ctx, const char *mcf);
void print_word(word *w, mcd *mcd_struct, dico *dico_pos, int postag, FILE *stream);
void add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p);
};
//extern void add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p);
// Include guard: the prototypes below are declared only once per translation unit.
#ifndef __SIMPLE_DECODER_TAGGER__
#define __SIMPLE_DECODER_TAGGER__
// Sets the signature of words in the buffer that do not have one yet,
// using the given form2pos resource for the lookup.
void add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p);
// Runs the tagger decoder for the given context.
// NOTE(review): input/output streams are presumably taken from ctx — confirm.
void simple_decoder_tagger(context *ctx);
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment