Commit bbb0919c authored by Alexis Nasr's avatar Alexis Nasr
Browse files

code refactoring

parent a4db64a2
......@@ -10,6 +10,9 @@ The basic procedure to build and install macaon from sources is the following.
- Launch the cmake command:
cmake ..
If you want to compile macaon with debugging options, type:
cmake -DCMAKE_BUILD_TYPE=Debug ..
If you want to install macaon locally, you can specify the install path with:
cmake -DCMAKE_INSTALL_PREFIX:PATH=/absolute/path/to/macaon_install_dir ..
......
......@@ -5,4 +5,5 @@
void myfree(void *ptr);
void *memalloc(size_t s);
FILE *myfopen(const char *path, const char *mode);
FILE *myfopen_no_exit(const char *path, const char *mode);
#endif
......@@ -31,7 +31,7 @@ void form2pos_free(form2pos *f2p)
form2pos *form2pos_read(char *filename)
{
FILE *f = myfopen(filename, "r");
FILE *f = myfopen_no_exit(filename, "r");
int nbelem;
int pos_nb;
char pos_list[10000];
......@@ -39,6 +39,8 @@ form2pos *form2pos_read(char *filename)
char signature[200];
form2pos *f2p = NULL;
if(f == NULL) return NULL;
/* read number of forms */
fscanf(f, "%d\n", &nbelem);
......
......@@ -25,3 +25,12 @@ FILE *myfopen(const char *path, const char *mode)
}
return f;
}
/*
 * Open a file with fopen() and report a failure on stderr.
 *
 * Unlike a fatal wrapper, this function never terminates the program: it
 * hands the failure back to the caller as a NULL stream.
 *
 * path: name of the file to open (NULL is treated as a failure).
 * mode: fopen() mode string (NULL is treated as a failure).
 *
 * Returns the opened stream, or NULL when an argument is NULL or fopen()
 * fails. In every failure case one "cannot open file" line is written to
 * stderr. The caller must test the result before using it.
 */
FILE *myfopen_no_exit(const char *path, const char *mode)
{
FILE *f = NULL;

/* fopen() and the "%s" conversion are both undefined on a NULL pointer */
if((path != NULL) && (mode != NULL)){
f = fopen(path, mode);
}
if(f == NULL){
fprintf(stderr, "cannot open file %s\n", (path != NULL) ? path : "(null)");
}
return f;
}
......@@ -123,7 +123,7 @@ int main(int argc, char *argv[])
/* look for a valid word */
while(fgets(buffer, 10000, f)){
if(feof(f)) return 0; /* no more words to read */
if((buffer[0] == '\n') || (buffer[0] == ' ')){
if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')){
printf("\n");
continue;
}
......
......@@ -7,9 +7,6 @@
#include "context.h"
#include "util.h"
void context_set_linguistic_resources_filenames(context *ctx);
void context_free(context *ctx)
{
if(ctx->program_name) free(ctx->program_name);
......@@ -306,13 +303,8 @@ context *context_read_options(int argc, char *argv[])
}
}
context_set_linguistic_resources_filenames(ctx);
if(ctx->features_model_filename){
ctx->features_model = feat_model_read(ctx->features_model_filename);
}
/* if(ctx->mcd_filename && ctx->conll_filename){
ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->conll_filename);
ctx->mvt_nb = ctx->mcd_struct->dico_array[ctx->mcd_struct->type2col[FEAT_TYPE_LABEL]]->nbelem * 2 + 1;
......@@ -341,7 +333,7 @@ context *context_read_options(int argc, char *argv[])
return ctx;
}
void context_set_linguistic_resources_filenames(context *ctx)
void context_set_linguistic_resources_filenames_parser(context *ctx)
{
char absolute_path[500];
char absolute_filename[500];
......@@ -382,11 +374,10 @@ void context_set_linguistic_resources_filenames(context *ctx)
ctx->features_model_filename = strdup(absolute_filename);
}
/* fprintf(stdout, "perc_model_filename = %s\n", ctx->perc_model_filename);
fprintf(stdout, "vocabs_filename = %s\n", ctx->vocabs_filename);
fprintf(stdout, "mcd_filename = %s\n", ctx->mcd_filename);
fprintf(stdout, "perc_features_model_filename = %s\n", ctx->features_model_filename);*/
fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename);
fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
}
void context_set_linguistic_resources_filenames_tagger(context *ctx)
......@@ -405,7 +396,7 @@ void context_set_linguistic_resources_filenames_tagger(context *ctx)
strcat(absolute_path, ctx->language);
strcat(absolute_path, "/bin/");
if(!ctx->perc_model_filename){
strcpy(absolute_filename, absolute_path);
strcat(absolute_filename, DEFAULT_MODEL_TAGGER_FILENAME);
......@@ -430,9 +421,17 @@ void context_set_linguistic_resources_filenames_tagger(context *ctx)
ctx->features_model_filename = strdup(absolute_filename);
}
/* fprintf(stdout, "perc_model_filename = %s\n", ctx->perc_model_filename);
fprintf(stdout, "vocabs_filename = %s\n", ctx->vocabs_filename);
fprintf(stdout, "mcd_filename = %s\n", ctx->mcd_filename);
fprintf(stdout, "perc_features_model_filename = %s\n", ctx->features_model_filename);*/
if(!ctx->f2p_filename){
strcpy(absolute_filename, absolute_path);
strcat(absolute_filename, DEFAULT_F2P_FILENAME);
ctx->f2p_filename = strdup(absolute_filename);
ctx->f2p = form2pos_read(ctx->f2p_filename);
}
fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename);
fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
fprintf(stderr, "f2p_filename = %s\n", ctx->f2p_filename);
}
......@@ -14,6 +14,7 @@
#define DEFAULT_FEATURES_MODEL_TAGGER_FILENAME "maca_trans_tagger.fm"
#define DEFAULT_VOCABS_TAGGER_FILENAME "maca_trans_tagger.vocab"
#define DEFAULT_MODEL_TAGGER_FILENAME "maca_trans_tagger.model"
#define DEFAULT_F2P_FILENAME "fP"
#include "dico_vec.h"
#include "feat_model.h"
......@@ -92,4 +93,10 @@ void context_maca_data_path_help_message(context *ctx);
void context_f2p_filename_help_message(context *ctx);
void context_set_linguistic_resources_filenames_tagger(context *ctx);
void context_set_linguistic_resources_filenames_parser(context *ctx);
#endif
......@@ -53,6 +53,9 @@ int main(int argc, char *argv[])
ctx = context_read_options(argc, argv);
decode_check_options(ctx);
context_set_linguistic_resources_filenames_parser(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename);
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs);
......
......@@ -46,11 +46,12 @@ int main(int argc, char *argv[])
{
FILE *conll_file = NULL;
context *ctx;
/* struct fann *ann; */
ctx = context_read_options(argc, argv);
decode_check_options(ctx);
context_set_linguistic_resources_filenames_tagger(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename);
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs);
......
......@@ -77,7 +77,8 @@ void depset_print2(FILE *f, depset *d, dico *dico_labels)
for(i=1; i < d->length; i++){
if((d->array[i].gov) && (d->array[i].dep)){
fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, word_get_index(d->array[i].gov) - word_get_index(d->array[i].dep), dico_int2string(dico_labels, d->array[i].label));
/* fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, word_get_index(d->array[i].gov) - word_get_index(d->array[i].dep), dico_int2string(dico_labels, d->array[i].label ));*/
fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, word_get_index(d->array[i].gov), dico_int2string(dico_labels, d->array[i].label));
}
}
fprintf(f, "\n");
......
......@@ -169,6 +169,10 @@ int main(int argc, char *argv[])
ctx = context_read_options(argc, argv);
maca_trans_parser_conll2cff_check_options(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename);
if(ctx->mode == TRAIN_MODE){
mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename);
......
......@@ -82,7 +82,6 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
fprintf(output_file, "%d", postag);
feat_vec_print(output_file, fv);
if(postag != -1)
movement_tagger(c, postag, 0, 1);
}
......@@ -111,7 +110,6 @@ void generate_training_file_buffer(FILE *output_file, context *ctx)
if(ctx->f2p)
add_signature_to_words_in_queue(c->bf, ctx->f2p);
while(!config_is_terminal(c)){
/* config_print(stdout, c); */
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);
......@@ -136,6 +134,9 @@ int main(int argc, char *argv[])
ctx = context_read_options(argc, argv);
maca_trans_parser_conll2cff_check_options(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename);
if(ctx->mode == TRAIN_MODE){
mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename);
ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);
......@@ -146,7 +147,6 @@ int main(int argc, char *argv[])
}
feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);
/* in train mode, create the feature dictionary for the perceptron */
if(ctx->mode == TRAIN_MODE)
......
......@@ -22,7 +22,7 @@ int queue_read_sentence(queue *bf, FILE *f, mcd *mcd_struct)
while(fgets(buffer, 10000, f)){
if(feof(f)) break;
/* fprintf(stderr, "%s", buffer); */
if((buffer[0] == '\n') || (buffer[0] == ' ')) break; /* end of the sentence */
if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')) break; /* end of the sentence */
w = word_parse_buffer(buffer, mcd_struct);
if(word_get_index(w) == -1){
w->feat_array[FEAT_TYPE_INDEX] = index++;
......
......@@ -21,25 +21,17 @@ void add_signature_to_words_in_queue(queue *bf, form2pos *f2p)
}
}
void simple_decoder_buffer(context *ctx)
{
FILE *f = NULL;
dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS");
feature_table *ft = feature_table_load(ctx->perc_model_filename);
config *c = NULL;
int postag;
feat_vec *fv = feat_vec_new(feature_types_nb);
float max;
int i;
word *w;
if(ctx->conll_filename)
f= myfopen(ctx->conll_filename, "r");
else
f= stdin;
c = config_initial(f, ctx->mcd_struct, 1000, 0);
word *w = NULL;
FILE *f = (ctx->conll_filename)? myfopen(ctx->conll_filename, "r") : stdin;
config *c = config_initial(f, ctx->mcd_struct, 1000, 0);
/* read a sentence and put it in the buffer */
while(queue_read_sentence(c->bf, f, ctx->mcd_struct)){
......@@ -59,6 +51,7 @@ void simple_decoder_buffer(context *ctx)
w = stack_elt_n(c->st, i);
printf("%s\t%s\n", w->input, dico_int2string(dico_pos, word_get_pos(w)));
}
printf("\n");
/* config_free(c); */
c = config_initial(f, ctx->mcd_struct, 1000, 0);
......@@ -74,7 +67,6 @@ void simple_decoder_stream(context *ctx)
feat_vec *fv = feat_vec_new(feature_types_nb);
FILE *f = NULL;
/* when in stream mode, force to renumber the tokens (ugly !) */
ctx->mcd_struct->type[ctx->mcd_struct->type2col[FEAT_TYPE_INDEX]] = -1;
......@@ -82,23 +74,14 @@ void simple_decoder_stream(context *ctx)
while(!config_is_terminal(c)){
config_print(stdout, c);
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
}
/* config_print(stdout, c); */
/* config_free(c); */
}
void simple_decoder_tagger(context *ctx)
/* (FILE *f, mcd *mcd_struct, dico *d_perceptron_features, dico *dico_pos, feature_table *ft, feat_model *fm, int verbose, int stream_mode)*/
{
/*conll_file, ctx->mcd_struct, ctx->d_perceptron_features, dico_pos, ft, ctx->features_model, ctx->verbose, ctx->stream_mode);*/
ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
if(ctx->stream_mode)
......
......@@ -31,6 +31,7 @@ word *word_read(FILE *f, mcd *mcd_struct)
while(fgets(buffer, 10000, f)){
if(feof(f)) return NULL; /* no more words to read */
if((buffer[0] != '\n') && (buffer[0] != ' ')){
/* printf("word = %s\n", buffer); */
return word_parse_buffer(buffer, mcd_struct);
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment