Skip to content
Snippets Groups Projects
Commit bbb0919c authored by Alexis Nasr's avatar Alexis Nasr
Browse files

code refactoring

parent a4db64a2
Branches
No related tags found
No related merge requests found
Showing with 62 additions and 48 deletions
......@@ -10,6 +10,9 @@ The basic procedure to build and install macaon from sources is the following.
- Launch the cmake command:
cmake ..
If you want to compile macaon with debugging options, type:
cmake -DCMAKE_BUILD_TYPE=Debug ..
If you want to install macaon locally, you can specify the install path with:
cmake -DCMAKE_INSTALL_PREFIX:PATH=/absolute/path/to/macaon_install_dir ..
......
......@@ -5,4 +5,5 @@
void myfree(void *ptr);
void *memalloc(size_t s);
FILE *myfopen(const char *path, const char *mode);
FILE *myfopen_no_exit(const char *path, const char *mode);
#endif
......@@ -31,7 +31,7 @@ void form2pos_free(form2pos *f2p)
form2pos *form2pos_read(char *filename)
{
FILE *f = myfopen(filename, "r");
FILE *f = myfopen_no_exit(filename, "r");
int nbelem;
int pos_nb;
char pos_list[10000];
......@@ -39,6 +39,8 @@ form2pos *form2pos_read(char *filename)
char signature[200];
form2pos *f2p = NULL;
if(f == NULL) return NULL;
/* read number of forms */
fscanf(f, "%d\n", &nbelem);
......
......@@ -25,3 +25,12 @@ FILE *myfopen(const char *path, const char *mode)
}
return f;
}
/*
 * Open the file `path` with fopen() using the access mode `mode`.
 *
 * On success the opened stream is returned. On failure a
 * "cannot open file" message naming `path` is written to stderr and
 * NULL is returned; the program is not terminated, so the caller must
 * check the result before using it.
 */
FILE *myfopen_no_exit(const char *path, const char *mode)
{
  FILE *stream = fopen(path, mode);

  /* success: hand the stream straight back to the caller */
  if(stream != NULL)
    return stream;

  /* failure: report it, then let the caller decide what to do */
  fprintf(stderr, "cannot open file %s\n", path);
  return NULL;
}
......@@ -123,7 +123,7 @@ int main(int argc, char *argv[])
/* look for a valid word */
while(fgets(buffer, 10000, f)){
if(feof(f)) return 0; /* no more words to read */
if((buffer[0] == '\n') || (buffer[0] == ' ')){
if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')){
printf("\n");
continue;
}
......
......@@ -7,9 +7,6 @@
#include "context.h"
#include "util.h"
void context_set_linguistic_resources_filenames(context *ctx);
void context_free(context *ctx)
{
if(ctx->program_name) free(ctx->program_name);
......@@ -306,13 +303,8 @@ context *context_read_options(int argc, char *argv[])
}
}
context_set_linguistic_resources_filenames(ctx);
if(ctx->features_model_filename){
ctx->features_model = feat_model_read(ctx->features_model_filename);
}
/* if(ctx->mcd_filename && ctx->conll_filename){
ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->conll_filename);
ctx->mvt_nb = ctx->mcd_struct->dico_array[ctx->mcd_struct->type2col[FEAT_TYPE_LABEL]]->nbelem * 2 + 1;
......@@ -341,7 +333,7 @@ context *context_read_options(int argc, char *argv[])
return ctx;
}
void context_set_linguistic_resources_filenames(context *ctx)
void context_set_linguistic_resources_filenames_parser(context *ctx)
{
char absolute_path[500];
char absolute_filename[500];
......@@ -382,11 +374,10 @@ void context_set_linguistic_resources_filenames(context *ctx)
ctx->features_model_filename = strdup(absolute_filename);
}
/* fprintf(stdout, "perc_model_filename = %s\n", ctx->perc_model_filename);
fprintf(stdout, "vocabs_filename = %s\n", ctx->vocabs_filename);
fprintf(stdout, "mcd_filename = %s\n", ctx->mcd_filename);
fprintf(stdout, "perc_features_model_filename = %s\n", ctx->features_model_filename);*/
fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename);
fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
}
void context_set_linguistic_resources_filenames_tagger(context *ctx)
......@@ -430,9 +421,17 @@ void context_set_linguistic_resources_filenames_tagger(context *ctx)
ctx->features_model_filename = strdup(absolute_filename);
}
/* fprintf(stdout, "perc_model_filename = %s\n", ctx->perc_model_filename);
fprintf(stdout, "vocabs_filename = %s\n", ctx->vocabs_filename);
fprintf(stdout, "mcd_filename = %s\n", ctx->mcd_filename);
fprintf(stdout, "perc_features_model_filename = %s\n", ctx->features_model_filename);*/
if(!ctx->f2p_filename){
strcpy(absolute_filename, absolute_path);
strcat(absolute_filename, DEFAULT_F2P_FILENAME);
ctx->f2p_filename = strdup(absolute_filename);
ctx->f2p = form2pos_read(ctx->f2p_filename);
}
fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename);
fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
fprintf(stderr, "f2p_filename = %s\n", ctx->f2p_filename);
}
......@@ -14,6 +14,7 @@
#define DEFAULT_FEATURES_MODEL_TAGGER_FILENAME "maca_trans_tagger.fm"
#define DEFAULT_VOCABS_TAGGER_FILENAME "maca_trans_tagger.vocab"
#define DEFAULT_MODEL_TAGGER_FILENAME "maca_trans_tagger.model"
#define DEFAULT_F2P_FILENAME "fP"
#include "dico_vec.h"
#include "feat_model.h"
......@@ -92,4 +93,10 @@ void context_maca_data_path_help_message(context *ctx);
void context_f2p_filename_help_message(context *ctx);
void context_set_linguistic_resources_filenames_tagger(context *ctx);
void context_set_linguistic_resources_filenames_parser(context *ctx);
#endif
......@@ -53,6 +53,9 @@ int main(int argc, char *argv[])
ctx = context_read_options(argc, argv);
decode_check_options(ctx);
context_set_linguistic_resources_filenames_parser(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename);
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs);
......
......@@ -46,11 +46,12 @@ int main(int argc, char *argv[])
{
FILE *conll_file = NULL;
context *ctx;
/* struct fann *ann; */
ctx = context_read_options(argc, argv);
decode_check_options(ctx);
context_set_linguistic_resources_filenames_tagger(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename);
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs);
......
......@@ -77,7 +77,8 @@ void depset_print2(FILE *f, depset *d, dico *dico_labels)
for(i=1; i < d->length; i++){
if((d->array[i].gov) && (d->array[i].dep)){
fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, word_get_index(d->array[i].gov) - word_get_index(d->array[i].dep), dico_int2string(dico_labels, d->array[i].label));
/* fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, word_get_index(d->array[i].gov) - word_get_index(d->array[i].dep), dico_int2string(dico_labels, d->array[i].label ));*/
fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, word_get_index(d->array[i].gov), dico_int2string(dico_labels, d->array[i].label));
}
}
fprintf(f, "\n");
......
......@@ -170,6 +170,10 @@ int main(int argc, char *argv[])
ctx = context_read_options(argc, argv);
maca_trans_parser_conll2cff_check_options(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename);
if(ctx->mode == TRAIN_MODE){
mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename);
ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);
......
......@@ -82,7 +82,6 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
fprintf(output_file, "%d", postag);
feat_vec_print(output_file, fv);
if(postag != -1)
movement_tagger(c, postag, 0, 1);
}
......@@ -111,7 +110,6 @@ void generate_training_file_buffer(FILE *output_file, context *ctx)
if(ctx->f2p)
add_signature_to_words_in_queue(c->bf, ctx->f2p);
while(!config_is_terminal(c)){
/* config_print(stdout, c); */
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);
......@@ -136,6 +134,9 @@ int main(int argc, char *argv[])
ctx = context_read_options(argc, argv);
maca_trans_parser_conll2cff_check_options(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename);
if(ctx->mode == TRAIN_MODE){
mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename);
ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);
......@@ -147,7 +148,6 @@ int main(int argc, char *argv[])
feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);
/* in train mode, create the feature dictionary for the perceptron */
if(ctx->mode == TRAIN_MODE)
ctx->d_perceptron_features = dico_new((char *)"d_perceptron_features", 10000000);
......
......@@ -22,7 +22,7 @@ int queue_read_sentence(queue *bf, FILE *f, mcd *mcd_struct)
while(fgets(buffer, 10000, f)){
if(feof(f)) break;
/* fprintf(stderr, "%s", buffer); */
if((buffer[0] == '\n') || (buffer[0] == ' ')) break; /* end of the sentence */
if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')) break; /* end of the sentence */
w = word_parse_buffer(buffer, mcd_struct);
if(word_get_index(w) == -1){
w->feat_array[FEAT_TYPE_INDEX] = index++;
......
......@@ -21,25 +21,17 @@ void add_signature_to_words_in_queue(queue *bf, form2pos *f2p)
}
}
void simple_decoder_buffer(context *ctx)
{
FILE *f = NULL;
dico *dico_pos = dico_vec_get_dico(ctx->vocabs, (char *)"POS");
feature_table *ft = feature_table_load(ctx->perc_model_filename);
config *c = NULL;
int postag;
feat_vec *fv = feat_vec_new(feature_types_nb);
float max;
int i;
word *w;
if(ctx->conll_filename)
f= myfopen(ctx->conll_filename, "r");
else
f= stdin;
c = config_initial(f, ctx->mcd_struct, 1000, 0);
word *w = NULL;
FILE *f = (ctx->conll_filename)? myfopen(ctx->conll_filename, "r") : stdin;
config *c = config_initial(f, ctx->mcd_struct, 1000, 0);
/* read a sentence and put it in the buffer */
while(queue_read_sentence(c->bf, f, ctx->mcd_struct)){
......@@ -59,6 +51,7 @@ void simple_decoder_buffer(context *ctx)
w = stack_elt_n(c->st, i);
printf("%s\t%s\n", w->input, dico_int2string(dico_pos, word_get_pos(w)));
}
printf("\n");
/* config_free(c); */
c = config_initial(f, ctx->mcd_struct, 1000, 0);
......@@ -74,7 +67,6 @@ void simple_decoder_stream(context *ctx)
feat_vec *fv = feat_vec_new(feature_types_nb);
FILE *f = NULL;
/* when in stream mode, force to renumber the tokens (ugly !) */
ctx->mcd_struct->type[ctx->mcd_struct->type2col[FEAT_TYPE_INDEX]] = -1;
......@@ -82,23 +74,14 @@ void simple_decoder_stream(context *ctx)
while(!config_is_terminal(c)){
config_print(stdout, c);
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
}
/* config_print(stdout, c); */
/* config_free(c); */
}
void simple_decoder_tagger(context *ctx)
/* (FILE *f, mcd *mcd_struct, dico *d_perceptron_features, dico *dico_pos, feature_table *ft, feat_model *fm, int verbose, int stream_mode)*/
{
/*conll_file, ctx->mcd_struct, ctx->d_perceptron_features, dico_pos, ft, ctx->features_model, ctx->verbose, ctx->stream_mode);*/
ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
if(ctx->stream_mode)
......
......@@ -31,6 +31,7 @@ word *word_read(FILE *f, mcd *mcd_struct)
while(fgets(buffer, 10000, f)){
if(feof(f)) return NULL; /* no more words to read */
if((buffer[0] != '\n') && (buffer[0] != ' ')){
/* printf("word = %s\n", buffer); */
return word_parse_buffer(buffer, mcd_struct);
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment