diff --git a/maca_common/src/dico.c b/maca_common/src/dico.c index eb0a1cd3a8555d67bbe47f4405807d7d0cc47ba0..58155929e8a97d00e29d32889b38b683968e5806 100644 --- a/maca_common/src/dico.c +++ b/maca_common/src/dico.c @@ -153,6 +153,7 @@ dico *dico_extract_from_corpus(char *filename, int column, char *dico_name) char *token; int column_nb = 0; + if(feof(f)) return NULL; /* no more words to read */ while(fgets(buffer, 10000, f)){ @@ -164,7 +165,7 @@ dico *dico_extract_from_corpus(char *filename, int column, char *dico_name) column_nb = 0; do{ if(column_nb == column){ - /* printf("token = %s\n", token); */ + // printf("token = %s\n", token); dico_add(d, token); } column_nb++; diff --git a/maca_tools/src/json2mcf.c b/maca_tools/src/json2mcf.c index 97da47daa426538dad9f9400faf2a14be527e589..d44a70a551cce18713226ef03649b3a1116c7516 100644 --- a/maca_tools/src/json2mcf.c +++ b/maca_tools/src/json2mcf.c @@ -232,14 +232,18 @@ void update_segment(word_buffer *wb, int start, int end, char *label, char *stat { int index; word *w; - int label_code; + int label_code = -1; dico *d; + mcd *mcd_struct = NULL; + + mcd_struct = word_buffer_get_mcd(wb); + d = mcd_struct->dico_array[mcd_get_pos_col(mcd_struct)]; - if(status_lab && !strcmp(status_lab, "GOLD")){ - printf("updating label of segment [%d-%d] with \"%s\"\n", start, end, label); + if(status_lab && !strcmp(status_lab, "G")){ + fprintf(stderr, "updating label of segment [%d-%d] with \"%s\"\n", start, end, label); index = word_buffer_locate_token(wb, start); w = word_buffer_get_word_n(wb, index); - d = word_buffer_get_mcd(wb)->dico_array[MCD_WF_CPOS]; + if(d) label_code = dico_string2int(d, label); if(label_code == -1) diff --git a/maca_tools/src/mcf2json.c b/maca_tools/src/mcf2json.c index 2cf60bf46ded254306c8ba6750a9b1301107af0f..fc5bb5d5e8a5b974e76de0874ed50856e47ace61 100644 --- a/maca_tools/src/mcf2json.c +++ b/maca_tools/src/mcf2json.c @@ -176,7 +176,7 @@ void print_header(FILE *output_file, mcd *mcd_struct, char *filename) } -void print_link(FILE *output_file, word_buffer *wb, int index) +void print_link(FILE *output_file, word_buffer *wb, int index_first_word, int index) { int gov_col = mcd_get_gov_col(word_buffer_get_mcd(wb)); int label_col = mcd_get_label_col(word_buffer_get_mcd(wb)); @@ -186,15 +186,15 @@ void print_link(FILE *output_file, word_buffer *wb, int index) fprintf(output_file, "{"); // fprintf(output_file, "\"orig\": %d, ", word_get_offset(w)); - fprintf(output_file, "\"orig\": %d, ", index); + fprintf(output_file, "\"orig\": %d, ", index - index_first_word); fprintf(output_file, "\"dest\":"); if(gov_col){ if((word_get_gov(w) == 0) || ((word_get_gov(w) + index) < 0)) - fprintf(output_file, "0"); + fprintf(output_file, "-1"); else{ word *gov = word_buffer_get_word_n(wb, word_get_gov(w) + index); // fprintf(output_file, "%d", word_get_offset(gov)); - fprintf(output_file, "%d", word_get_gov(w) + index); + fprintf(output_file, "%d", word_get_gov(w) + index - index_first_word); } } else @@ -228,7 +228,7 @@ void print_links(FILE *output_file, word_buffer *wb, int index_first_word, int i for(index = index_first_word; index <= index_last_word; index++){ if(first_link == 1) first_link = 0; else fprintf(output_file, ","); fprintf(output_file, "\n"); - print_link(output_file, wb, index); + print_link(output_file, wb, index_first_word, index); } fprintf(output_file," ]"); }