diff --git a/maca_common/include/word_buffer.h b/maca_common/include/word_buffer.h index 136b5606bf8e3008687bd8f4f5aeebdd0cf76ab3..184608f1fa06277abe805ae448628e396a16f8fd 100644 --- a/maca_common/include/word_buffer.h +++ b/maca_common/include/word_buffer.h @@ -54,7 +54,7 @@ void word_buffer_print(FILE *f, word_buffer *wb); void word_buffer_print_compact(FILE *f, word_buffer *wb); int word_buffer_read_sentence(word_buffer *bw); word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct); -int word_buffer_locate_token(word_buffer *wb, int offset); +int word_buffer_locate_token_with_offset(word_buffer *wb, int offset); /* int word_buffer_is_empty(word_buffer *wb); int word_buffer_is_last(word_buffer *wb); diff --git a/maca_common/src/word_buffer.c b/maca_common/src/word_buffer.c index a15542bdaab0670230e13d44e96823cb970b888b..7e52dd3c1227ef1c0e955bb7383d0c4a97bfc4f3 100644 --- a/maca_common/src/word_buffer.c +++ b/maca_common/src/word_buffer.c @@ -156,7 +156,7 @@ int word_buffer_read_sentence(word_buffer *wb) return wb->nbelem ; } -int word_buffer_locate_token(word_buffer *wb, int offset) +int word_buffer_locate_token_with_offset(word_buffer *wb, int offset) { int c, first, last, middle; word *w_middle; @@ -165,7 +165,7 @@ int word_buffer_locate_token(word_buffer *wb, int offset) middle = (first+last)/2; while (first <= last) { - // printf("first = %d middle = %d last = %d\n", first, middle, last); + // printf("first = %d middle = %d last = %d\n", first, middle, last); w_middle = word_buffer_get_word_n(wb, middle); // printf("w middle = %d current offset = %d\n", w_middle, word_get_offset(w_middle)); if (word_get_offset(w_middle) < offset) diff --git a/maca_tools/src/json2mcf.c b/maca_tools/src/json2mcf.c index 99c64c6e4b9ee549fefc6abb679c2866d71aeb41..886049265d0f0db804ae08eca046c5f28996b3ef 100644 --- a/maca_tools/src/json2mcf.c +++ b/maca_tools/src/json2mcf.c @@ -241,7 +241,7 @@ void update_segment(word_buffer *wb, int start, int end, char *label, char *stat if(status_lab && !strcmp(status_lab, "G")){ fprintf(stderr, "updating label of segment [%d-%d] with \"%s\"\n", start, end, label); - index = word_buffer_locate_token(wb, start); + index = word_buffer_locate_token_with_offset(wb, start); w = word_buffer_get_word_n(wb, index); if(d) @@ -283,6 +283,40 @@ void process_segments(json_struct *segments, word_buffer *wb) // {"orig": 1, "dest":2, "label": "suj", "status_link": "", "status_lab": "", "timestamp": "", "author": "", "target": ""}, +void update_link(word_buffer *wb, int orig, int dest, char *label, char *status_link, char *status_lab) +{ + + int index; + word *w; + int label_code = -1; + dico *d; + mcd *mcd_struct = NULL; + + mcd_struct = word_buffer_get_mcd(wb); + d = mcd_struct->dico_array[mcd_get_label_col(mcd_struct)]; + + if(status_lab && !strcmp(status_lab, "G")){ + fprintf(stderr, "updating label of link %d -> %d with \"%s\"\n", orig, dest, label); + index = orig; + w = word_buffer_get_word_n(wb, index); + + if(d) + label_code = dico_string2int(d, label); + if(label_code == -1) + fprintf(stderr, "label %s unknown\n", label); + else + word_set_label(w, label_code); + } + + if(status_link && !strcmp(status_link, "G")){ + fprintf(stderr, "updating governor of token %d with %d\n", orig, dest); + index = orig; + w = word_buffer_get_word_n(wb, index); + word_set_gov(w, dest); + } + + +} void process_link(json_attr_val *avl, word_buffer *wb) { @@ -298,7 +332,10 @@ void process_link(json_attr_val *avl, word_buffer *wb) if(!strcmp(av->attr, "status_link")){status_link = av->val->u.string; continue;} if(!strcmp(av->attr, "status_lab")){status_lab = av->val->u.string; continue;} } - // printf("link : orig = %d dest = %d label = %s status_link = %s status_lab = %s\n", orig, dest, label, status_link, status_lab); + fprintf(stderr, "link : orig = %d dest = %d label = %s status_link = %s status_lab = %s\n", orig, dest, label, status_link, status_lab); + update_link(wb, orig, dest, label, status_link, status_lab); + + } void process_links(json_struct *links, word_buffer *wb)