diff --git a/maca_tools/src/json2mcf.c b/maca_tools/src/json2mcf.c index d44a70a551cce18713226ef03649b3a1116c7516..99c64c6e4b9ee549fefc6abb679c2866d71aeb41 100644 --- a/maca_tools/src/json2mcf.c +++ b/maca_tools/src/json2mcf.c @@ -251,12 +251,8 @@ void update_segment(word_buffer *wb, int start, int end, char *label, char *stat else word_set_pos(w, label_code); } - } - - - void process_segment(json_attr_val *avl, word_buffer *wb) { int start, end; @@ -305,20 +301,43 @@ void process_link(json_attr_val *avl, word_buffer *wb) // printf("link : orig = %d dest = %d label = %s status_link = %s status_lab = %s\n", orig, dest, label, status_link, status_lab); } -void process_links(json_struct *segments, word_buffer *wb) +void process_links(json_struct *links, word_buffer *wb) { json_struct *link; // printf("process_links\n"); - for(link = segments->u.first; link != NULL; link = link->next){ + for(link = links->u.first; link != NULL; link = link->next){ process_link(link->u.attr_val_list, wb); } } +int get_id_of_first_token_in_document(json_struct *document) +{ + json_attr_val *avl = NULL; + json_struct *tokens, *token; + json_attr_val *avl2 = NULL; + for(avl = document->u.attr_val_list; avl != NULL; avl = avl->next){ + if(!strcmp(avl->attr, (char *)"tokens")){ + tokens = avl->val; + if(tokens){ + token = tokens->u.first; + if(token){ + for(avl2 = token->u.attr_val_list; avl2 != NULL; avl2 = avl2->next){ + if(!strcmp(avl2->attr, (char *)"id")) + return (int)avl2->val->u.number; + } + } + } + } + } + return -1; +} + void process_document(json_struct *document, word_buffer *wb) { json_attr_val *avl = NULL; - // printf("process_document\n"); + int offset = get_id_of_first_token_in_document(document); + printf("process_document, offset = %d\n", offset); for(avl = document->u.attr_val_list; avl != NULL; avl = avl->next){ // if(!strcmp(avl->attr, (char *)"id")) printf("id = %s\n", avl->val->u.string); if(!strcmp(avl->attr, (char *)"segments")) process_segments(avl->val, wb); diff --git a/maca_tools/src/mcf2json.c b/maca_tools/src/mcf2json.c index e565d0e1deb2b94012ab202f542835569cb3fb9c..08e523b22570a704ff91d75def8d6a85d6036d5e 100644 --- a/maca_tools/src/mcf2json.c +++ b/maca_tools/src/mcf2json.c @@ -184,8 +184,6 @@ void print_link(FILE *output_file, word_buffer *wb, int index_first_word, int in word *w = word_buffer_get_word_n(wb, index); fprintf(output_file, "{"); - - // fprintf(output_file, "\"orig\": %d, ", word_get_offset(w)); fprintf(output_file, "\"orig\": %d, ", index - index_first_word); fprintf(output_file, "\"dest\":"); if(gov_col){ @@ -193,7 +191,6 @@ void print_link(FILE *output_file, word_buffer *wb, int index_first_word, int in fprintf(output_file, "-1"); else{ word *gov = word_buffer_get_word_n(wb, word_get_gov(w) + index); -// fprintf(output_file, "%d", word_get_offset(gov)); fprintf(output_file, "%d", word_get_gov(w) + index - index_first_word); } } @@ -240,9 +237,7 @@ void print_segment(FILE *output_file, word_buffer *wb, int index_first_word, int word *w = word_buffer_get_word_n(wb, index); fprintf(output_file, "{ "); - /* fprintf(output_file, "\"start\": %d, ", word_get_offset(w)); */ fprintf(output_file, "\"start\": %d, ", index - index_first_word); - /* fprintf(output_file, "\"end\": %d, ", word_get_offset(w) + word_get_length(w) - 1); */ fprintf(output_file, "\"end\": %d, ", index - index_first_word); fprintf(output_file, "\"label\": \""); @@ -278,17 +273,13 @@ void print_segments(FILE *output_file, word_buffer *wb, int index_first_word, in void print_token(FILE *output_file, word_buffer *wb, int index) { int form_col = mcd_get_form_col(word_buffer_get_mcd(wb)); - int offset_col = mcd_get_offset_col(word_buffer_get_mcd(wb)); int length_col = mcd_get_length_col(word_buffer_get_mcd(wb)); word *w = word_buffer_get_word_n(wb, index); char token[5000]; int length_token, i; fprintf(output_file, "{ "); - if(word_get_offset(w) != -1) - fprintf(output_file, "\"id\": %d, ", word_get_offset(w)); - else - fprintf(output_file, "\"id\": %d, ", word_get_index(w)); + fprintf(output_file, "\"id\": %d, ", word_get_index(w)); fprintf(output_file, "\"word\": \""); if(form_col != -1){ word_sprint_col_n(token, w, form_col); diff --git a/maca_trans_parser/src/movements.c b/maca_trans_parser/src/movements.c index 53e1e8ce51a87e3244f3f4797af240cc0d2eb3e8..89853eb2c9b0bb9485e43575470efc9e033dc6fd 100644 --- a/maca_trans_parser/src/movements.c +++ b/maca_trans_parser/src/movements.c @@ -172,7 +172,7 @@ int movement_eos(config *c, int movement_code) /* set word on the top of the stack to eos */ word_set_sent_seg(s0, 1); - stack_pop(config_get_stack(c)); + stack_pop(config_get_stack(c)); config_push_mvt(c, movement_code, s0, NULL); return 1; }