Commit 91721019 authored by Alexis Nasr
Browse files

a new function in json2mcf to get the id of the first token of a sentence

parent cf114feb
......@@ -251,12 +251,8 @@ void update_segment(word_buffer *wb, int start, int end, char *label, char *stat
else
word_set_pos(w, label_code);
}
}
void process_segment(json_attr_val *avl, word_buffer *wb)
{
int start, end;
......@@ -305,20 +301,43 @@ void process_link(json_attr_val *avl, word_buffer *wb)
// printf("link : orig = %d dest = %d label = %s status_link = %s status_lab = %s\n", orig, dest, label, status_link, status_lab);
}
/*
 * Run process_link() on every element of a JSON list of links.
 *
 * links: JSON list node; its elements are walked from u.first through next,
 *        and each element's attribute/value list is handed to process_link().
 * wb:    word buffer passed through unchanged to process_link().
 */
void process_links(json_struct *links, word_buffer *wb)
{
  json_struct *link;

  // printf("process_links\n");
  for(link = links->u.first; link != NULL; link = link->next){
    process_link(link->u.attr_val_list, wb);
  }
}
/*
 * Find the "id" of the first token listed under a document's "tokens"
 * attribute.
 *
 * document: JSON object whose attribute/value list is scanned for attributes
 *           named "tokens".
 *
 * Returns the numeric value of the first token's "id" attribute, converted to
 * int, or -1 when no "tokens" attribute provides a first token carrying an
 * "id" with a value.
 */
int get_id_of_first_token_in_document(json_struct *document)
{
  json_attr_val *avl;

  for(avl = document->u.attr_val_list; avl != NULL; avl = avl->next){
    json_struct *tokens, *token;
    json_attr_val *avl2;

    if(strcmp(avl->attr, "tokens") != 0)
      continue;

    tokens = avl->val;
    if(tokens == NULL)
      continue;

    /* only the first element of the token list is inspected */
    token = tokens->u.first;
    if(token == NULL)
      continue;

    for(avl2 = token->u.attr_val_list; avl2 != NULL; avl2 = avl2->next){
      if(strcmp(avl2->attr, "id") != 0)
        continue;
      /* an attribute value may be absent (the "tokens" value is checked the
         same way above), so never dereference it blindly */
      if(avl2->val != NULL)
        return (int)avl2->val->u.number;
    }
  }
  return -1;
}
void process_document(json_struct *document, word_buffer *wb)
{
json_attr_val *avl = NULL;
// printf("process_document\n");
int offset = get_id_of_first_token_in_document(document);
printf("process_document, offset = %d\n", offset);
for(avl = document->u.attr_val_list; avl != NULL; avl = avl->next){
// if(!strcmp(avl->attr, (char *)"id")) printf("id = %s\n", avl->val->u.string);
if(!strcmp(avl->attr, (char *)"segments")) process_segments(avl->val, wb);
......
......@@ -184,8 +184,6 @@ void print_link(FILE *output_file, word_buffer *wb, int index_first_word, int in
word *w = word_buffer_get_word_n(wb, index);
fprintf(output_file, "{");
// fprintf(output_file, "\"orig\": %d, ", word_get_offset(w));
fprintf(output_file, "\"orig\": %d, ", index - index_first_word);
fprintf(output_file, "\"dest\":");
if(gov_col){
......@@ -193,7 +191,6 @@ void print_link(FILE *output_file, word_buffer *wb, int index_first_word, int in
fprintf(output_file, "-1");
else{
word *gov = word_buffer_get_word_n(wb, word_get_gov(w) + index);
// fprintf(output_file, "%d", word_get_offset(gov));
fprintf(output_file, "%d", word_get_gov(w) + index - index_first_word);
}
}
......@@ -240,9 +237,7 @@ void print_segment(FILE *output_file, word_buffer *wb, int index_first_word, int
word *w = word_buffer_get_word_n(wb, index);
fprintf(output_file, "{ ");
/* fprintf(output_file, "\"start\": %d, ", word_get_offset(w)); */
fprintf(output_file, "\"start\": %d, ", index - index_first_word);
/* fprintf(output_file, "\"end\": %d, ", word_get_offset(w) + word_get_length(w) - 1); */
fprintf(output_file, "\"end\": %d, ", index - index_first_word);
fprintf(output_file, "\"label\": \"");
......@@ -278,17 +273,13 @@ void print_segments(FILE *output_file, word_buffer *wb, int index_first_word, in
void print_token(FILE *output_file, word_buffer *wb, int index)
{
int form_col = mcd_get_form_col(word_buffer_get_mcd(wb));
int offset_col = mcd_get_offset_col(word_buffer_get_mcd(wb));
int length_col = mcd_get_length_col(word_buffer_get_mcd(wb));
word *w = word_buffer_get_word_n(wb, index);
char token[5000];
int length_token, i;
fprintf(output_file, "{ ");
if(word_get_offset(w) != -1)
fprintf(output_file, "\"id\": %d, ", word_get_offset(w));
else
fprintf(output_file, "\"id\": %d, ", word_get_index(w));
fprintf(output_file, "\"id\": %d, ", word_get_index(w));
fprintf(output_file, "\"word\": \"");
if(form_col != -1){
word_sprint_col_n(token, w, form_col);
......
......@@ -172,7 +172,7 @@ int movement_eos(config *c, int movement_code)
/* set word on the top of the stack to eos */
word_set_sent_seg(s0, 1);
stack_pop(config_get_stack(c));
stack_pop(config_get_stack(c));
config_push_mvt(c, movement_code, s0, NULL);
return 1;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment