Skip to content
Snippets Groups Projects
Commit 91721019 authored by Alexis Nasr
Browse files

a new function in json2mcf to get the id of the first token of a sentence

parent cf114feb
No related branches found
No related tags found
No related merge requests found
...@@ -251,12 +251,8 @@ void update_segment(word_buffer *wb, int start, int end, char *label, char *stat ...@@ -251,12 +251,8 @@ void update_segment(word_buffer *wb, int start, int end, char *label, char *stat
else else
word_set_pos(w, label_code); word_set_pos(w, label_code);
} }
} }
void process_segment(json_attr_val *avl, word_buffer *wb) void process_segment(json_attr_val *avl, word_buffer *wb)
{ {
int start, end; int start, end;
...@@ -305,20 +301,43 @@ void process_link(json_attr_val *avl, word_buffer *wb) ...@@ -305,20 +301,43 @@ void process_link(json_attr_val *avl, word_buffer *wb)
// printf("link : orig = %d dest = %d label = %s status_link = %s status_lab = %s\n", orig, dest, label, status_link, status_lab); // printf("link : orig = %d dest = %d label = %s status_link = %s status_lab = %s\n", orig, dest, label, status_link, status_lab);
} }
void process_links(json_struct *segments, word_buffer *wb) void process_links(json_struct *links, word_buffer *wb)
{ {
json_struct *link; json_struct *link;
// printf("process_links\n"); // printf("process_links\n");
for(link = segments->u.first; link != NULL; link = link->next){ for(link = links->u.first; link != NULL; link = link->next){
process_link(link->u.attr_val_list, wb); process_link(link->u.attr_val_list, wb);
} }
} }
/*
 * Return the "id" of the first token of a document.
 *
 * Walks the document's attribute/value list looking for an attribute named
 * "tokens". For each such attribute, the first element of its value is
 * scanned for an attribute named "id", whose numeric value is truncated to
 * int and returned.
 *
 * document: parsed JSON object describing the document (may be NULL).
 *
 * Returns the id of the first token, or -1 when the document is NULL, has
 * no "tokens" attribute, the token list is empty, or the first token has no
 * usable "id" attribute.
 *
 * NOTE(review): the JSON node kind is not checked here; this assumes the
 * "tokens" value is a list (u.first) and the "id" value is a number
 * (u.number) -- confirm against the json_struct definition.
 */
int get_id_of_first_token_in_document(json_struct *document)
{
    json_attr_val *doc_attr;

    if (document == NULL)
        return -1;

    for (doc_attr = document->u.attr_val_list; doc_attr != NULL; doc_attr = doc_attr->next) {
        json_struct *tokens;
        json_struct *first_token;
        json_attr_val *tok_attr;

        if (strcmp(doc_attr->attr, "tokens") != 0)
            continue;

        tokens = doc_attr->val;
        if (tokens == NULL)
            continue;

        first_token = tokens->u.first;
        if (first_token == NULL)
            continue;

        for (tok_attr = first_token->u.attr_val_list; tok_attr != NULL; tok_attr = tok_attr->next) {
            /* Skip an "id" attribute without a value instead of dereferencing NULL. */
            if (strcmp(tok_attr->attr, "id") == 0 && tok_attr->val != NULL)
                return (int)tok_attr->val->u.number;
        }
    }

    return -1;
}
void process_document(json_struct *document, word_buffer *wb) void process_document(json_struct *document, word_buffer *wb)
{ {
json_attr_val *avl = NULL; json_attr_val *avl = NULL;
// printf("process_document\n"); int offset = get_id_of_first_token_in_document(document);
printf("process_document, offset = %d\n", offset);
for(avl = document->u.attr_val_list; avl != NULL; avl = avl->next){ for(avl = document->u.attr_val_list; avl != NULL; avl = avl->next){
// if(!strcmp(avl->attr, (char *)"id")) printf("id = %s\n", avl->val->u.string); // if(!strcmp(avl->attr, (char *)"id")) printf("id = %s\n", avl->val->u.string);
if(!strcmp(avl->attr, (char *)"segments")) process_segments(avl->val, wb); if(!strcmp(avl->attr, (char *)"segments")) process_segments(avl->val, wb);
......
...@@ -184,8 +184,6 @@ void print_link(FILE *output_file, word_buffer *wb, int index_first_word, int in ...@@ -184,8 +184,6 @@ void print_link(FILE *output_file, word_buffer *wb, int index_first_word, int in
word *w = word_buffer_get_word_n(wb, index); word *w = word_buffer_get_word_n(wb, index);
fprintf(output_file, "{"); fprintf(output_file, "{");
// fprintf(output_file, "\"orig\": %d, ", word_get_offset(w));
fprintf(output_file, "\"orig\": %d, ", index - index_first_word); fprintf(output_file, "\"orig\": %d, ", index - index_first_word);
fprintf(output_file, "\"dest\":"); fprintf(output_file, "\"dest\":");
if(gov_col){ if(gov_col){
...@@ -193,7 +191,6 @@ void print_link(FILE *output_file, word_buffer *wb, int index_first_word, int in ...@@ -193,7 +191,6 @@ void print_link(FILE *output_file, word_buffer *wb, int index_first_word, int in
fprintf(output_file, "-1"); fprintf(output_file, "-1");
else{ else{
word *gov = word_buffer_get_word_n(wb, word_get_gov(w) + index); word *gov = word_buffer_get_word_n(wb, word_get_gov(w) + index);
// fprintf(output_file, "%d", word_get_offset(gov));
fprintf(output_file, "%d", word_get_gov(w) + index - index_first_word); fprintf(output_file, "%d", word_get_gov(w) + index - index_first_word);
} }
} }
...@@ -240,9 +237,7 @@ void print_segment(FILE *output_file, word_buffer *wb, int index_first_word, int ...@@ -240,9 +237,7 @@ void print_segment(FILE *output_file, word_buffer *wb, int index_first_word, int
word *w = word_buffer_get_word_n(wb, index); word *w = word_buffer_get_word_n(wb, index);
fprintf(output_file, "{ "); fprintf(output_file, "{ ");
/* fprintf(output_file, "\"start\": %d, ", word_get_offset(w)); */
fprintf(output_file, "\"start\": %d, ", index - index_first_word); fprintf(output_file, "\"start\": %d, ", index - index_first_word);
/* fprintf(output_file, "\"end\": %d, ", word_get_offset(w) + word_get_length(w) - 1); */
fprintf(output_file, "\"end\": %d, ", index - index_first_word); fprintf(output_file, "\"end\": %d, ", index - index_first_word);
fprintf(output_file, "\"label\": \""); fprintf(output_file, "\"label\": \"");
...@@ -278,16 +273,12 @@ void print_segments(FILE *output_file, word_buffer *wb, int index_first_word, in ...@@ -278,16 +273,12 @@ void print_segments(FILE *output_file, word_buffer *wb, int index_first_word, in
void print_token(FILE *output_file, word_buffer *wb, int index) void print_token(FILE *output_file, word_buffer *wb, int index)
{ {
int form_col = mcd_get_form_col(word_buffer_get_mcd(wb)); int form_col = mcd_get_form_col(word_buffer_get_mcd(wb));
int offset_col = mcd_get_offset_col(word_buffer_get_mcd(wb));
int length_col = mcd_get_length_col(word_buffer_get_mcd(wb)); int length_col = mcd_get_length_col(word_buffer_get_mcd(wb));
word *w = word_buffer_get_word_n(wb, index); word *w = word_buffer_get_word_n(wb, index);
char token[5000]; char token[5000];
int length_token, i; int length_token, i;
fprintf(output_file, "{ "); fprintf(output_file, "{ ");
if(word_get_offset(w) != -1)
fprintf(output_file, "\"id\": %d, ", word_get_offset(w));
else
fprintf(output_file, "\"id\": %d, ", word_get_index(w)); fprintf(output_file, "\"id\": %d, ", word_get_index(w));
fprintf(output_file, "\"word\": \""); fprintf(output_file, "\"word\": \"");
if(form_col != -1){ if(form_col != -1){
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment