From cf114feb1717ac3d3d8431e71b7183075085fb39 Mon Sep 17 00:00:00 2001 From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr> Date: Tue, 27 Mar 2018 11:24:48 +0200 Subject: [PATCH] little modifications in mcf2json: double quote is replaced by &quot;; when there is no offset in the input file, the token index is used --- maca_tools/src/mcf2json.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/maca_tools/src/mcf2json.c b/maca_tools/src/mcf2json.c index fc5bb5d..e565d0e 100644 --- a/maca_tools/src/mcf2json.c +++ b/maca_tools/src/mcf2json.c @@ -281,12 +281,25 @@ void print_token(FILE *output_file, word_buffer *wb, int index) int offset_col = mcd_get_offset_col(word_buffer_get_mcd(wb)); int length_col = mcd_get_length_col(word_buffer_get_mcd(wb)); word *w = word_buffer_get_word_n(wb, index); + char token[5000]; + int length_token, i; fprintf(output_file, "{ "); - fprintf(output_file, "\"id\": %d, ", word_get_offset(w)); + if(word_get_offset(w) != -1) + fprintf(output_file, "\"id\": %d, ", word_get_offset(w)); + else + fprintf(output_file, "\"id\": %d, ", word_get_index(w)); fprintf(output_file, "\"word\": \""); - if(form_col != -1) - word_print_col_n(output_file, w, form_col); + if(form_col != -1){ + word_sprint_col_n(token, w, form_col); + length_token = strlen(token); + for(i=0; i < length_token; i++){ + if(token[i] == '"') + fprintf(output_file, """); + else + fprintf(output_file, "%c", token[i]); + } + } else fprintf(output_file, "_"); fprintf(output_file, "\", "); -- GitLab