From cf114feb1717ac3d3d8431e71b7183075085fb39 Mon Sep 17 00:00:00 2001
From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
Date: Tue, 27 Mar 2018 11:24:48 +0200
Subject: [PATCH] small modifications in mcf2json: double quotes are replaced
 by &quot, and when the input file has no offset, the token index is used

---
 maca_tools/src/mcf2json.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/maca_tools/src/mcf2json.c b/maca_tools/src/mcf2json.c
index fc5bb5d..e565d0e 100644
--- a/maca_tools/src/mcf2json.c
+++ b/maca_tools/src/mcf2json.c
@@ -281,12 +281,25 @@ void print_token(FILE *output_file, word_buffer *wb, int index)
   int offset_col =  mcd_get_offset_col(word_buffer_get_mcd(wb));
   int length_col =  mcd_get_length_col(word_buffer_get_mcd(wb));
   word *w = word_buffer_get_word_n(wb, index);
+  char token[5000];
+  int length_token, i;
 
   fprintf(output_file, "{ ");
-  fprintf(output_file, "\"id\": %d, ", word_get_offset(w));
+  if(word_get_offset(w) != -1)
+    fprintf(output_file, "\"id\": %d, ", word_get_offset(w));
+  else
+    fprintf(output_file, "\"id\": %d, ", word_get_index(w));
   fprintf(output_file, "\"word\": \"");
-  if(form_col != -1)
-    word_print_col_n(output_file, w, form_col);
+  if(form_col != -1){
+    word_sprint_col_n(token, w, form_col);
+    length_token = strlen(token);
+    for(i=0; i < length_token; i++){
+      if(token[i] == '"')
+	fprintf(output_file, "&quot");
+      else
+	fprintf(output_file, "%c", token[i]);
+    }
+   }
   else
     fprintf(output_file, "_");
   fprintf(output_file, "\", ");
-- 
GitLab