diff --git a/maca_common/include/mcd.h b/maca_common/include/mcd.h
index a86626fe4fa43f269579b86bb3ba769f525bf2ec..560fd6cb4baa6edcd911d9eee32f9b054f37513e 100644
--- a/maca_common/include/mcd.h
+++ b/maca_common/include/mcd.h
@@ -8,7 +8,7 @@
 
 #define MCD_INVALID_VALUE -1
 
-#define MCD_WF_NB 48
+#define MCD_WF_NB 51
 
 #define MCD_WF_ID 0
 #define MCD_WF_OFFSET 0 /* ID and OFFSET are synonymous */
@@ -61,6 +61,12 @@
 #define MCD_WF_Person 45
 #define MCD_WF_Tense 46
 
+#define MCD_WF_FILE 48
+#define MCD_WF_DIRECTORY 49
+#define MCD_WF_SPEAKER 50
+
+
+
 /*Abbr
 AdpType
 AdvType
diff --git a/maca_common/include/word.h b/maca_common/include/word.h
index 51b962200a24538c1c2840906ec3585735aad827..dcf18dc38b52b93a22209ccb73733a5eddd91b08 100644
--- a/maca_common/include/word.h
+++ b/maca_common/include/word.h
@@ -63,6 +63,11 @@ typedef struct _word {
 #define word_get_label(w)          (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LABEL])
 #define word_get_stag(w)           (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_STAG])
 #define word_get_sent_seg(w)       (((w) == NULL) ?  0 : (w)->wf_array[MCD_WF_SENT_SEG])
+
+#define word_get_file(w)           (((w) == NULL) ?  0 : (w)->wf_array[MCD_WF_FILE])
+#define word_get_directory(w)      (((w) == NULL) ?  0 : (w)->wf_array[MCD_WF_DIRECTORY])
+#define word_get_speaker(w)        (((w) == NULL) ?  0 : (w)->wf_array[MCD_WF_SPEAKER])
+
 #define word_get_A(w)              (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_A])
 #define word_get_B(w)              (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_B])
 #define word_get_C(w)              (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_C])
diff --git a/maca_common/src/mcd.c b/maca_common/src/mcd.c
index 4a2348e14f4f3c406c1af3636ff3b5f3a35822d5..34fd53b7bfbae62d856189c29183af4f735cd3c3 100644
--- a/maca_common/src/mcd.c
+++ b/maca_common/src/mcd.c
@@ -512,6 +512,12 @@ int mcd_wf_code(char *wf)
   /* if(!strcmp(wf, "INT")) return MCD_WF_INT; */
   if(!strcmp(wf, "GOV")) return MCD_WF_GOV;
   if(!strcmp(wf, "SENT_SEG")) return MCD_WF_SENT_SEG;
+
+  if(!strcmp(wf, "FILE")) return MCD_WF_FILE;
+  if(!strcmp(wf, "DIRECTORY")) return MCD_WF_DIRECTORY;
+  if(!strcmp(wf, "SPEAKER")) return MCD_WF_SPEAKER;
+
+  
   if(!strcmp(wf, "A")) return MCD_WF_A;
   if(!strcmp(wf, "B")) return MCD_WF_B;
   if(!strcmp(wf, "C")) return MCD_WF_C;
diff --git a/maca_common/src/word.c b/maca_common/src/word.c
index 21e303e844bc5e7fb9030a56995420bf55504a3f..33f27da9cc0865cfa55130e4dd1b5e2964b496a3 100644
--- a/maca_common/src/word.c
+++ b/maca_common/src/word.c
@@ -22,7 +22,6 @@ word *word_new(char *input)
   w->wf_array[MCD_WF_GOV] = WORD_INVALID_GOV;
   w->form = NULL;
   w->form_char16 = NULL;
-
   w->index = -1;
   w->signature = -1;
   w->is_root = 0;
diff --git a/maca_tokenizer/src/en_tok_rules.l b/maca_tokenizer/src/en_tok_rules.l
index 891e8f30fcf0473d476ac63f6ff8a0e3712d586d..5ae382b86d38d454d0627fec446e54048bd5d88b 100644
--- a/maca_tokenizer/src/en_tok_rules.l
+++ b/maca_tokenizer/src/en_tok_rules.l
@@ -12,6 +12,7 @@ extern char *token;
 /*%option noyywrap*/
 %%
 
+#.*    ECHO;
 \<[^\>]*\> {maca_tokenizer_segment((char *)"", yytext);}
 [ \t]+   {maca_tokenizer_segment((char *)"", yytext);}
 [ ]*\.   {maca_tokenizer_segment((char *)".", yytext);}
diff --git a/maca_tokenizer/src/fr_tok_rules.l b/maca_tokenizer/src/fr_tok_rules.l
index 907beaaed67fccb2883872a844d1feaa36b5f8b1..964b702fbaa3749834e286966655d18f696eb04f 100644
--- a/maca_tokenizer/src/fr_tok_rules.l
+++ b/maca_tokenizer/src/fr_tok_rules.l
@@ -26,7 +26,7 @@ nosepar [^ \t\n]
 	if(defait_amalgames){
 	BEGIN(state_defait_amalgames);
         }
-
+#.*    ECHO;
 \<[^\>]*\>   {maca_tokenizer_segment((char *)"", yytext);}
 {separ}+     {maca_tokenizer_segment((char *)"", yytext);}
 \.   {maca_tokenizer_segment((char *)".", yytext);}
diff --git a/maca_tools/src/mcf2json.c b/maca_tools/src/mcf2json.c
index 4381177cfd388d8012380286f515dc1ae5fd19a8..bfca900cd8763a8ec7ed11fb4f07e0c16efe1db5 100644
--- a/maca_tools/src/mcf2json.c
+++ b/maca_tools/src/mcf2json.c
@@ -2,6 +2,9 @@
 #include<stdlib.h>
 #include<string.h>
 #include<getopt.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
 
 #include"mcd.h"
 #include"util.h"
@@ -16,6 +19,7 @@ typedef struct {
   char *mcf_filename;
   char *mcd_filename;
   mcd *mcd_struct;
+  char *root_dir;
 } context;
 
 void mcf2json_context_free(context *ctx)
@@ -31,6 +35,8 @@ void mcf2json_context_free(context *ctx)
       free(ctx->mcd_filename);
     if(ctx->mcd_struct)
       mcd_free(ctx->mcd_struct);
+    if(ctx->root_dir)
+      free(ctx->root_dir);
     free(ctx);
   }
 }
@@ -47,6 +53,7 @@ context *mcf2json_context_new(void)
   ctx->mcf_filename = NULL;
   ctx->mcd_filename = NULL;
   ctx->mcd_struct = NULL;
+  ctx->root_dir = NULL;
   return ctx;
 }
 
@@ -58,7 +65,7 @@ void mcf2json_context_general_help_message(context *ctx)
   fprintf(stderr, "\t-v --verbose          : activate verbose mode\n");
   fprintf(stderr, "\t-C --mcd              : mcd filename\n");
   fprintf(stderr, "\t-i --mcf              : mcf filename (read from stdin if absent)\n");
-  fprintf(stderr, "\t-o --conll            : conll filename (write to stdout if absent)\n");
+  fprintf(stderr, "\t-r --root             : root directory of the json files\n");
 }
 
 void mcf2json_check_options(context *ctx){
@@ -81,14 +88,14 @@ context *mcf2json_context_read_options(int argc, char *argv[])
       {"help",                no_argument,       0, 'h'},
       {"verbose",             no_argument,       0, 'v'},
       {"debug",               no_argument,       0, 'd'},
-      {"conll",               required_argument, 0, 'o'},
       {"mcd",                 required_argument, 0, 'C'}, 
       {"mcf",                 required_argument, 0, 'i'},
+      {"root",                required_argument, 0, 'r'},
     };
   optind = 0;
   opterr = 0;
   
-  while ((c = getopt_long (argc, argv, "hvdo:C:i:", long_options, &option_index)) != -1){ 
+  while ((c = getopt_long (argc, argv, "hvdC:i:r:", long_options, &option_index)) != -1){ 
     switch (c)
       {
       case 'd':
@@ -100,15 +107,15 @@ context *mcf2json_context_read_options(int argc, char *argv[])
       case 'v':
 	ctx->verbose = 1;
 	break;
-      case 'o':
-	ctx->conll_filename = strdup(optarg);
-	break;
       case 'i':
 	ctx->mcf_filename = strdup(optarg);
 	break;
       case 'C':
 	ctx->mcd_filename = strdup(optarg);
 	break;
+      case 'r':
+	ctx->root_dir = strdup(optarg);
+	break;
       }
   }
 
@@ -118,7 +125,6 @@ context *mcf2json_context_read_options(int argc, char *argv[])
   else{
     ctx->mcd_struct = mcd_build_wpmlgfs();
   }
-
   return ctx;
 }
 
@@ -129,7 +135,7 @@ void print_footer(FILE *output_file)
 }
 
 
-void print_header(FILE *output_file, mcd *mcd_struct)
+void print_header(FILE *output_file, mcd *mcd_struct, char *filename)
 {
   int pos_col =  mcd_get_pos_col(mcd_struct);
   int label_col =  mcd_get_label_col(mcd_struct);
@@ -143,6 +149,7 @@ void print_header(FILE *output_file, mcd *mcd_struct)
   fprintf(output_file, "\"header\":{\n");
   fprintf(output_file, "\"id\": \"\",\n");
   fprintf(output_file, "\"timestamp\": \"\",\n");
+  fprintf(output_file, "\"filename\": \"%s\",\n", filename);
 
   fprintf(output_file, "\"labels_segment\": [");
   for(i=0; i < dico_pos->nbelem; i++){
@@ -160,7 +167,7 @@ void print_header(FILE *output_file, mcd *mcd_struct)
   
   fprintf(output_file, "},\n");
   
-  fprintf(output_file, "\"annotations\":{\n");
+  fprintf(output_file, "\"annotation\":{\n");
   fprintf(output_file, "\"name\": \"\",\n");
   fprintf(output_file, "\"time_start\": \"\",\n");
   fprintf(output_file, "\"time_end\": \"\"\n");
@@ -227,16 +234,16 @@ void print_links(FILE *output_file, word_buffer *wb, int index_first_word, int i
 }
 
 
-void print_segment(FILE *output_file, word_buffer *wb, int index)
+void print_segment(FILE *output_file, word_buffer *wb, int index_first_word, int index)
 {
   int pos_col =  mcd_get_pos_col(word_buffer_get_mcd(wb));
   word *w = word_buffer_get_word_n(wb, index);
 
   fprintf(output_file, "{ ");
   /* fprintf(output_file, "\"start\": %d, ", word_get_offset(w)); */
-  fprintf(output_file, "\"start\": %d, ", index);
+  fprintf(output_file, "\"start\": %d, ", index - index_first_word);
   /* fprintf(output_file, "\"end\": %d, ", word_get_offset(w) + word_get_length(w) - 1); */
-  fprintf(output_file, "\"end\": %d, ", index);
+  fprintf(output_file, "\"end\": %d, ", index - index_first_word);
   fprintf(output_file, "\"label\": \"");
   
   if(pos_col != -1)
@@ -258,12 +265,12 @@ void print_segments(FILE *output_file, word_buffer *wb, int index_first_word, in
 {
   int index;
   int first_segment = 1;
-  
+
   fprintf(output_file, "\"segments\": [");
   for(index = index_first_word; index <= index_last_word; index++){
     if(first_segment == 1) first_segment = 0; else fprintf(output_file, ",");
     fprintf(output_file, "\n");
-    print_segment(output_file, wb, index);
+    print_segment(output_file, wb, index_first_word, index);
   }
   fprintf(output_file," ],\n");
 }
@@ -317,7 +324,7 @@ void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int ind
 
 int main(int argc, char *argv[])
 {
-  FILE *output_file;
+  FILE *output_file = NULL;
   context *ctx = mcf2json_context_read_options(argc, argv);
   word_buffer *wb = NULL;
   word *w = NULL;
@@ -326,41 +333,107 @@ int main(int argc, char *argv[])
   int index_first_word;
   int index_last_word;
   int sentence_nb = 0;
-
+  /* Path buffers start empty so no string call ever reads indeterminate bytes. */
+  char current_directory[1000] = "";
+  char current_file[1000] = "";
+  char previous_directory[1000] = "";
+  char previous_file[1000] = "";
+  char filename_for_header[1000] = "";
+  char destination_file[1000] = "";
+  char destination_dir[1000] = "";
+  struct stat st = {0};
+  
   mcf2json_check_options(ctx);
 
   mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->mcf_filename);
-
-  output_file = (ctx->conll_filename)? myfopen_no_exit(ctx->conll_filename, "w"): stdout;
   wb = word_buffer_load_mcf(ctx->mcf_filename, ctx->mcd_struct);
 
-  print_header(output_file, ctx->mcd_struct);
-  do{
-    w = word_buffer_b0(wb);
-    if(new_sentence){
-      new_sentence = 0;
-      sentence_nb++;
-      index_first_word = word_buffer_get_current_index(wb);
-    }
-    if(word_get_sent_seg(w)){
-      index_last_word = word_buffer_get_current_index(wb);
-      new_sentence = 1;
-
-      if(first_sentence == 1)
-	first_sentence = 0;
-      else
-	fprintf(output_file, ",");
-      fprintf(output_file, "\n");
-      print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word);
+  if(ctx->root_dir){
+    if(stat(ctx->root_dir, &st) == -1) {
+      fprintf(stderr, "creating directory %s\n", ctx->root_dir);
+      if(mkdir(ctx->root_dir, 0700) == -1) perror(ctx->root_dir); /* report instead of silently continuing */
     }
-  } while(word_buffer_move_right(wb));
-  
-  print_footer(output_file);
-  if(ctx->conll_filename)
+    do{
+      w = word_buffer_b0(wb);
+      /* no word left; if nothing was ever opened, leave before the footer/fclose below */
+      if(w == NULL){ if(output_file) break; mcf2json_context_free(ctx); return 0; }
+      word_sprint_col_n(current_directory, w, ctx->mcd_struct->wf2col[MCD_WF_DIRECTORY]);
+      word_sprint_col_n(current_file, w, ctx->mcd_struct->wf2col[MCD_WF_FILE]);
+      /* output_file == NULL marks the first word: previous_* hold no value yet */
+      if(output_file == NULL || strcmp(current_directory, previous_directory)){
+        if(snprintf(destination_dir, sizeof destination_dir, "%s/%s", ctx->root_dir, current_directory) >= (int)sizeof destination_dir){
+          fprintf(stderr, "path too long: %s/%s\n", ctx->root_dir, current_directory);
+          exit(1);
+        }
+        if(stat(destination_dir, &st) == -1){
+          fprintf(stderr, "creating directory %s\n", destination_dir);
+          if(mkdir(destination_dir, 0700) == -1) perror(destination_dir);
+        }
+      }
+      /* switch output files when the file name or its directory changes */
+      if(output_file == NULL || strcmp(current_file, previous_file) || strcmp(current_directory, previous_directory)){
+        if(snprintf(destination_file, sizeof destination_file, "%s/%s.json", destination_dir, current_file) >= (int)sizeof destination_file){
+          fprintf(stderr, "path too long: %s/%s.json\n", destination_dir, current_file);
+          exit(1);
+        }
+        fprintf(stderr, "creating file %s\n", destination_file);
+        if(output_file){
+          print_footer(output_file);
+          fclose(output_file);
+        }
+        output_file = myfopen_no_exit(destination_file, "w");
+        if(output_file == NULL){ fprintf(stderr, "cannot open %s\n", destination_file); exit(1); }
+        snprintf(filename_for_header, sizeof filename_for_header, "%s/%s.json", current_directory, current_file);
+        print_header(output_file, ctx->mcd_struct, filename_for_header);
+        first_sentence = 1;
+      }
+      if(new_sentence){
+        new_sentence = 0;
+        sentence_nb++;
+        index_first_word = word_buffer_get_current_index(wb);
+      }
+      if(word_get_sent_seg(w)){
+        index_last_word = word_buffer_get_current_index(wb);
+        new_sentence = 1;
+        if(first_sentence == 1)
+          first_sentence = 0;
+        else
+          fprintf(output_file, ",");
+        fprintf(output_file, "\n");
+        print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word);
+      }
+      strcpy(previous_file, current_file);
+      strcpy(previous_directory, current_directory);
+    } while(word_buffer_move_right(wb));
+    print_footer(output_file);
     fclose(output_file);
-  mcf2json_context_free(ctx);
-
-
+  }
 
+  else{ //ctx->root_dir is NULL dump everything to stdout
+      output_file = stdout;
+      print_header(output_file, ctx->mcd_struct, "");
+      do{
+	w = word_buffer_b0(wb);
+	if(new_sentence){
+	  new_sentence = 0;
+	  sentence_nb++;
+	  index_first_word = word_buffer_get_current_index(wb);
+	}
+	if(word_get_sent_seg(w)){
+	  index_last_word = word_buffer_get_current_index(wb);
+	  new_sentence = 1;
+	  
+	  if(first_sentence == 1)
+	    first_sentence = 0;
+	  else
+	    fprintf(output_file, ",");
+	  fprintf(output_file, "\n");
+	  print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word);
+	}
+      } while(word_buffer_move_right(wb));
+      print_footer(output_file);
+  }
+  
+  mcf2json_context_free(ctx);
   return 0;
 }
diff --git a/maca_trans_parser/src/oracle_parser_arc_eager.c b/maca_trans_parser/src/oracle_parser_arc_eager.c
index e4aae787c960e5874ac87422af876ab9ae51db64..fd8199afb538789724edd59672825b787485252c 100644
--- a/maca_trans_parser/src/oracle_parser_arc_eager.c
+++ b/maca_trans_parser/src/oracle_parser_arc_eager.c
@@ -63,18 +63,19 @@ int oracle_parser_arc_eager(config *c, word_buffer *ref, int root_label)
     /* s0 is the root of the sentence */
     if((s0_label == root_label)
        //       && (word_get_label(word_buffer_get_word_n(config_get_buffer(c), s0_index)) != root_label)
-              && check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index)
+       && check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index)
        ){
       return MVT_PARSER_ROOT;
     }
-    
+
     /* word on the top of the stack is an end of sentence marker */
     if((word_get_sent_seg(word_buffer_get_word_n(ref, s0_index)) == 1)
        //       && (word_get_sent_seg(word_buffer_get_word_n(config_get_buffer(c), s0_index)) != 1)
-              && check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index)
+       && check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index)
        ){
       return MVT_PARSER_EOS;
     }
+
     /* LEFT ARC  b0 is the governor and s0 the dependent */
     if(s0_gov_index == b0_index){
       return movement_parser_left_code(word_get_label(word_buffer_get_word_n(ref, s0_index)));