diff --git a/build/debug.sh b/build/debug.sh
deleted file mode 100755
index ec037e3c185c39ac81f861094f0c7bb033df1a78..0000000000000000000000000000000000000000
--- a/build/debug.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-cmake -DCMAKE_BUILD_TYPE=Debug ..
-make
-sudo make install
diff --git a/build/script.sh b/build/script.sh
deleted file mode 100755
index 9485f62e027a988d84b15033b788b4b2d5d1970a..0000000000000000000000000000000000000000
--- a/build/script.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-cmake ..
-make
-sudo make install
diff --git a/maca_tokenizer/CMakeLists.txt b/maca_tokenizer/CMakeLists.txt
index b2e874ca1fb06951ff72b2a18986de00673e3992..0952a2b1400718b74b095ef9aeb76e18976c014a 100644
--- a/maca_tokenizer/CMakeLists.txt
+++ b/maca_tokenizer/CMakeLists.txt
@@ -2,6 +2,7 @@ FLEX_TARGET(fr_tok_rules ./src/fr_tok_rules.l ${CMAKE_CURRENT_BINARY_DIR}/fr_lex
 FLEX_TARGET(en_tok_rules ./src/en_tok_rules.l ${CMAKE_CURRENT_BINARY_DIR}/en_lex.c)
 
 set(SOURCES ./src/context.c
+  ./src/maca_tokenizer_functions_for_lex.c
   ${FLEX_fr_tok_rules_OUTPUTS}
   ${FLEX_en_tok_rules_OUTPUTS})
 
diff --git a/maca_tokenizer/src/context.c b/maca_tokenizer/src/context.c
index 9c9236bab08ac75200c457759ded3a78611046e3..949bbcc201f1f26eafd7462a89e3750fa57287b0 100644
--- a/maca_tokenizer/src/context.c
+++ b/maca_tokenizer/src/context.c
@@ -32,18 +32,11 @@ context *context_new(void)
   ctx->maca_data_path = NULL;
   ctx->input_filename = NULL;
   ctx->output_filename = NULL;
+  ctx->print_offset = 0;
+  ctx->print_token_length = 0;
   return ctx;
 }
 
-void context_general_help_message(context *ctx)
-{
-    fprintf(stderr, "usage: %s [options]\n", ctx->program_name);
-    fprintf(stderr, "Options:\n");
-    fprintf(stderr, "\t-h --help             : print this message\n");
-    fprintf(stderr, "\t-v --verbose          : activate verbose mode\n");
-    fprintf(stderr, "\t-r --hratio   <float> : set the occupation ratio of hash tables (default is 0.5)\n");
-}
-
 void context_input_help_message(context *ctx){
   fprintf(stderr, "\t-i --input  <file>  : input mcf file name\n");
 }
@@ -56,6 +49,22 @@ void context_language_help_message(context *ctx){
   fprintf(stderr, "\t-L --language  : identifier of the language to use\n");
 }
 
+void context_print_offset_message(context *ctx){ /* help line for -p; ctx is not used */
+  fputs("\t-p --print_offset  : print offset and token length\n", stderr);
+}
+
+/* Print the usage banner and the -h/-v/-r option lines to stderr, then the -p line
+   through context_print_offset_message(). */
+void context_general_help_message(context *ctx)
+{
+    fprintf(stderr, "usage: %s [options]\n"
+            "Options:\n"
+            "\t-h --help             : print this message\n"
+            "\t-v --verbose          : activate verbose mode\n"
+            "\t-r --hratio   <float> : set the occupation ratio of hash tables (default is 0.5)\n", ctx->program_name);
+    context_print_offset_message(ctx);
+}
+
 context *context_read_options(int argc, char *argv[])
 {
   int c;
@@ -64,11 +73,12 @@ context *context_read_options(int argc, char *argv[])
 
   ctx->program_name = strdup(argv[0]);
 
-  static struct option long_options[8] =
+  static struct option long_options[9] =
     {
       {"help",                no_argument,       0, 'h'},
       {"verbose",             no_argument,       0, 'v'},
       {"debug",               no_argument,       0, 'd'},
+      {"print_offset",        no_argument,       0, 'p'},
       {"input",               required_argument, 0, 'i'},
       {"output",              required_argument, 0, 'o'},
       {"mcd",                 required_argument, 0, 'C'}, 
@@ -78,7 +88,7 @@ context *context_read_options(int argc, char *argv[])
   optind = 0;
   opterr = 0;
   
-  while ((c = getopt_long (argc, argv, "hvdi:o:C:L:D:", long_options, &option_index)) != -1){ 
+  while ((c = getopt_long (argc, argv, "hvdpi:o:C:L:D:", long_options, &option_index)) != -1){ 
     switch (c)
       {
       case 'd':
@@ -105,6 +115,10 @@ context *context_read_options(int argc, char *argv[])
       case 'D':
 	ctx->maca_data_path = strdup(optarg);
 	break;
+      case 'p':
+	ctx->print_offset = 1;
+	ctx->print_token_length = 1;
+	break;
       }
   }
 
diff --git a/maca_tokenizer/src/context.h b/maca_tokenizer/src/context.h
index ce7e8f1d68cfd1f1137a59906142e631e0343f95..0ab89a195b33cd5b9c359a73821ae84ca0113c7b 100644
--- a/maca_tokenizer/src/context.h
+++ b/maca_tokenizer/src/context.h
@@ -18,6 +18,8 @@ typedef struct {
   mcd *mcd_struct;
   char *input_filename;
   char *output_filename;
+  int print_offset;
+  int print_token_length;
 } context;
 
 context *context_new(void);
diff --git a/maca_tokenizer/src/fr_tok_rules.l b/maca_tokenizer/src/fr_tok_rules.l
index 59b1909acc4c347676e79d025d2efb993d1c6af3..0f2b0cb8665e406e9c4c58eff105f603f1282bbb 100644
--- a/maca_tokenizer/src/fr_tok_rules.l
+++ b/maca_tokenizer/src/fr_tok_rules.l
@@ -1,6 +1,13 @@
 %{
-#include <stdio.h>
+#include<stdio.h>
+#include"maca_tokenizer_functions_for_lex.h"
+
+extern int defait_amalgames;
+/* Scanner state below is shared with maca_tokenizer_functions_for_lex.c, which
+   declares these three objects extern and updates them from the rule actions. */
+int offset = 0;        /* running input position, advanced by maca_tokenizer_segment() */
+int token_length = 0;  /* number of chars currently stored in token */
+char token[10000];     /* pending token, filled by maca_tokenizer_add_char_to_token() */
 %}
 
 %option prefix="fr"
@@ -12,36 +19,39 @@ extern int defait_amalgames;
 	if(defait_amalgames){
 	BEGIN(state_defait_amalgames);
 	}
+\<[^\>]*\> {/* tags and blanks only close the pending token; no line is printed for the match itself */ maca_tokenizer_segment((char *)"", yytext);}
+[ \t]+   {maca_tokenizer_segment((char *)"", yytext);}
+[ ]*\.   {maca_tokenizer_segment((char *)".", yytext);}
+[ ]*\?   {maca_tokenizer_segment((char *)"?", yytext);}
+[ ]*\!   {maca_tokenizer_segment((char *)"!", yytext);}
+[ ]*,    {maca_tokenizer_segment((char *)",", yytext);}
+[ ]*:    {maca_tokenizer_segment((char *)":", yytext);}
+[ ]*;    {maca_tokenizer_segment((char *)";", yytext);}
+[ ]*…    {maca_tokenizer_segment((char *)"…", yytext);}
+[ ]*\)   {maca_tokenizer_segment((char *)")", yytext);}
+[ ]*»    {maca_tokenizer_segment((char *)"»", yytext);}
+\(       {/* emit a single opening parenthesis for the one that was matched */ maca_tokenizer_segment((char *)"(", yytext);}
+'        {maca_tokenizer_segment((char *)"'", yytext);}
+’        {/* the typographic apostrophe is output as the ASCII one */ maca_tokenizer_segment((char *)"'", yytext);}
+\"	 {maca_tokenizer_segment((char *)"\"", yytext);}
+«	 {maca_tokenizer_segment((char *)"«", yytext);}
+
+[0-9]+,[0-9]+ {maca_tokenizer_segment(yytext, yytext);}
 
-[0-9]+,[0-9]+ printf("%s", yytext);
-[ \t]+   printf("\n");
-[ ]*\.       printf("\n.");
-[ ]*\?       printf("\n?");
-[ ]*\!       printf("\n!");
-[ ]*,       printf("\n,");
-[ ]*:       printf("\n:");
-[ ]*;       printf("\n;");
-[ ]*…        printf("\n…");
-[ ]*\)        printf("\n)");
-[ ]*»        printf("\n»");
-\(        printf("(\n");
-'        printf("'\n");
-’        printf("'\n");
-\"	printf("\"\n");
-«	printf("»\n");
--je      printf("\n-je");
--tu      printf("\n-tu");
--on      printf("\n-on");
--ce      printf("\n-ce");
--t-il    printf("\n-t-il");
--il      printf("\n-il");
--t-ils   printf("\n-t-ils");
--ils     printf("\n-ils");
--t-elle  printf("\n-t-elle");
--elle    printf("\n-elle");
--t-elles printf("\n-t-elles");
--elles   printf("\n-elles");
-\n+      printf("\n");
+-je      {/* each hyphenated suffix in this list is split off and printed as its own token */ maca_tokenizer_segment((char *)"-je", yytext);}
+-tu      {maca_tokenizer_segment((char *)"-tu", yytext);}
+-on      {maca_tokenizer_segment((char *)"-on", yytext);}
+-ce      {maca_tokenizer_segment((char *)"-ce", yytext);}
+-t-il    {maca_tokenizer_segment((char *)"-t-il", yytext);}
+-il      {maca_tokenizer_segment((char *)"-il", yytext);}
+-t-ils   {maca_tokenizer_segment((char *)"-t-ils", yytext);}
+-ils     {maca_tokenizer_segment((char *)"-ils", yytext);}
+-t-elle  {maca_tokenizer_segment((char *)"-t-elle", yytext);}
+-elle    {maca_tokenizer_segment((char *)"-elle", yytext);}
+-t-elles {maca_tokenizer_segment((char *)"-t-elles", yytext);}
+-elles   {maca_tokenizer_segment((char *)"-elles", yytext);}
+\n+      {/* line breaks end the pending token without printing a separator */ maca_tokenizer_segment((char *)"", yytext);}
+.        {/* any other character extends the pending token */ maca_tokenizer_add_char_to_token(yytext[0]);}
 
 <state_defait_amalgames>{
 " du " printf("\nde\nle\n");
diff --git a/maca_tokenizer/src/maca_tokenizer.c b/maca_tokenizer/src/maca_tokenizer.c
index 6768e5da317a68bb98da83331151f63d61f1ffcc..524baa61360d1191c4d25aeef7794bb91c36769e 100644
--- a/maca_tokenizer/src/maca_tokenizer.c
+++ b/maca_tokenizer/src/maca_tokenizer.c
@@ -7,6 +7,8 @@ int enlex(void);
 int frlex(void);
 
 int defait_amalgames = 0;
+int print_offset = 0;        /* set from ctx->print_offset in main(); read via extern by maca_tokenizer_segment() */
+int print_token_length = 0;  /* set from ctx->print_token_length in main(); read via extern by maca_tokenizer_segment() */
 
 void maca_tokenizer_help_message(context *ctx)
 {
@@ -32,6 +34,9 @@ int main(int argc, char* argv[])
 
   ctx = context_read_options(argc, argv);
   maca_tokenizer_check_options(ctx);
+
+  print_offset = ctx->print_offset;
+  print_token_length = ctx->print_token_length;
   
   if(!strcmp(ctx->language, "en"))
     enlex() ; 
diff --git a/maca_tokenizer/src/maca_tokenizer_functions_for_lex.c b/maca_tokenizer/src/maca_tokenizer_functions_for_lex.c
new file mode 100644
index 0000000000000000000000000000000000000000..8f058c970808911af8ac0f13b0591fe865c86819
--- /dev/null
+++ b/maca_tokenizer/src/maca_tokenizer_functions_for_lex.c
@@ -0,0 +1,42 @@
+#include <stdio.h>
+#include <string.h>
+#include "char16.h"
+#include "maca_tokenizer_functions_for_lex.h" /* own prototypes: lets the compiler check the definitions below */
+
+extern int offset;             /* defined in fr_tok_rules.l */
+extern int token_length;       /* defined in fr_tok_rules.l */
+extern char token[];           /* defined in fr_tok_rules.l as char[10000] */
+extern int print_offset;       /* defined in maca_tokenizer.c */
+extern int print_token_length; /* defined in maca_tokenizer.c */
+
+/* Print the pending token (if any) on its own line, then `separator` (if non-empty)
+   on its own line; `xx` is the matched input and only advances `offset`. Every
+   length goes through utf8_strlen so tokens and separators share one unit. */
+void maca_tokenizer_segment(char *separator, char *xx){
+  if(token_length != 0){
+    int len = utf8_strlen(token);
+    printf("%s", token);
+    if(print_offset) printf("\t%d", offset);
+    if(print_token_length) printf("\t%d", len);
+    printf("\n");
+    offset += len;
+    /* Empty the buffer as well so no stale text is left behind the zero length. */
+    token_length = 0;
+    token[0] = 0;
+  }
+  if(separator[0] != 0){
+    printf("%s", separator);
+    if(print_offset) printf("\t%d", offset);
+    if(print_token_length) printf("\t%d", utf8_strlen(separator));
+    printf("\n");
+  }
+  offset += utf8_strlen(xx);
+}
+
+/* Append c to the pending token, keeping it NUL-terminated; c is dropped when the buffer is full. */
+void maca_tokenizer_add_char_to_token(char c)
+{
+  if(token_length >= 10000 - 1) return; /* token is char[10000] in fr_tok_rules.l; last slot holds the NUL */
+  token[token_length++] = c;
+  token[token_length] = 0;
+}
diff --git a/maca_tokenizer/src/maca_tokenizer_functions_for_lex.h b/maca_tokenizer/src/maca_tokenizer_functions_for_lex.h
new file mode 100644
index 0000000000000000000000000000000000000000..a57ffe63c753d77c3122fcbdfcc91c68a5fa5717
--- /dev/null
+++ b/maca_tokenizer/src/maca_tokenizer_functions_for_lex.h
@@ -0,0 +1,3 @@
+/* Helpers called from the flex rule actions; both operate on the scanner's global token/offset state. */
+void maca_tokenizer_segment(char *separator, char *xx); /* print pending token, then separator; advance offset past xx */
+void maca_tokenizer_add_char_to_token(char c);           /* append c to the pending token */