diff --git a/CMakeLists.txt b/CMakeLists.txt
index b4f6ff988b1692329822f3bc7cbaf1490f1c0c2b..552dabbfe8fce081f5646a284851b115cc04afef 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,6 +22,7 @@ add_subdirectory(maca_tools)
 add_subdirectory(perceptron)
 #add_subdirectory(maca_lemmatizer)
 add_subdirectory(maca_tokenizer)
+add_subdirectory(maca_lexer)
 add_subdirectory(maca_trans_parser)
 add_subdirectory(maca_crf_tagger)
 add_subdirectory(maca_graph_parser)
diff --git a/maca_common/CMakeLists.txt b/maca_common/CMakeLists.txt
index 58e4c1f2f79afc41d48a0ad738c0dc2149a11582..75b9fad8808f8e5a646425313efde9c554b62ae0 100644
--- a/maca_common/CMakeLists.txt
+++ b/maca_common/CMakeLists.txt
@@ -8,7 +8,15 @@ set(SOURCES  src/util.c
  src/word.c
  src/sentence.c
  src/word_buffer.c
+ src/trie.c
 )
 
 #compiling library
 add_library(maca_common STATIC ${SOURCES})
+
+
+#compiling, linking and installing executables
+#add_executable(test_trie ./test/test_trie.c)
+#target_link_libraries(test_trie maca_common)
+#install (TARGETS test_trie DESTINATION bin)
+
diff --git a/maca_common/include/trie.h b/maca_common/include/trie.h
new file mode 100644
index 0000000000000000000000000000000000000000..0ec45ad4334bcafdfa7b3a74a32f53877045a87d
--- /dev/null
+++ b/maca_common/include/trie.h
@@ -0,0 +1,38 @@
+#ifndef __TRIE__
+#define __TRIE__
+
+#include<stdio.h>
+
+typedef struct trans{
+  int destination;    /* index, in the trie's states array, of the state reached */
+  int symbol;         /* integer label that must be read to follow the transition */
+  struct trans *next; /* next transition leaving the same state, NULL at the end */
+} trie_trans;
+
+typedef struct {
+  trie_trans *transitions; /* head of the list of outgoing transitions */
+  int is_accept;           /* set to 1 on the state that ends an inserted word */
+  int fail;                /* initialised to 0 and printed by trie_print; not used otherwise in trie.c */
+} trie_state;
+
+typedef struct {
+  trie_state **states; /* array of states; index 0 is the initial state */
+  int size;            /* number of slots allocated in states */
+  int states_nb;       /* number of slots that actually hold a state */
+} trie;
+
+
+trie_state *trie_state_new(trie_trans *transitions, int is_accept);
+void trie_state_free(trie_state *state);
+trie *trie_new(void); /* new trie containing only the initial state */
+void trie_free(trie *t);
+trie_trans *trie_trans_new(int destination, int symbol, trie_trans *next);
+void trie_trans_free_rec(trie_trans *trans);
+int trie_add_state(trie *t); /* returns the index of the state created */
+void trie_add_trans(trie *t, int origin, int symbol, int destination);
+void trie_add_word(trie *t, int *word, int length); /* word: array of length symbols */
+void trie_print(FILE *f, trie *t);
+int trie_lookup(trie *t, int *word, int length); /* accept flag of the state reached, 0 if no path */
+trie *trie_build_from_collection(char *filename); /* one word per line, symbols written as integers */
+int trie_destination_state(trie *t, int origin, int symbol); /* 0 when there is no such transition */
+#endif
diff --git a/maca_common/include/util.h b/maca_common/include/util.h
index 7046269758ef894325a2209bbcd8c89ed4c3755b..971bf1a1f0f54f4bd509346866d9285bd628584c 100644
--- a/maca_common/include/util.h
+++ b/maca_common/include/util.h
@@ -2,6 +2,8 @@
 #define __UTIL__
 
 #include<stdlib.h>
+#include<stdio.h>
+
 void myfree(void *ptr);
 void *memalloc(size_t s);
 FILE *myfopen(const char *path, const char *mode);
diff --git a/maca_common/src/trie.c b/maca_common/src/trie.c
new file mode 100644
index 0000000000000000000000000000000000000000..6494c2286da458ad2cdda966ac9db6a5a7165bee
--- /dev/null
+++ b/maca_common/src/trie.c
@@ -0,0 +1,183 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+
+#include "trie.h"
+#include "util.h"
+
+/* Allocate a state with the given transition list and accept flag (fail = 0). */
+trie_state *trie_state_new(trie_trans *transitions, int is_accept) {
+  trie_state *s = memalloc(sizeof *s);
+  s->fail = 0;
+  s->is_accept = is_accept;
+  s->transitions = transitions;
+  return s;
+}
+
+/* Release a state and its transition list; a NULL state is ignored. */
+void trie_state_free(trie_state *state)
+{
+  if(state == NULL) return;
+  trie_trans_free_rec(state->transitions);
+  free(state);
+}
+
+/* Create a trie whose only state is the initial one (index 0). */
+trie *trie_new(void)
+{
+  trie *fresh = memalloc(sizeof *fresh);
+  fresh->states = NULL;
+  fresh->size = fresh->states_nb = 0;
+  trie_add_state(fresh); /* becomes state 0, the root */
+  return fresh;
+}
+
+/* Free a trie together with all of its states; a NULL trie is ignored. */
+void trie_free(trie *t)
+{
+  int i;
+  if(t == NULL) return;
+  /* slots from states_nb up to size are uninitialised spare room: skip them */
+  for(i=0; i < t->states_nb; i++) trie_state_free(t->states[i]);
+  free(t->states);
+  free(t);
+}
+
+/* Allocate a transition to destination on symbol, linked in front of next. */
+trie_trans *trie_trans_new(int destination, int symbol, trie_trans *next) {
+  trie_trans *node = memalloc(sizeof *node);
+  node->next = next;
+  node->symbol = symbol;
+  node->destination = destination;
+  return node;
+}
+
+/* Free every transition of the list starting at trans (NULL is allowed). */
+void trie_trans_free_rec(trie_trans *trans)
+{
+  while(trans){
+    trie_trans *next = trans->next; /* read the link before the node is freed */
+    free(trans);
+    trans = next;
+  }
+}
+
+/* Append a new non-accepting state (the array grows when full); returns its index. */
+int trie_add_state(trie *t) {
+  if(t->states_nb == t->size){
+    trie_state **grown = realloc(t->states, 2 * (t->size + 1) * sizeof *grown);
+    if(grown == NULL){ perror("trie_add_state"); exit(1); } /* never store NULL in t->states */
+    t->states = grown; t->size = 2 * (t->size + 1);
+  }
+  t->states[t->states_nb] = trie_state_new(NULL, 0);
+  return t->states_nb++;
+}
+
+/* Add a transition origin --symbol--> destination at the head of origin's
+ * list, creating any missing states so that both indices are valid. */
+void trie_add_trans(trie *t, int origin, int symbol, int destination)
+{
+  trie_state *from;
+
+  while(t->states_nb <= origin) trie_add_state(t);
+  while(t->states_nb <= destination) trie_add_state(t);
+
+  from = t->states[origin];
+  from->transitions = trie_trans_new(destination, symbol, from->transitions);
+}
+
+/* Insert word (an array of length symbols): follow the longest prefix that
+ * is already in the trie, add one new state per remaining symbol, then mark
+ * the last state as accepting. */
+void trie_add_word(trie *t, int *word, int length)
+{
+  int pos = 0;   /* index of the next symbol to consume */
+  int state = 0; /* current state, starting at the initial one */
+  trie_trans *tr;
+
+  /* walk the existing path as far as it goes */
+  while(pos < length){
+    for(tr = t->states[state]->transitions; tr; tr = tr->next)
+      if(tr->symbol == word[pos]) break;
+    if(tr == NULL) break; /* nothing labelled word[pos]: the prefix ends here */
+    state = tr->destination;
+    pos++;
+  }
+
+  /* extend the trie with the part of the word that is still unmatched */
+  for(; pos < length; pos++){
+    int fresh = trie_add_state(t);
+    trie_add_trans(t, state, word[pos], fresh);
+    state = fresh;
+  }
+
+  /* the state reached after the last symbol recognises the word */
+  t->states[state]->is_accept = 1;
+}
+
+/* Write every state (index, ACCEPT tag, fail value) and its transitions to f. */
+void trie_print(FILE *f, trie *t)
+{
+  int s;
+  for(s = 0; s < t->states_nb; s++){
+    trie_state *st = t->states[s];
+    trie_trans *tr;
+    fprintf(f, "state %d", s);
+    fputs(st->is_accept ? " ACCEPT\n" : "\n", f);
+    fprintf(f, "FAIL = %d\n", st->fail);
+    for(tr = st->transitions; tr; tr = tr->next)
+      fprintf(f, "%d %d %d\n", s, tr->symbol, tr->destination);
+    fprintf(f, "\n");
+  }
+}
+
+/* Return the accept flag of the state reached by reading word from the
+ * initial state, or 0 as soon as a symbol has no matching transition. */
+int trie_lookup(trie *t, int *word, int length)
+{
+  int pos;
+  int state = 0;
+
+  for(pos = 0; pos < length; pos++){
+    trie_trans *tr = t->states[state]->transitions;
+    while(tr && tr->symbol != word[pos])
+      tr = tr->next;
+    if(!tr)
+      return 0; /* dead end: the word is not in the trie */
+    state = tr->destination;
+  }
+  return t->states[state]->is_accept;
+}
+
+
+/* Build a trie from a text file with one word per line, a word being a list
+ * of integer symbol codes separated by blanks. Blank lines are skipped and
+ * symbols beyond the capacity of word[] are dropped instead of overflowing. */
+trie *trie_build_from_collection(char *filename)
+{
+  trie *t = trie_new();
+  FILE *f = myfopen(filename, "r");
+  char buffer[10000];
+  int word[100];
+  int length;
+  char *token;
+
+  while(fgets(buffer, sizeof buffer, f)){ /* the size passed now always matches the buffer */
+    length = 0;
+    for(token = strtok(buffer, " \t\r\n"); token && (length < (int)(sizeof word / sizeof word[0])); token = strtok(NULL, " \t\r\n"))
+      word[length++] = atoi(token);
+    if(length > 0) trie_add_word(t, word, length);
+  }
+  fclose(f);
+  return t;
+}
+
+/* Return the state reached from origin on symbol, or 0 when origin has no
+ * transition labelled symbol. */
+int trie_destination_state(trie *t, int origin, int symbol)
+{
+  trie_trans *tr = t->states[origin]->transitions;
+  while(tr && tr->symbol != symbol)
+    tr = tr->next;
+  return tr ? tr->destination : 0;
+}
diff --git a/maca_lexer/CMakeLists.txt b/maca_lexer/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8c42bcff7ba6efe6b2aa6f55411ce1cee5dd230d
--- /dev/null
+++ b/maca_lexer/CMakeLists.txt
@@ -0,0 +1,16 @@
+set(SOURCES src/context.c)
+
+##compiling library
+include_directories(src)
+add_library(maca_lexer_lib STATIC ${SOURCES})
+
+#compiling, linking and installing executables
+
+add_executable(extract_mwe_from_fplm ./src/extract_mwe_from_fplm.c)
+target_link_libraries(extract_mwe_from_fplm maca_common)
+install (TARGETS extract_mwe_from_fplm DESTINATION bin)
+
+add_executable(maca_lexer ./src/maca_lexer.c)
+target_link_libraries(maca_lexer maca_lexer_lib maca_common)
+install (TARGETS maca_lexer DESTINATION bin)
+
diff --git a/maca_lexer/src/context.c b/maca_lexer/src/context.c
new file mode 100644
index 0000000000000000000000000000000000000000..6a281826a707db0dab8c1375d4ca6c8a962595c9
--- /dev/null
+++ b/maca_lexer/src/context.c
@@ -0,0 +1,196 @@
+#include<stdlib.h>
+#include<stdio.h>
+#include<string.h>
+#include<unistd.h>
+#include<getopt.h>
+#include "context.h"
+#include "util.h"
+
+
+void context_set_linguistic_resources_filenames(context *ctx);
+
+/* Release a context and every string it owns; a NULL ctx is ignored.
+ * NOTE(review): mcd_struct is not released here - confirm who owns it. */
+void context_free(context *ctx)
+{
+  if(ctx == NULL) return;
+  free(ctx->program_name);   free(ctx->input_filename);
+  free(ctx->output_filename); free(ctx->fplm_filename);
+  free(ctx->language);       free(ctx->maca_data_path);
+  free(ctx->mcd_filename);   /* strdup'ed by -C and previously leaked */
+  free(ctx->mwe_filename);   free(ctx->mwe_tokens_dico_filename);
+  free(ctx);
+}
+
+/* Allocate a context holding the default value of every option. */
+context *context_new(void)
+{
+  context *fresh = (context *)memalloc(sizeof(context));
+  fresh->help = 0;
+  fresh->verbose = 0;
+  fresh->debug_mode = 0;
+  fresh->form_column = -1;
+  fresh->language = strdup("fr");
+  fresh->program_name = NULL;
+  fresh->input_filename = NULL;
+  fresh->output_filename = NULL;
+  fresh->fplm_filename = NULL;
+  fresh->maca_data_path = NULL;
+  fresh->mcd_filename = NULL;
+  fresh->mcd_struct = NULL;
+  fresh->mwe_filename = NULL;
+  fresh->mwe_tokens_dico_filename = NULL;
+  return fresh;
+}
+
+/* Print the usage line and the flag options (-h, -v, -d) on stderr. */
+void context_general_help_message(context *ctx)
+{
+    fprintf(stderr, "usage: %s [options]\nOptions:\n", ctx->program_name);
+    fprintf(stderr, "\t-h --help             : print this message\n");
+    fprintf(stderr, "\t-v --verbose          : activate verbose mode\n");
+    fprintf(stderr, "\t-d --debug            : activate debug mode\n");
+}
+
+void context_input_help_message(context *ctx){ /* describes -i on stderr; ctx is not used */
+  fputs("\t-i --input  <file>  : input mcf file name\n", stderr);
+}
+
+void context_form_column_help_message(context *ctx){ /* describes -F on stderr; ctx is not used */
+  fputs("\t-F --form_column  <int>  : column containing form\n", stderr);
+}
+
+void context_fplm_help_message(context *ctx){ /* describes -f on stderr; ctx is not used */
+  fputs("\t-f --fplm   <file>  : fplm (form pos lemma morpho) file\n", stderr);
+}
+
+void context_mcd_help_message(context *ctx){ /* describes -C on stderr; ctx is not used */
+  fputs("\t-C --mcd   <file> : multi column description file name\n", stderr);
+}
+
+void context_language_help_message(context *ctx){ /* describes -L on stderr; ctx is not used */
+  fputs("\t-L --language  : identifier of the language to use\n", stderr);
+}
+
+void context_maca_data_path_help_message(context *ctx){ /* the option letter is D: -M selects the mwe file */
+  fprintf(stderr, "\t-D --maca_data_path  : path to maca_data directory\n");
+}
+
+/* Parse the command line into a newly allocated context. Options that are
+ * not in the table are ignored (opterr = 0, no '?' case). Missing resource
+ * filenames then get their defaults and the mcd description is loaded. */
+context *context_read_options(int argc, char *argv[])
+{
+  int c;
+  int option_index = 0;
+  context *ctx = context_new();
+  /* getopt_long() requires the table to end with an all-zero entry */
+  static struct option long_options[] =
+    {
+      {"help",                no_argument,       0, 'h'},
+      {"verbose",             no_argument,       0, 'v'},
+      {"debug",               no_argument,       0, 'd'},
+      {"input",               required_argument, 0, 'i'},
+      {"output",              required_argument, 0, 'o'},
+      {"mcd",                 required_argument, 0, 'C'},
+      {"language",            required_argument, 0, 'L'},
+      {"fplm",                required_argument, 0, 'f'},
+      {"form_column",         required_argument, 0, 'F'},
+      {"maca_data_path",      required_argument, 0, 'D'},
+      {"mwe",                 required_argument, 0, 'M'},
+      {"vocab",               required_argument, 0, 'V'},
+      {0,                     0,                 0, 0}
+    };
+
+  ctx->program_name = strdup(argv[0]);
+  optind = 0;
+  opterr = 0;
+
+  while ((c = getopt_long (argc, argv, "hvdi:o:f:C:L:M:F:D:V:", long_options, &option_index)) != -1){
+    switch (c)
+      {
+      case 'd':
+        ctx->debug_mode = 1;
+        break;
+      case 'h':
+        ctx->help = 1;
+        break;
+      case 'v':
+        ctx->verbose = 1;
+        break;
+      case 'F':
+        ctx->form_column = atoi(optarg) - 1;
+        break;
+      case 'f':
+        ctx->fplm_filename = strdup(optarg);
+        break;
+      case 'i':
+        ctx->input_filename = strdup(optarg);
+        break;
+      case 'o':
+        ctx->output_filename = strdup(optarg);
+        break;
+      case 'C':
+        ctx->mcd_filename = strdup(optarg);
+        break;
+      case 'L':
+        free(ctx->language); /* drop the default allocated by context_new() */
+        ctx->language = strdup(optarg);
+        break;
+      case 'D':
+        ctx->maca_data_path = strdup(optarg);
+        break;
+      case 'V':
+        ctx->mwe_tokens_dico_filename = strdup(optarg);
+        break;
+      case 'M':
+        ctx->mwe_filename = strdup(optarg);
+        break;
+      }
+  }
+
+  context_set_linguistic_resources_filenames(ctx);
+  if(ctx->mcd_filename)
+    ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose);
+  if((ctx->mcd_filename == NULL) && (ctx->form_column == -1))
+    ctx->mcd_struct = mcd_build_wplgf();
+
+  return ctx;
+}
+
+/* Give mwe_filename and mwe_tokens_dico_filename their default value when
+ * they were not set on the command line: <data dir>/<language>/bin/<name>.
+ * The data dir is maca_data_path if set, otherwise $MACAON_DIR; when neither
+ * exists a warning is printed and the path starts at "/". Paths are built
+ * with snprintf so over-long inputs are truncated, never written past the
+ * 500-byte buffers. */
+void context_set_linguistic_resources_filenames(context *ctx)
+{
+  char absolute_path[500];
+  char absolute_filename[500];
+  const char *data_dir = ctx->maca_data_path;
+  int written;
+
+  if(data_dir == NULL){
+    data_dir = getenv("MACAON_DIR");
+    if(data_dir == NULL){
+      fprintf(stderr, "ATTENTION: the environment variable MACAON_DIR is not defined\n");
+      data_dir = "";
+    }
+  }
+
+  written = snprintf(absolute_path, sizeof absolute_path, "%s/%s/bin/", data_dir, ctx->language);
+  if((written < 0) || ((size_t)written >= sizeof absolute_path))
+    fprintf(stderr, "ATTENTION: resource path is too long and was truncated\n");
+
+  /* a filename given explicitly is never overridden */
+  if(!ctx->mwe_filename){
+    snprintf(absolute_filename, sizeof absolute_filename, "%s%s", absolute_path, DEFAULT_MWE_FILENAME);
+    ctx->mwe_filename = strdup(absolute_filename);
+  }
+
+  if(!ctx->mwe_tokens_dico_filename){
+    snprintf(absolute_filename, sizeof absolute_filename, "%s%s", absolute_path, DEFAULT_MWE_TOKENS_DICO_FILENAME);
+    ctx->mwe_tokens_dico_filename = strdup(absolute_filename);
+  }
+}
diff --git a/maca_lexer/src/context.h b/maca_lexer/src/context.h
new file mode 100644
index 0000000000000000000000000000000000000000..a1898faf3588e159738c450d898f69e6e8999596
--- /dev/null
+++ b/maca_lexer/src/context.h
@@ -0,0 +1,40 @@
+#ifndef __MACA_LEXER_CONTEXT__
+#define __MACA_LEXER_CONTEXT__
+
+#include "mcd.h"
+#include <stdlib.h>
+
+#define DEFAULT_MWE_TOKENS_DICO_FILENAME "mwe_tokens"
+#define DEFAULT_MWE_FILENAME "mwe"
+
+typedef struct {
+  int help;                       /* set by -h */
+  int verbose;                    /* set by -v */
+  int debug_mode;                 /* set by -d */
+  char *program_name;             /* copy of argv[0] */
+  char *fplm_filename;            /* -f */
+  char *language;                 /* -L, defaults to "fr" */
+  char *maca_data_path;           /* -D */
+  char *mcd_filename;             /* -C */
+  mcd *mcd_struct;                /* read from mcd_filename, or built by mcd_build_wplgf() */
+  int form_column;                /* -F minus one, -1 when not given */
+  char *input_filename;           /* -i; maca_lexer reads stdin when NULL */
+  char *output_filename;          /* -o */
+  char *mwe_filename;             /* -M, else a default under the data path */
+  char *mwe_tokens_dico_filename; /* -V, else a default under the data path */
+} context;
+
+context *context_new(void);
+void context_free(context *ctx);
+
+context *context_read_options(int argc, char *argv[]);
+void context_general_help_message(context *ctx);
+void context_conll_help_message(context *ctx); /* NOTE(review): no definition visible in context.c - confirm */
+void context_language_help_message(context *ctx);
+void context_fplm_help_message(context *ctx);
+void context_maca_data_path_help_message(context *ctx);
+void context_mcd_help_message(context *ctx);
+void context_form_column_help_message(context *ctx);
+void context_pos_column_help_message(context *ctx); /* NOTE(review): no definition visible in context.c - confirm */
+
+#endif
diff --git a/maca_lexer/src/extract_mwe_from_fplm.c b/maca_lexer/src/extract_mwe_from_fplm.c
new file mode 100644
index 0000000000000000000000000000000000000000..6dec0cbea129e1ad1f421d9f491bf138126bda1e
--- /dev/null
+++ b/maca_lexer/src/extract_mwe_from_fplm.c
@@ -0,0 +1,77 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+
+#include"dico.h"
+#include"util.h"
+
+
+/* return 1 if form contains at least one space character, 0 otherwise */
+int form_is_complex(char *form)
+{
+  const char *p;
+
+  for(p = form; *p != '\0'; p++){
+    if(*p == ' ') return 1;
+  }
+  return 0;
+}
+
+/* Read an fplm file (form \t pos \t lemma \t morpho on each line) and, for
+ * every form containing a space, print on one line of output_file the codes
+ * of its space-separated tokens. Codes come from dico_add() on a dictionary
+ * that is returned; main() below releases it with dico_free(). */
+dico *decompose_mwe_in_fplm_file(char *fplm_filename, FILE *output_file, int debug_mode)
+{
+  char form[1000], pos[1000], lemma[1000], morpho[1000];
+  char buffer[10000];
+  char token[1000];
+  FILE *f = myfopen(fplm_filename, "r");
+  dico *d_tokens = dico_new("TOKENS", 100000);
+  int fields_nb;
+  int token_code;
+  int l, i, j;
+
+  while(fgets(buffer, sizeof buffer, f)){
+    /* fields not reached by a partial match must still be printable below */
+    form[0] = pos[0] = lemma[0] = morpho[0] = '\0';
+    /* the widths keep sscanf inside the 1000-byte field buffers */
+    fields_nb = sscanf(buffer, "%999[^\t]\t%999s\t%999[^\t]\t%999s\n", form, pos, lemma, morpho);
+    if(fields_nb != 4){
+      if(debug_mode){
+        fprintf(stderr, "form = %s pos = %s lemma = %s\n", form, pos, lemma);
+        fprintf(stderr, "incorrect fplm entry, skipping it\n");
+      }
+      continue;
+    }
+    if(!form_is_complex(form)) continue;
+    l = strlen(form);
+    j = 0;
+    for(i=0; i <= l; i++){
+      if((form[i] != ' ') && (i < l)){
+        token[j++] = form[i];
+      }
+      else{ /* a space or the end of the form closes the current token */
+        token[j] = '\0';
+        token_code = dico_add(d_tokens, token);
+        fprintf(output_file, "%d", token_code);
+        if(i != l)
+          fprintf(output_file, " ");
+        j = 0;
+      }
+    }
+    fprintf(output_file, "\n");
+  }
+  fclose(f); /* the stream used to be left open */
+  return d_tokens;
+}
+  
+/* usage: extract_mwe_from_fplm <fplm file> - token codes are printed on stdout */
+int main(int argc, char *argv[]) {
+  dico *d_tokens;
+  if(argc < 2){ fprintf(stderr, "usage: %s <fplm file>\n", argv[0]); return 1; }
+  d_tokens = decompose_mwe_in_fplm_file(argv[1], stdout, 1);
+  dico_print("d_tokens.dico", d_tokens);
+  dico_free(d_tokens);
+  return 0;
+}
diff --git a/maca_lexer/src/maca_lexer.c b/maca_lexer/src/maca_lexer.c
new file mode 100644
index 0000000000000000000000000000000000000000..5308fac5d036510738c80a77457d555225ae456f
--- /dev/null
+++ b/maca_lexer/src/maca_lexer.c
@@ -0,0 +1,102 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+
+#include"trie.h"
+#include"dico.h"
+#include"util.h"
+#include"context.h"
+
+int main(int argc, char *argv[])
+{
+  char buffer[10000];
+  char *buffer_copy;
+  char *form;
+  int form_code;
+  context *ctx;
+  int form_column;
+  FILE *f = NULL;
+  trie *mwe_trie;
+  dico *d_mwe_tokens = NULL;
+  int origin_state = 0;
+  int destination_state = 0;
+  int states_array[100];
+  int symbols_array[100];
+  int path_index = 0;
+  int i;
+  
+  ctx = context_read_options(argc, argv);
+  /* maca_lexer_check_options(ctx); */
+
+  
+  if(ctx->form_column != -1)
+    form_column = ctx->form_column;
+  else
+    form_column = ctx->mcd_struct->wf2col[MCD_WF_FORM];
+  
+  if(ctx->input_filename == NULL)
+    f = stdin;
+  else
+    f = myfopen(ctx->input_filename, "r");
+
+  mwe_trie = trie_build_from_collection(ctx->mwe_filename);
+  d_mwe_tokens = dico_read(ctx->mwe_tokens_dico_filename, 0.5);
+
+  /* trie_print(stdout, mwe_trie); */
+
+  
+  /* look for a valid word */
+  while(fgets(buffer, 10000, f)){
+    if(feof(f)) return 0; /* no more words to read */
+    if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')){
+      printf("\n");
+      continue;
+    }
+    
+    buffer[strlen(buffer)-1] = '\0';
+    form_code = dico_string2int(d_mwe_tokens, buffer);
+    symbols_array[path_index]  = form_code;
+    states_array[path_index] = (form_code == -1)? 0
+      : trie_destination_state(mwe_trie, (path_index == 0) ? 0 : states_array[path_index - 1], form_code);
+    /*    printf("buffer = %s ", buffer);
+    printf("code  = %d\n", form_code);
+    
+    
+    for(i=0; i <= path_index; i++){
+      printf("%d ", states_array[i]);
+    }
+    printf("\n");
+    for(i=0; i <= path_index; i++){
+      printf("%d ", symbols_array[i]);
+    }
+    printf("\n");
+    */
+    if(states_array[path_index] == 0){ /* in initial state of trie */
+      if(path_index == 0){ /* nothing has been recognized */
+	printf("%s\n", buffer);
+      }
+      else{
+	if(mwe_trie->states[states_array[path_index - 1]]->is_accept){
+	  for(i=0; i < path_index; i++){
+	    if(i > 0) printf("#");
+	    printf("%s", dico_int2string(d_mwe_tokens, symbols_array[i]));
+	  }
+	  printf("\n");
+	}
+	else{
+	  for(i=0; i < path_index; i++){
+	    printf("%s\n", dico_int2string(d_mwe_tokens, symbols_array[i]));
+	  }
+	}
+	printf("%s\n", buffer);
+      }
+      path_index = 0;
+    }
+    else{
+      path_index++;
+    }
+    
+  }
+  return 0;
+}
+
diff --git a/maca_tokenizer/main.c b/maca_tokenizer/main.c
index f0bef4ca78b9ae992a5cd6fb7a24a17600e157ac..f3586e084fd9aee749368fc0d07e1223ef9e0f30 100644
--- a/maca_tokenizer/main.c
+++ b/maca_tokenizer/main.c
@@ -1,5 +1,8 @@
+int defait_amalgames = 0; /* set to 1 by main() when any argument is given; tok_rules.l reads it to enable the contraction-splitting rules */
+
 int main(int argc, char* argv[]) {
 
+  if(argc > 1) defait_amalgames = 1;
   yylex() ; 
 
   return 0;
diff --git a/maca_tokenizer/tok_rules.l b/maca_tokenizer/tok_rules.l
index d60af0ee7f05e26fa5bb0718370c3581dbfc5fee..4cd823d15c0535b7bcf9fcb1b34385d5789355f4 100644
--- a/maca_tokenizer/tok_rules.l
+++ b/maca_tokenizer/tok_rules.l
@@ -1,16 +1,26 @@
 %{
 #include <stdio.h>
+extern int defait_amalgames;
 %}
 %option noyywrap
+%s state_defait_amalgames
+%s state_num
 %%
-" "+ printf("\n");
+	if(defait_amalgames){ /* code placed before the first rule: switches on the <state_defait_amalgames> rules */
+	BEGIN(state_defait_amalgames);
+	}
+
+<state_num>[0-9]*,[0-9]* printf("%s", yytext); /* NOTE(review): no BEGIN(state_num) is visible in this hunk, so this rule looks unreachable - confirm */
+[ \t]+ printf("\n");
 \.  printf("\n.");
 \,  printf("\n,");
 ' printf("'\n");
 ’ printf("'\n");
 \n+ printf("\n");
-du printf("de\nle");
-des printf("de\nles");
-au printf("à\nle");
-aux printf("à\nles");
+<state_defait_amalgames>{
+" du " printf("\nde\nle\n");
+" des " printf("\nde\nles\n");
+" au " printf("\nà\nle\n");
+" aux " printf("\nà\nles\n");
+}
 %%