diff --git a/CMakeLists.txt b/CMakeLists.txt
index 766110fb2776ae8d42ae6209ee1a24e0c8974226..85d4e4bcdce693d1da0be09721efe1118d02158e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,6 +3,10 @@ project(macaon2)
 
 add_definitions("-Wall")
 
+include_directories(maca_common/include)
+
+add_subdirectory(maca_common)
+add_subdirectory(maca_lemmatizer)
 add_subdirectory(maca_trans_parser)
 
 #set(CMAKE_INSTALL_PREFIX ../)
diff --git a/maca_common/CMakeLists.txt b/maca_common/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d0dbc552b3f9756143c299ffe07eb309fdbda97c
--- /dev/null
+++ b/maca_common/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(SOURCES  src/util.c
+ src/hash.c
+ src/dico.c
+ src/word_emb.c
+ src/mcd.c
+ src/dico_vec.c
+ src/feat_types.c
+)
+
+# static library built from the sources listed above
+add_library(maca_common STATIC ${SOURCES})
diff --git a/maca_trans_parser/src/dico.h b/maca_common/include/dico.h
similarity index 100%
rename from maca_trans_parser/src/dico.h
rename to maca_common/include/dico.h
diff --git a/maca_trans_parser/src/dico_vec.h b/maca_common/include/dico_vec.h
similarity index 100%
rename from maca_trans_parser/src/dico_vec.h
rename to maca_common/include/dico_vec.h
diff --git a/maca_trans_parser/src/feat_types.h b/maca_common/include/feat_types.h
similarity index 100%
rename from maca_trans_parser/src/feat_types.h
rename to maca_common/include/feat_types.h
diff --git a/maca_trans_parser/src/hash.h b/maca_common/include/hash.h
similarity index 100%
rename from maca_trans_parser/src/hash.h
rename to maca_common/include/hash.h
diff --git a/maca_trans_parser/src/mcd.h b/maca_common/include/mcd.h
similarity index 72%
rename from maca_trans_parser/src/mcd.h
rename to maca_common/include/mcd.h
index aad932aaec50f2f4ac9346d1c6edc9783d61ae04..e759789f3667de115689dc6275f032161f3d3784 100644
--- a/maca_trans_parser/src/mcd.h
+++ b/maca_common/include/mcd.h
@@ -18,15 +18,19 @@
 typedef struct {
   int nb_col;
   int type2col[FEAT_TYPE_NB];
-  int *col2type;
+  /* int *col2type; */
   int *type;
+  char **type_str;
   int *representation;
   char **filename;
   dico **dico_array;
   word_emb **word_emb_array;
 } mcd;
 
-mcd *mcd_read(char *mcd_filename, char *corpus_filename, dico_vec *vocabs);
+mcd *mcd_build_conll07(void);
+mcd *mcd_read(char *mcd_filename);
+void mcd_link_to_dico(mcd *m, dico_vec *vocabs);
+void mcd_extract_dico_from_corpus(mcd *m, char *corpus_filename);
 void mcd_free(mcd *m);
 int mcd_get_code(mcd *m, char *str, int col);
 dico_vec *mcd_build_dico_vec(mcd *mcd_struct);
diff --git a/maca_trans_parser/src/util.h b/maca_common/include/util.h
similarity index 100%
rename from maca_trans_parser/src/util.h
rename to maca_common/include/util.h
diff --git a/maca_trans_parser/src/word_emb.h b/maca_common/include/word_emb.h
similarity index 100%
rename from maca_trans_parser/src/word_emb.h
rename to maca_common/include/word_emb.h
diff --git a/maca_trans_parser/src/dico.c b/maca_common/src/dico.c
similarity index 100%
rename from maca_trans_parser/src/dico.c
rename to maca_common/src/dico.c
diff --git a/maca_trans_parser/src/dico_vec.c b/maca_common/src/dico_vec.c
similarity index 100%
rename from maca_trans_parser/src/dico_vec.c
rename to maca_common/src/dico_vec.c
diff --git a/maca_trans_parser/src/feat_types.c b/maca_common/src/feat_types.c
similarity index 100%
rename from maca_trans_parser/src/feat_types.c
rename to maca_common/src/feat_types.c
diff --git a/maca_trans_parser/src/hash.c b/maca_common/src/hash.c
similarity index 100%
rename from maca_trans_parser/src/hash.c
rename to maca_common/src/hash.c
diff --git a/maca_common/src/mcd.c b/maca_common/src/mcd.c
new file mode 100644
index 0000000000000000000000000000000000000000..a11cae21294ab9a28cf5f51a558d0d1bb44e35fa
--- /dev/null
+++ b/maca_common/src/mcd.c
@@ -0,0 +1,315 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+
+#include "mcd.h"
+#include "util.h"
+#include "dico.h"
+#include "word_emb.h"
+
+
+/* Allocates an mcd with nb_col columns; no column is described yet. */
+mcd *mcd_new(int nb_col)
+{
+  int col, feat;
+  mcd *desc = (mcd *)memalloc(sizeof(mcd));
+
+  desc->nb_col = nb_col;
+  /* no feature type is bound to a column */
+  for(feat = 0; feat < FEAT_TYPE_NB; feat++) desc->type2col[feat] = -1;
+
+  /* one slot per column in each parallel array */
+  desc->representation = (int *)memalloc(nb_col * sizeof(int));
+  desc->type = (int *)memalloc(nb_col * sizeof(int));
+  desc->type_str = (char **)memalloc(nb_col * sizeof(char *));
+  desc->filename = (char **)memalloc(nb_col * sizeof(char *));
+  desc->dico_array = (dico **)memalloc(nb_col * sizeof(dico *));
+  desc->word_emb_array = (word_emb **)memalloc(nb_col * sizeof(word_emb *));
+
+  for(col = 0; col < nb_col; col++){
+    desc->type[col] = -1;
+    desc->representation[col] = MCD_REPRESENTATION_NULL;
+    desc->type_str[col] = NULL;
+    desc->filename[col] = NULL;
+    desc->dico_array[col] = NULL;
+    desc->word_emb_array[col] = NULL;
+  }
+  return desc;
+}
+
+/* Releases m with its per-column strings, dictionaries and embeddings. */
+/* NOTE(review): dicos linked by mcd_link_to_dico() are freed here too - confirm ownership. */
+void mcd_free(mcd *m)
+{
+  int i;
+  if(m == NULL) return;
+  for(i=0; i < m->nb_col; i++){
+    if(m->dico_array[i]) dico_free(m->dico_array[i]);
+    if(m->word_emb_array[i]) word_emb_free(m->word_emb_array[i]);
+    free(m->type_str[i]); /* free(NULL) is a no-op */
+    free(m->filename[i]); /* strdup'ed by mcd_read(), used to be leaked */
+  }
+  free(m->representation); free(m->type);
+  free(m->filename); free(m->type_str); free(m->dico_array); free(m->word_emb_array);
+  free(m);
+}
+
+/* Converts str, read in column col, into its integer code according to the */
+/* representation of that column; MCD_INVALID_VALUE when it has none of the three below. */
+int mcd_get_code(mcd *m, char *str, int col){
+  const int repr = m->representation[col];
+  if(repr == MCD_REPRESENTATION_VOCAB) return dico_string2int(m->dico_array[col], str);
+  if(repr == MCD_REPRESENTATION_EMB) return word_emb_get_code(m->word_emb_array[col], str);
+  if(repr == MCD_REPRESENTATION_INT) return atoi(str);
+  return MCD_INVALID_VALUE;
+}
+
+/* Returns the largest column index declared in mcd_filename, -1 if there is none. */
+/* Blank lines and '#' comments are ignored, other ill formed lines are reported. */
+int mcd_max_column_index_in_file(char *mcd_filename)
+{
+  int max_col = -1;
+  FILE *f = myfopen(mcd_filename, "r");
+  char buffer[1000]; /* ugly */
+  int column;
+  char type[100], representation[100];
+  char filename[500]; /* ugly */
+  int line_number = 0;
+  /* no feof() test: it made a last line without '\n' be silently dropped */
+  while(fgets(buffer, sizeof(buffer), f)){
+    line_number++;
+    if((buffer[0] == '\n') || (buffer[0] == '#')) continue;
+    /* field widths keep sscanf() inside the destination arrays */
+    if(sscanf(buffer, "%d %99s %99s %499s", &column, type, representation, filename) != 4){
+      fprintf(stderr, "line %d of mcd file %s ill formed, I'm skipping it\n", line_number, mcd_filename);
+      continue;
+    }
+    if(column > max_col) max_col = column;
+  }
+  fclose(f); /* the stream used to be leaked */
+  return max_col;
+}
+
+
+/* takes as argument an mcd structure (m) and the name of a corpus file (corpus_filename) */
+/* populates the vocabularies of m with values found in corpus_filename */
+
+void mcd_extract_dico_from_corpus(mcd *m, char *corpus_filename)
+{
+  int col;
+
+  for(col = 0; col < m->nb_col; col++){
+    /* skip columns that are not vocabularies or that already own a dictionary */
+    if(m->representation[col] != MCD_REPRESENTATION_VOCAB) continue;
+    if(m->dico_array[col] != NULL) continue;
+
+    m->dico_array[col] = dico_extract_from_corpus(corpus_filename, col, m->type_str[col]);
+    fprintf(stderr, "extracting dico %s from corpus\n", m->type_str[col]);
+  }
+}
+
+
+/* takes as argument an mcd structure (m) and a dictionary vector (vocabs) */
+/* links the vocabularies of m to vocabularies of vocabs (based on their names) */
+
+void mcd_link_to_dico(mcd *m, dico_vec *vocabs)
+{
+  int column;
+  for(column=0; column < m->nb_col; column++){
+    const char *name = m->filename[column];
+    if((m->representation[column] != MCD_REPRESENTATION_VOCAB) || (m->dico_array[column] != NULL)) continue;
+    /* filename stays NULL after mcd_build_conll07(): handle it like "_" instead of crashing in strcmp() */
+    if((name != NULL) && strcmp(name, "_")) continue;
+    m->dico_array[column] = dico_vec_get_dico(vocabs, m->type_str[column]);
+    fprintf(stderr, "linking to dico %s\n", m->type_str[column]);
+  }
+}
+
+/* reads a multi column description file and produces an mcd structure */
+
+mcd *mcd_read(char *mcd_filename)
+{
+  int column;
+  char type[100];
+  char representation[100];
+  char filename[500]; /* ugly */
+  int line_number = 0;
+  char buffer[1000]; /* ugly */
+  int nb_col = mcd_max_column_index_in_file(mcd_filename);
+  mcd *m = mcd_new(nb_col + 1);
+  FILE *f = myfopen(mcd_filename, "r");
+
+  /* no feof() test: it made a last line without '\n' be silently dropped */
+  while(fgets(buffer, sizeof(buffer), f)){
+    line_number++;
+    if((buffer[0] == '\n') || (buffer[0] == '#')) continue;
+    /* field widths keep sscanf() inside the destination arrays; ill formed lines are skipped */
+    if(sscanf(buffer, "%d %99s %99s %499s", &column, type, representation, filename) != 4) continue;
+    fprintf(stderr, "column = %d type = %s representation = %s filename = %s\n", column, type, representation, filename);
+    /* a negative or too large index would write outside the per-column arrays */
+    if((column < 0) || (column >= m->nb_col)){
+      fprintf(stderr, "in line %d of mcd file %s invalid column index, I'm skipping it\n", line_number, mcd_filename);
+      continue;
+    }
+    free(m->type_str[column]); /* the same column may be described twice */
+    m->type[column] = feat_type_string2int(type);
+    m->type_str[column] = strdup(type);
+    if(m->type[column] == -1){
+      fprintf(stderr, "in line %d of mcd file %s invalid type, I'm skipping it\n", line_number, mcd_filename);
+      continue;
+    }
+    m->type2col[m->type[column]] = column;
+    if(!strcmp(representation, "_")) m->representation[column] = MCD_REPRESENTATION_NULL;
+    else if(!strcmp(representation, "EMB")) m->representation[column] = MCD_REPRESENTATION_EMB;
+    else if(!strcmp(representation, "VOCAB")) m->representation[column] = MCD_REPRESENTATION_VOCAB;
+    else if(!strcmp(representation, "INT")) m->representation[column] = MCD_REPRESENTATION_INT;
+    else{
+      fprintf(stderr, "in line %d of mcd file %s invalid mode of representation, I'm skipping it\n", line_number, mcd_filename);
+      m->representation[column] = MCD_REPRESENTATION_NULL;
+    }
+    /* the file name only exists for described columns: strcmp() on it used to dereference NULL otherwise */
+    free(m->filename[column]); m->filename[column] = NULL;
+    if(m->representation[column] == MCD_REPRESENTATION_NULL) continue;
+    m->filename[column] = strdup(filename);
+    if(!strcmp(filename, "_")) continue; /* "_" : nothing to load from disk */
+    if(m->representation[column] == MCD_REPRESENTATION_EMB){
+      fprintf(stderr, "loading word embedding %s\n", m->filename[column]);
+      m->word_emb_array[column] = word_emb_load(m->filename[column]);
+    }
+    else if(m->representation[column] == MCD_REPRESENTATION_VOCAB){
+      fprintf(stderr, "loading dico %s\n", m->filename[column]);
+      m->dico_array[column] = dico_read(m->filename[column], 0.5);
+    }
+  }
+  fclose(f);
+  return m;
+}
+
+
+/* Builds the default description of a CoNLL 2007 file, eight columns: */
+/*   0 INDEX   1 FORM   2 LEMMA   3 CPOS   4 POS   5 FEATS   6 GOV   7 LABEL */
+/* INDEX and GOV are represented as integers, every other column through a */
+/* vocabulary. No file name, dictionary or embedding is attached here. */
+mcd *mcd_build_conll07(void)
+{
+  /* the three tables below are parallel: one entry per column */
+  const int types[] = {
+    FEAT_TYPE_INDEX,
+    FEAT_TYPE_FORM,
+    FEAT_TYPE_LEMMA,
+    FEAT_TYPE_CPOS,
+    FEAT_TYPE_POS,
+    FEAT_TYPE_FEATS,
+    FEAT_TYPE_GOV,
+    FEAT_TYPE_LABEL
+  };
+  const char *names[] = {
+    "INDEX", "FORM", "LEMMA", "CPOS", "POS", "FEATS", "GOV", "LABEL"
+  };
+  const int representations[] = {
+    MCD_REPRESENTATION_INT,   /* INDEX */
+    MCD_REPRESENTATION_VOCAB, /* FORM */
+    MCD_REPRESENTATION_VOCAB, /* LEMMA */
+    MCD_REPRESENTATION_VOCAB, /* CPOS */
+    MCD_REPRESENTATION_VOCAB, /* POS */
+    MCD_REPRESENTATION_VOCAB, /* FEATS */
+    MCD_REPRESENTATION_INT,   /* GOV */
+    MCD_REPRESENTATION_VOCAB  /* LABEL */
+  };
+  const int column_nb = 8;
+  mcd *desc = mcd_new(column_nb);
+  int col;
+
+  /* fill the columns in file order */
+  for(col = 0; col < column_nb; col++){
+    desc->type[col] = types[col];
+    desc->type_str[col] = strdup(names[col]);
+    desc->representation[col] = representations[col];
+    /* reverse mapping: feature type -> column */
+    desc->type2col[types[col]] = col;
+  }
+
+  return desc;
+}
+
+mcd *mcd_read_old(char *mcd_filename, char *corpus_filename, dico_vec *vocabs)
+{
+  int column;
+  char type[100];
+  char representation[100];
+  char filename[500]; /* ugly */
+  int fields_number;
+  int line_number = 0;
+  char buffer[1000]; /* ugly */
+  int nb_col = mcd_max_column_index_in_file(mcd_filename);
+  mcd *m = mcd_new(nb_col + 1);
+  FILE *f = myfopen(mcd_filename, "r");
+  /* Former one-pass reader: parses the mcd file and resolves "_" VOCAB columns while reading. */
+
+  while(fgets(buffer, 1000, f)){
+    line_number++;
+    if(feof(f)) break; /* NOTE(review): also drops a last line that has no trailing newline */
+     if((buffer[0] == '\n') || (buffer[0] == '#')) continue;
+     fields_number = sscanf(buffer, "%d %s %s %s", &column, type, representation, filename);
+     if(fields_number != 4){
+       /* ill formed lines are skipped without any message */
+       continue;
+     }
+     fprintf(stderr, "column = %d type = %s representation = %s filename = %s\n", column, type, representation, filename);
+     m->type[column] = feat_type_string2int(type); /* NOTE(review): type_str[column] is never set in this version */
+     if(m->type[column] == -1){
+       fprintf(stderr, "in line %d of mcd file %s invalid type, I'm skipping it\n", line_number, mcd_filename);	
+       continue;
+     }
+    m->type2col[m->type[column]] = column;
+    /* map the textual representation onto its MCD_REPRESENTATION_* code */
+    if(!strcmp(representation, "_")) m->representation[column] = MCD_REPRESENTATION_NULL;
+    else if(!strcmp(representation, "EMB")) m->representation[column] = MCD_REPRESENTATION_EMB;
+    else if(!strcmp(representation, "VOCAB")) m->representation[column] = MCD_REPRESENTATION_VOCAB;
+    else if(!strcmp(representation, "INT")) m->representation[column] = MCD_REPRESENTATION_INT;
+    else{ 
+      fprintf(stderr, "in line %d of mcd file %s invalid mode of representation, I'm skipping it\n", line_number, mcd_filename);	  
+      m->representation[column] = MCD_REPRESENTATION_NULL;
+    }
+    if(m->representation[column] != MCD_REPRESENTATION_NULL){
+      m->filename[column] = strdup(filename);
+      if(m->representation[column] == MCD_REPRESENTATION_EMB){
+	fprintf(stderr, "loading word embedding %s\n", m->filename[column]);
+	m->word_emb_array[column] = word_emb_load(m->filename[column]);
+      }
+      else if(m->representation[column] == MCD_REPRESENTATION_VOCAB){
+	if(!strcmp(m->filename[column], "_")){ /* "_" : no dictionary file for this column */
+	  if(corpus_filename){
+	    fprintf(stderr, "extracting dico %s from corpus\n", type);
+	    m->dico_array[column] = dico_extract_from_corpus(corpus_filename, column, type);
+	  }
+	  else if(vocabs){ /* only consulted when no corpus file is given */
+	    fprintf(stderr, "linking to dico %s\n", type);
+	    m->dico_array[column] = dico_vec_get_dico(vocabs, type);
+	  }
+	  if(m->dico_array[column] == NULL)
+	    fprintf(stderr, "cannot find dico %s\n", type);
+	}
+	else{
+	  fprintf(stderr, "loading dico %s\n", m->filename[column]);
+	  m->dico_array[column] = dico_read(m->filename[column], 0.5); /* NOTE(review): 0.5 is presumably the hash ratio - confirm */
+	}
+      }
+    }
+  }	
+  fclose(f);
+  return m;
+}
+
+
+/* Gathers in a new dico_vec every dictionary attached to a column of mcd_struct. */
+dico_vec *mcd_build_dico_vec(mcd *mcd_struct)
+{
+  dico_vec *collected = dico_vec_new();
+  int col = 0;
+  while(col < mcd_struct->nb_col){
+    dico *d = mcd_struct->dico_array[col++];
+    if(d != NULL) dico_vec_add(collected, d);
+  }
+  return collected;
+}
diff --git a/maca_trans_parser/src/util.c b/maca_common/src/util.c
similarity index 100%
rename from maca_trans_parser/src/util.c
rename to maca_common/src/util.c
diff --git a/maca_trans_parser/src/word_emb.c b/maca_common/src/word_emb.c
similarity index 100%
rename from maca_trans_parser/src/word_emb.c
rename to maca_common/src/word_emb.c
diff --git a/maca_lemmatizer/CMakeLists.txt b/maca_lemmatizer/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a4f7e9da1fccafb07525ef1d29add9ffeb7a5a6f
--- /dev/null
+++ b/maca_lemmatizer/CMakeLists.txt
@@ -0,0 +1,12 @@
+set(SOURCES src/context.c)
+
+# library built from the sources above; src is added to the include path
+include_directories(src)
+add_library(maca_lemmatizer_lib STATIC ${SOURCES})
+
+# the executable links its own library and maca_common, and is installed in bin
+
+add_executable(maca_lemmatizer ./src/maca_lemmatizer.c)
+target_link_libraries(maca_lemmatizer maca_lemmatizer_lib)
+target_link_libraries(maca_lemmatizer maca_common)
+install (TARGETS maca_lemmatizer DESTINATION bin)
diff --git a/maca_lemmatizer/src/context.c b/maca_lemmatizer/src/context.c
new file mode 100644
index 0000000000000000000000000000000000000000..3ba024477c1376898047ca1e91da20470ec6465a
--- /dev/null
+++ b/maca_lemmatizer/src/context.c
@@ -0,0 +1,149 @@
+#include<stdlib.h>
+#include<stdio.h>
+#include<string.h>
+#include<unistd.h>
+#include<getopt.h>
+#include "context.h"
+#include "util.h"
+
+
+#define STANDARD_FPLM_FILENAME "fplm"
+
+void context_set_linguistic_resources_filenames(context *ctx);
+
+/* Releases ctx and everything it owns (free(NULL) is a no-op, no test needed). */
+void context_free(context *ctx)
+{
+  free(ctx->program_name); free(ctx->conll_filename); free(ctx->fplm_filename);
+  free(ctx->language); free(ctx->maca_data_path);
+  free(ctx->mcd_filename); /* strdup'ed by context_read_options(), used to be leaked */
+  if(ctx->mcd_struct) mcd_free(ctx->mcd_struct); /* from mcd_read() or mcd_build_conll07(), used to be leaked */
+  free(ctx);
+}
+
+/* Allocates a context with its defaults: flags off, no file, language "fr". */
+context *context_new(void)
+{
+  context *fresh = (context *)memalloc(sizeof(context));
+
+  /* flags */
+  fresh->help = fresh->verbose = fresh->debug_mode = 0;
+  /* nothing is known about the resources until the command line is read */
+  fresh->program_name = NULL;
+  fresh->conll_filename = fresh->fplm_filename = fresh->mcd_filename = NULL;
+  fresh->maca_data_path = NULL;
+  fresh->mcd_struct = NULL;
+  /* heap copy: context_free() releases it like any other string */
+  fresh->language = strdup("fr");
+  return fresh;
+}
+
+/* Prints the usage line and the options that are not tied to an input resource. */
+void context_general_help_message(context *ctx)
+{
+    fprintf(stderr, "usage: %s [options]\nOptions:\n", ctx->program_name);
+    fprintf(stderr, "\t-h --help             : print this message\n");
+    fprintf(stderr, "\t-v --verbose          : activate verbose mode\n");
+    fprintf(stderr, "\t-d --debug            : activate debug mode\n"); /* replaces -r --hratio, which this program does not parse */
+}
+
+void context_conll_help_message(context *ctx){ /* -i */
+  fputs("\t-i --conll  <file>  : conll file name\n", stderr);
+}
+void context_fplm_help_message(context *ctx){ /* -f */
+  fputs("\t-f --fplm   <file>  : fplm (form pos lemma morpho) file\n", stderr);
+}
+void context_mcd_help_message(context *ctx){ /* -m */
+  fputs("\t-m --mcd   <file> : multi column description file name\n", stderr);
+}
+void context_language_help_message(context *ctx){ /* -C */
+  fputs("\t-C --language  : identifier of the language to use\n", stderr);
+}
+void context_maca_data_path_help_message(context *ctx){ /* -M */
+  fputs("\t-M --maca_data_path  : path to maca_data directory\n", stderr);
+}
+
+/* Parses the command line into a new context; without -m the CoNLL 2007 layout is used. */
+context *context_read_options(int argc, char *argv[])
+{
+  int c;
+  int option_index = 0;
+  context *ctx = context_new();
+  /* getopt_long() requires a final all-zero entry; it was missing from the table */
+  static struct option long_options[] =
+    {
+      {"help",                no_argument,       0, 'h'},
+      {"verbose",             no_argument,       0, 'v'},
+      {"debug",               no_argument,       0, 'd'},
+      {"conll",               required_argument, 0, 'i'},
+      {"mcd",                 required_argument, 0, 'm'},
+      {"language",            required_argument, 0, 'C'},
+      {"fplm",                required_argument, 0, 'f'},
+      {"maca_data_path",      required_argument, 0, 'M'},
+      {0,                     0,                 0, 0}
+    };
+
+  ctx->program_name = strdup(argv[0]);
+  optind = 0;
+  opterr = 0;
+  while ((c = getopt_long (argc, argv, "hvdi:f:m:C:M:", long_options, &option_index)) != -1){
+    switch (c)
+      {
+      case 'd':
+        ctx->debug_mode = 1;
+        break;
+      case 'h':
+        ctx->help = 1;
+        break;
+      case 'v':
+        ctx->verbose = 1;
+        break;
+      /* string options: release any earlier value (repeated option, or the default language) */
+      case 'f':
+        free(ctx->fplm_filename); ctx->fplm_filename = strdup(optarg);
+        break;
+      case 'i':
+        free(ctx->conll_filename); ctx->conll_filename = strdup(optarg);
+        break;
+      case 'm':
+        free(ctx->mcd_filename); ctx->mcd_filename = strdup(optarg);
+        if(ctx->mcd_struct) mcd_free(ctx->mcd_struct);
+        ctx->mcd_struct = mcd_read(ctx->mcd_filename);
+        break;
+      case 'C':
+        free(ctx->language); ctx->language = strdup(optarg);
+        break;
+      case 'M':
+        free(ctx->maca_data_path); ctx->maca_data_path = strdup(optarg);
+        break;
+      }
+  }
+  context_set_linguistic_resources_filenames(ctx);
+  if(ctx->mcd_filename == NULL)
+    ctx->mcd_struct = mcd_build_conll07();
+  return ctx;
+}
+
+/* Fills in the fplm file name when it was not given on the command line. */
+/* The default is <data path>/<language>/bin/fplm, where the data path is the */
+/* maca_data_path option or, failing that, the MACAON_DIR environment variable. */
+void context_set_linguistic_resources_filenames(context *ctx)
+{
+  char absolute_filename[500];
+  const char *root = ctx->maca_data_path;
+  int length;
+
+  if(ctx->fplm_filename || ctx->help) return; /* no default path needed */
+  if(root == NULL) root = getenv("MACAON_DIR");
+  if(root == NULL){ /* strcat() on a NULL getenv() result used to crash here */
+    fprintf(stderr, "cannot locate the fplm file: use -f, -M or set MACAON_DIR\n");
+    exit(1);
+  }
+  /* snprintf() cannot write past the buffer, unlike the former strcat() chain */
+  length = snprintf(absolute_filename, sizeof(absolute_filename), "%s/%s/bin/%s", root, ctx->language, STANDARD_FPLM_FILENAME);
+  if((length < 0) || ((size_t)length >= sizeof(absolute_filename))){
+    fprintf(stderr, "path to the fplm file is too long\n");
+    exit(1);
+  }
+  ctx->fplm_filename = strdup(absolute_filename);
+}
diff --git a/maca_lemmatizer/src/context.h b/maca_lemmatizer/src/context.h
new file mode 100644
index 0000000000000000000000000000000000000000..5352eb3f71cebaf88b5c571a81ae7051816d207c
--- /dev/null
+++ b/maca_lemmatizer/src/context.h
@@ -0,0 +1,31 @@
+#ifndef __MACA_LEMMATIZER_CONTEXT__
+#define __MACA_LEMMATIZER_CONTEXT__
+
+#include "mcd.h"
+#include <stdlib.h>
+
+typedef struct {
+  int help;             /* set to 1 by -h / --help */
+  int verbose;          /* set to 1 by -v / --verbose */
+  int debug_mode;       /* set to 1 by -d / --debug */
+  char *program_name;   /* copy of argv[0] */
+  char *conll_filename; /* -i / --conll */
+  char *fplm_filename;  /* -f / --fplm; context.c builds a default path when it is not given */
+  char *language;       /* -C / --language; context_new() sets it to "fr" */
+  char *maca_data_path; /* -M / --maca_data_path */
+  char *mcd_filename;   /* -m / --mcd */
+  mcd *mcd_struct;      /* read from mcd_filename, or mcd_build_conll07() when no mcd file is given */
+} context;
+
+context *context_new(void);
+void context_free(context *ctx);
+
+context *context_read_options(int argc, char *argv[]);
+void context_general_help_message(context *ctx);
+void context_conll_help_message(context *ctx);
+/* defined in context.c and called from maca_lemmatizer.c: its prototype was missing */
+void context_fplm_help_message(context *ctx);
+void context_language_help_message(context *ctx);
+void context_maca_data_path_help_message(context *ctx);
+void context_mcd_help_message(context *ctx);
+#endif
diff --git a/maca_lemmatizer/src/maca_lemmatizer.c b/maca_lemmatizer/src/maca_lemmatizer.c
new file mode 100644
index 0000000000000000000000000000000000000000..6737bbb366ea379386d79a8a182a367943c3a9a1
--- /dev/null
+++ b/maca_lemmatizer/src/maca_lemmatizer.c
@@ -0,0 +1,154 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+#include<ctype.h>
+
+#include"util.h"
+#include"hash.h"
+#include"mcd.h"
+#include"context.h"
+
+/* Prints the whole usage message: general options first, then the input options. */
+void maca_lemmatizer_help_message(context *ctx){
+  context_general_help_message(ctx);
+  fputs("INPUT\n", stderr);
+  context_conll_help_message(ctx);
+  context_mcd_help_message(ctx);
+  context_language_help_message(ctx);
+  context_maca_data_path_help_message(ctx);
+  context_fplm_help_message(ctx);
+}
+
+
+/* Prints the help message and exits with status 1 when help was requested */
+/* or when no conll file was given; returns otherwise. */
+void maca_lemmatizer_check_options(context *ctx){
+  int usable = (ctx->conll_filename != NULL) && !ctx->help;
+
+  if(usable)
+    return;
+
+  /* the conll file is the only input checked here */
+  maca_lemmatizer_help_message(ctx);
+  exit(1);
+}
+
+/* Loads the fplm lexicon: one "form<TAB>pos<TAB>lemma<TAB>morpho" entry per line. */
+/* Entry number n is registered in form_pos_ht under the key "form/pos" and its */
+/* lemma is stored at index n of the returned array (keys and lemmas are heap copies). */
+char **read_fplm_file(char *fplm_filename, hash *form_pos_ht)
+{
+  char line[4096];
+  char form[1000];
+  char pos[1000];
+  char lemma[1000];
+  char morpho[1000];
+  char key[2001]; /* form + '/' + pos always fits */
+  int num = 0;
+  int lemma_array_size = 10000;
+  char **lemma_array = (char **)memalloc(lemma_array_size * sizeof(char *));
+  char **bigger;
+  FILE *f = myfopen(fplm_filename, "r");
+  /* line by line: the former fscanf() loop never moved past an ill formed entry (endless loop) */
+  while(fgets(line, sizeof(line), f)){
+    if(line[0] == '\n') continue; /* blank lines were silently skipped by fscanf() as well */
+    /* field widths keep sscanf() inside the 1000 byte arrays */
+    if(sscanf(line, "%999[^\t]\t%999s\t%999[^\t]\t%999s", form, pos, lemma, morpho) != 4){
+      fprintf(stderr, "incorrect fplm entry, skipping it\n");
+      continue;
+    }
+    if(num >= lemma_array_size){
+      bigger = realloc(lemma_array, (2 * lemma_array_size + 1) * sizeof(char *));
+      if(bigger == NULL){ fprintf(stderr, "memory allocation error\n"); exit(1); }
+      lemma_array = bigger;
+      lemma_array_size = 2 * lemma_array_size + 1;
+    }
+    snprintf(key, sizeof(key), "%s/%s", form, pos); /* form itself is no longer overrun by strcat() */
+    hash_add(form_pos_ht, strdup(key), num);
+    lemma_array[num++] = strdup(lemma);
+  }
+  fclose(f); /* the stream used to be leaked */
+  return lemma_array;
+}
+
+/* Lower-cases s in place and returns it. */
+char *to_lower_string(char *s)
+{
+  /* unsigned char: tolower() is undefined for negative char values */
+  for(char *p = s; *p != '\0'; p++) *p = (char)tolower((unsigned char)*p);
+  return s;
+}
+
+
+/* Copies every line of the conll file to stdout, followed by a tab and the */
+/* lemma of its (form, pos) pair. The lemma is looked up in the fplm lexicon */
+/* under the key "form/pos", then under the lower-cased key; when both fail */
+/* the form itself is printed. Lines starting with a newline or a space are */
+/* replaced by an empty line. */
+int main(int argc, char *argv[])
+{
+  hash *form_pos_ht = hash_new(1000000);
+  char buffer[10000];
+  char form_pos[10000]; /* "form/pos" key, see the size remark below */
+  char *form;
+  char *pos;
+  char *token;
+  char *lemma;
+  int column_nb;
+  int index_form_pos;
+  char **lemma_array;
+  context *ctx;
+  mcd *m;
+  FILE *f;
+
+  ctx = context_read_options(argc, argv);
+  maca_lemmatizer_check_options(ctx);
+  /* column layout of the input: tells which columns hold the form and the pos */
+  m = ctx->mcd_struct;
+  f = myfopen(ctx->conll_filename, "r");
+  lemma_array = read_fplm_file(ctx->fplm_filename, form_pos_ht);
+
+  /* no feof() test: it made a last line without '\n' be silently dropped */
+  while(fgets(buffer, sizeof(buffer), f)){
+    if((buffer[0] == '\n') || (buffer[0] == ' ')){
+      printf("\n");
+      continue;
+    }
+    buffer[strcspn(buffer, "\n")] = '\0'; /* strips the newline only when there is one */
+    printf("%s", buffer);
+
+    /* form and pos point inside buffer: nothing to copy, nothing to free */
+    form = pos = NULL;
+    column_nb = 0;
+    for(token = strtok(buffer, "\t"); token; token = strtok(NULL, "\t")){
+      if(column_nb >= m->nb_col) break;
+      if(m->type[column_nb] == FEAT_TYPE_FORM) form = token;
+      if(m->type[column_nb] == FEAT_TYPE_POS) pos = token;
+      column_nb++;
+    }
+
+    /* a line lacking the form or pos column used to crash in strcpy()/strcat() */
+    lemma = form ? form : "_";
+    if(form && pos){
+      /* form and pos are two distinct tokens of buffer, so the key is */
+      /* never longer than buffer itself and fits in form_pos */
+      snprintf(form_pos, sizeof(form_pos), "%s/%s", form, pos);
+      index_form_pos = hash_get_val(form_pos_ht, form_pos);
+      if(index_form_pos == HASH_INVALID_VAL){
+        /* NOTE(review): the pos part of the key is lower-cased too - confirm this is intended */
+        to_lower_string(form_pos);
+        index_form_pos = hash_get_val(form_pos_ht, form_pos);
+      }
+      if(index_form_pos != HASH_INVALID_VAL) lemma = lemma_array[index_form_pos];
+    }
+    printf("\t%s\n", lemma);
+  }
+
+  /* NOTE(review): the lemma strings themselves are not freed, their count is not known here */
+  fclose(f);
+  free(lemma_array);
+  hash_free(form_pos_ht);
+  context_free(ctx);
+  return 0;
+}
+
diff --git a/maca_trans_parser/CMakeLists.txt b/maca_trans_parser/CMakeLists.txt
index 739776227acc569240ce915852f59890fdd24ae4..c638a2c3ad89a17f9036cc525123a67608e9d166 100644
--- a/maca_trans_parser/CMakeLists.txt
+++ b/maca_trans_parser/CMakeLists.txt
@@ -1,10 +1,8 @@
 set(SOURCES src/context.c
- src/dico_vec.c
  src/feat_desc.c
  src/feature_table.c
  src/movement.c
  src/sentence.c
- src/util.c
  src/feat_fct.c
  src/feat_vec.c
  src/global_feat_vec.c
@@ -12,24 +10,17 @@ set(SOURCES src/context.c
  src/simple_decoder.c
  src/cf_file.c
  src/feat_lib.c
- src/hash.c
  src/perceptron.c
  src/stack.c
  src/word.c
  src/config2feat_vec.c
  src/depset.c
  src/feat_model.c
- src/word_emb.c
  src/config.c
- src/dico.c
- src/feat_types.c
- src/mcd.c
  src/queue.c
  src/beam.c
 )
 
-
-
 #compiling library
 include_directories(src)
 add_library(transparse STATIC ${SOURCES})
@@ -38,23 +29,28 @@ add_library(transparse STATIC ${SOURCES})
 
 add_executable(maca_trans_parser_conll2cff ./src/transform_treebank.c)
 target_link_libraries(maca_trans_parser_conll2cff transparse)
+target_link_libraries(maca_trans_parser_conll2cff maca_common)
 install (TARGETS maca_trans_parser_conll2cff DESTINATION bin)
 
 add_executable(maca_trans_parser ./src/decode.c)
 target_link_libraries(maca_trans_parser transparse)
+target_link_libraries(maca_trans_parser maca_common)
 install (TARGETS maca_trans_parser DESTINATION bin)
 
 add_executable(maca_trans_parser_train ./src/train_perceptron.c)
 target_compile_options(maca_trans_parser_train INTERFACE -Wall)
 target_link_libraries(maca_trans_parser_train transparse)
+target_link_libraries(maca_trans_parser_train maca_common)
 install (TARGETS maca_trans_parser_train DESTINATION bin)
 
 add_executable(maca_trans_parser_train_from_cff ./src/train.c)
 target_link_libraries(maca_trans_parser_train_from_cff transparse)
+target_link_libraries(maca_trans_parser_train_from_cff maca_common)
 install (TARGETS maca_trans_parser_train_from_cff DESTINATION bin)
 
 add_executable(maca_trans_parser_cff_cutoff ./src/cff_cutoff.c)
 target_link_libraries(maca_trans_parser_cff_cutoff transparse)
+target_link_libraries(maca_trans_parser_cff_cutoff maca_common)
 install (TARGETS maca_trans_parser_cff_cutoff DESTINATION bin)
 
 #add_executable(test_w2v ./src/test_w2v.c)
diff --git a/maca_trans_parser/src/context.c b/maca_trans_parser/src/context.c
index 1d98c287d639216f45ea26206cd6c99850601c45..b39cafb1a3f01d51aaccbf1e460b2bc3c8154923 100644
--- a/maca_trans_parser/src/context.c
+++ b/maca_trans_parser/src/context.c
@@ -256,6 +256,7 @@ context *context_read_options(int argc, char *argv[])
 	break;
       case 'C':
 	ctx->mcd_filename = strdup(optarg);
+	ctx->mcd_struct = mcd_read(ctx->mcd_filename);
 	break;
       case 'F':
 	ctx->features_model_filename = strdup(optarg);
@@ -281,11 +282,15 @@ context *context_read_options(int argc, char *argv[])
     ctx->mvt_nb = ctx->mcd_struct->dico_array[ctx->mcd_struct->type2col[FEAT_TYPE_LABEL]]->nbelem * 2 + 1;
     }*/
   
+  /*
   if(ctx->features_model && ctx->mcd_struct)
     feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);
-
+  */
   context_set_linguistic_resources_filenames(ctx);
 
+  if(ctx->mcd_filename == NULL){
+    ctx->mcd_struct = mcd_build_conll07();
+  }
 
   return ctx;
 }
@@ -319,11 +324,11 @@ void context_set_linguistic_resources_filenames(context *ctx)
     ctx->vocabs_filename = strdup(absolute_filename);
   }
 
-  if(!ctx->mcd_filename){
+  /*  if(!ctx->mcd_filename){
     strcpy(absolute_filename, absolute_path);
     strcat(absolute_filename, STANDARD_MULTI_COL_DESC_FILENAME);
     ctx->mcd_filename = strdup(absolute_filename);
-  }
+    }*/
 
   if(!ctx->features_model_filename){
     strcpy(absolute_filename, absolute_path);
diff --git a/maca_trans_parser/src/decode.c b/maca_trans_parser/src/decode.c
index 909f923b47584acc679e0f0554d9cbb050e0e806..5f8679dc5eb01c010c890d89dd3f7ffa171e1735 100644
--- a/maca_trans_parser/src/decode.c
+++ b/maca_trans_parser/src/decode.c
@@ -54,7 +54,9 @@ int main(int argc, char *argv[])
   decode_check_options(ctx);
 
   ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
-  ctx->mcd_struct = mcd_read(ctx->mcd_filename, NULL, ctx->vocabs);
+  mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs);
+
+
   ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
 
   if(ctx->dico_labels == NULL){
@@ -69,7 +71,7 @@ int main(int argc, char *argv[])
 
   /* when in stream mode, force to renumber the tokens (ugly !) */
   if(ctx->stream_mode){
-    ctx->mcd_struct->col2type[ctx->mcd_struct->type2col[FEAT_TYPE_INDEX]] = -1;
+    ctx->mcd_struct->type[ctx->mcd_struct->type2col[FEAT_TYPE_INDEX]] = -1;
   }
 
 
diff --git a/maca_trans_parser/src/mcd.c b/maca_trans_parser/src/mcd.c
deleted file mode 100644
index c701da7a8718f03e2772865cb4faacc0ca26f94d..0000000000000000000000000000000000000000
--- a/maca_trans_parser/src/mcd.c
+++ /dev/null
@@ -1,169 +0,0 @@
-#include<stdio.h>
-#include<stdlib.h>
-#include<string.h>
-
-#include "mcd.h"
-#include "util.h"
-#include "dico.h"
-#include "word_emb.h"
-
-
-mcd *mcd_new(int nb_col)
-{
-  mcd *m = (mcd *)memalloc(sizeof(mcd));
-  int i;
-  m->nb_col = nb_col;
-
-  for(i=0; i < FEAT_TYPE_NB; i++)
-    m->type2col[i] = -1;
-
-  m->representation = (int *)memalloc(nb_col * sizeof(int));
-  m->type =           (int *)memalloc(nb_col * sizeof(int));
-  m->col2type =       (int *)memalloc(nb_col * sizeof(int));
-  m->filename =       (char **)memalloc(nb_col * sizeof(char *));
-  m->dico_array =     (dico **)memalloc(nb_col * sizeof(dico *));
-  m->word_emb_array = (word_emb **)memalloc(nb_col * sizeof(word_emb *));
-  
-  for(i=0; i < nb_col; i++){
-    m->representation[i] = MCD_REPRESENTATION_NULL;
-    m->type[i] = -1;
-    m->col2type[i] = -1;
-    m->filename[i] = NULL;
-    m->dico_array[i] = NULL;
-    m->word_emb_array[i] = NULL;;
-  }
-  return m;
-}
-
-void mcd_free(mcd *m)
-{
-  int i;
-  for(i=0; i < m->nb_col; i++){
-    if(m->dico_array[i]) dico_free(m->dico_array[i]);
-    if(m->word_emb_array[i]) word_emb_free(m->word_emb_array[i]);
-  }
-  free(m->representation);
-  free(m->filename);
-  free(m->dico_array);
-  free(m->word_emb_array);
-
-  free(m);
-}
-
-int mcd_get_code(mcd *m, char *str, int col){
-  if(m->representation[col] == MCD_REPRESENTATION_VOCAB)
-    return dico_string2int(m->dico_array[col], str);
-  if(m->representation[col] == MCD_REPRESENTATION_EMB)
-    return word_emb_get_code(m->word_emb_array[col], str);
-  if(m->representation[col] == MCD_REPRESENTATION_INT)
-    return atoi(str);
-  return MCD_INVALID_VALUE;
-}
-
-int mcd_max_column_index_in_file(char *mcd_filename)
-{
-  int max_col = -1;
-  FILE *f = myfopen(mcd_filename, "r");
-  char buffer[1000]; /* ugly */
-  int column;
-  char type[100];
-  char representation[100];
-  char filename[500]; /* ugly */
-  int fields_number;
-  int line_number = 0;
-
-  while(fgets(buffer, 1000, f)){
-    line_number++;
-    if(feof(f)) break;
-     if((buffer[0] == '\n') || (buffer[0] == '#')) continue;
-    fields_number = sscanf(buffer, "%d %s %s %s", &column, type, representation, filename);
-    if(fields_number != 4){
-      fprintf(stderr, "line %d of mcd file %s ill formed, I'm skipping it\n", line_number, mcd_filename);
-      continue;
-    }
-    if(column > max_col) max_col = column;
-  }
-  return max_col;
-}
-
-mcd *mcd_read(char *mcd_filename, char *corpus_filename, dico_vec *vocabs)
-{
-  int column;
-  char type[100];
-  char representation[100];
-  char filename[500]; /* ugly */
-  int fields_number;
-  int line_number = 0;
-  char buffer[1000]; /* ugly */
-  int nb_col = mcd_max_column_index_in_file(mcd_filename);
-  mcd *m = mcd_new(nb_col + 1);
-  FILE *f = myfopen(mcd_filename, "r");
-  /* int first = 1; */
-
-  while(fgets(buffer, 1000, f)){
-    line_number++;
-    if(feof(f)) break;
-     if((buffer[0] == '\n') || (buffer[0] == '#')) continue;
-     fields_number = sscanf(buffer, "%d %s %s %s", &column, type, representation, filename);
-     if(fields_number != 4){
-       /* fprintf(stderr, "line %d of mcd file %s ill formed, I'm skipping it\n", line_number, mcd_filename); */
-       continue;
-     }
-     fprintf(stderr, "column = %d type = %s representation = %s filename = %s\n", column, type, representation, filename);
-     m->type[column] = feat_type_string2int(type);
-     if(m->type[column] == -1){
-       fprintf(stderr, "in line %d of mcd file %s invalid type, I'm skipping it\n", line_number, mcd_filename);	
-       continue;
-     }
-    m->type2col[m->type[column]] = column;
-    m->col2type[column] = m->type[column];
-    if(!strcmp(representation, "_")) m->representation[column] = MCD_REPRESENTATION_NULL;
-    else if(!strcmp(representation, "EMB")) m->representation[column] = MCD_REPRESENTATION_EMB;
-    else if(!strcmp(representation, "VOCAB")) m->representation[column] = MCD_REPRESENTATION_VOCAB;
-    else if(!strcmp(representation, "INT")) m->representation[column] = MCD_REPRESENTATION_INT;
-    else{ 
-      fprintf(stderr, "in line %d of mcd file %s invalid mode of representation, I'm skipping it\n", line_number, mcd_filename);	  
-      m->representation[column] = MCD_REPRESENTATION_NULL;
-    }
-    if(m->representation[column] != MCD_REPRESENTATION_NULL){
-      m->filename[column] = strdup(filename);
-      if(m->representation[column] == MCD_REPRESENTATION_EMB){
-	fprintf(stderr, "loading word embedding %s\n", m->filename[column]);
-	m->word_emb_array[column] = word_emb_load(m->filename[column]);
-      }
-      else if(m->representation[column] == MCD_REPRESENTATION_VOCAB){
-	if(!strcmp(m->filename[column], "_")){
-	  if(corpus_filename){
-	    fprintf(stderr, "extracting dico %s from corpus\n", type);
-	    m->dico_array[column] = dico_extract_from_corpus(corpus_filename, column, type);
-	  }
-	  else if(vocabs){
-	    fprintf(stderr, "linking to dico %s\n", type);
-	    m->dico_array[column] = dico_vec_get_dico(vocabs, type);
-	  }
-	  if(m->dico_array[column] == NULL)
-	    fprintf(stderr, "cannot find dico %s\n", type);
-	}
-	else{
-	  fprintf(stderr, "loading dico %s\n", m->filename[column]);
-	  m->dico_array[column] = dico_read(m->filename[column], 0.5);
-	}
-      }
-    }
-  }	
-  fclose(f);
-  return m;
-}
-
-
-dico_vec *mcd_build_dico_vec(mcd *mcd_struct)
-{
-  dico_vec *dv = dico_vec_new();
-  int i;
-  for(i=0; i < mcd_struct->nb_col; i++){
-    if(mcd_struct->dico_array[i]){
-      dico_vec_add(dv, mcd_struct->dico_array[i]);
-    }
-  }
-  return dv;
-}
diff --git a/maca_trans_parser/src/train_perceptron.c b/maca_trans_parser/src/train_perceptron.c
index f9c9482dabcbfa09885e9ec8e1162ff80da24b3f..54ff2990ec7318da4a84bc90dc1720b16ce77b16 100644
--- a/maca_trans_parser/src/train_perceptron.c
+++ b/maca_trans_parser/src/train_perceptron.c
@@ -42,7 +42,7 @@ void train_perceptron_check_options(context *ctx)
 {
   if(!ctx->conll_filename
      || ctx->help
-     || !ctx->mcd_filename
+     /* || !ctx->mcd_filename */
      || !ctx->features_model_filename
      || !ctx->perc_model_filename
      || !ctx->vocabs_filename
@@ -60,7 +60,8 @@ int main(int argc, char *argv[])
   ctx = context_read_options(argc, argv);
   train_perceptron_check_options(ctx);
   
-  ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->conll_filename, NULL);
+  mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename);
+
   ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);
   ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
   
diff --git a/maca_trans_parser/src/transform_treebank.c b/maca_trans_parser/src/transform_treebank.c
index fbda610c9218d08e93298872510f4a229a08a20c..7b46049f116e490dc402e47fd4e62279d439e8f6 100644
--- a/maca_trans_parser/src/transform_treebank.c
+++ b/maca_trans_parser/src/transform_treebank.c
@@ -36,7 +36,7 @@ void transform_treebank_check_options(context *ctx)
 {
   if(!ctx->conll_filename
      || ctx->help
-     || !ctx->mcd_filename
+     /* || !ctx->mcd_filename */
      || !(ctx->cff_filename || ctx->fann_filename)
      ){
     transform_treebank_help_message(ctx);
@@ -280,24 +280,22 @@ int main(int argc, char *argv[])
   transform_treebank_check_options(ctx);
   
   if(ctx->mode == TRAIN_MODE){
-    ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->conll_filename, NULL);
+    mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename);
     ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);
-    ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
-    /* ctx->mvt_nb = ctx->mcd_struct->dico_array[ctx->mcd_struct->type2col[FEAT_TYPE_LABEL]]->nbelem * 2 + 1; */
   }
   else if(ctx->mode == TEST_MODE){
     ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
-    ctx->mcd_struct = mcd_read(ctx->mcd_filename, NULL, ctx->vocabs);
-    ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
+    mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs);
   }
-
-
+  
+  ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
   if(ctx->dico_labels == NULL){
     fprintf(stderr, "cannot find label names\n");
     return 1;
   }
   ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 1;
     
+  feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);
 
 
 
diff --git a/maca_trans_parser/src/word.c b/maca_trans_parser/src/word.c
index ed3912374685e646512b2abd75b64fc5f149c664..bdb63930b3fa801c4608b1529ff5ce6607f89151 100644
--- a/maca_trans_parser/src/word.c
+++ b/maca_trans_parser/src/word.c
@@ -48,10 +48,10 @@ word *word_parse_buffer(char *buffer, mcd *mcd_struct)
   w = word_new(buffer);
   token = strtok(buffer, "\t");
   do{
-    if((column_nb < mcd_struct->nb_col) &&  (mcd_struct->col2type[column_nb] != -1)){
-      w->feat_array[mcd_struct->col2type[column_nb]] = mcd_get_code(mcd_struct, token, column_nb);
+    if((column_nb < mcd_struct->nb_col) &&  (mcd_struct->type[column_nb] != -1)){
+      w->feat_array[mcd_struct->type[column_nb]] = mcd_get_code(mcd_struct, token, column_nb);
     }
-    if(mcd_struct->col2type[column_nb] == FEAT_TYPE_FORM){
+    if(mcd_struct->type[column_nb] == FEAT_TYPE_FORM){
       w->U1 = isupper(token[0]) ? 1 : 0;
     }
     column_nb++;