Merge branch 'master' of https://gitlab.lif.univ-mrs.fr/alexis.nasr/macaon2 into johannes

c03efa83 · Johannes Heinecke · 651cd59b · 36ddf619 · c03efa83 · c03efa83
Commit c03efa83 authored Apr 28, 2017 by Johannes Heinecke
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,6 +4,13 @@ project(macaon2)
 find_package(FLEX)

 add_definitions("-Wall" )
+SET(CMAKE_C_COMPILER g++)
+SET(CMAKE_CXX_COMPILER g++)
+
+
+SET( CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS} -Ofast -DUSE_CBLAS")
+SET( CMAKE_EXE_LINKER_FLAGS  "${CMAKE_EXE_LINKER_FLAGS} -lm -lopenblas" )
+

 if (${CMAKE_C_COMPILER_VERSION} VERSION_LESS 5.3)
 	set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu11")
@@ -28,11 +35,12 @@ add_subdirectory(maca_common)
 add_subdirectory(maca_tools)
 add_subdirectory(perceptron)
 #add_subdirectory(maca_lemmatizer)
+#add_subdirectory(maca_morpho)
 add_subdirectory(maca_tokenizer)
 add_subdirectory(maca_lexer)
 add_subdirectory(maca_trans_parser)
 add_subdirectory(maca_crf_tagger)
-add_subdirectory(maca_graph_parser)
+#add_subdirectory(maca_graph_parser)

 if(MACA_EXPORT)
  add_subdirectory(maca_export)

--- a/maca_common/CMakeLists.txt
+++ b/maca_common/CMakeLists.txt
@@ -12,6 +12,7 @@ set(SOURCES  src/util.c
  src/feat_desc.c
  src/feat_lib.c
  src/feat_model.c
+  src/char16.c


 )

--- a/maca_common/include/char16.h
+++ b/maca_common/include/char16.h
+#ifndef __CHAR16__ /* include guard for char16.h */
+#define __CHAR16__
+
+typedef short char16; /* storage unit of the converted strings: a plain (signed) short */
+
+int     utf8_strlen(char *utf8_string);          /* length of a 0-terminated UTF-8 string, see char16.c */
+char   *char16toutf8(char16 *char16_string);     /* currently a stub in char16.c that returns NULL */
+int     char16_strlen(char16 *string);           /* number of units before the terminating 0 unit */
+char16 *utf8tochar16(char *utf8_string);         /* returns a malloc'ed, 0-terminated char16 string; the caller frees it */
+
+#endif
--- a/maca_common/include/feat_model.h
+++ b/maca_common/include/feat_model.h
@@ -24,5 +24,5 @@ feat_desc  *feat_model_add(feat_model *fm, feat_desc *fd);
 feat_model *feat_model_read(char *filename, feat_lib *fl, int verbose);
 void        feat_model_compute_ranges(feat_model *fm, mcd *m, int mvt_nb);
 int         feat_model_get_type_feat_n(feat_model *fm, int n);
-
+void        catenate_int(char *string, int val);
 #endif
--- a/maca_common/include/mcd.h
+++ b/maca_common/include/mcd.h
@@ -8,7 +8,7 @@

 #define MCD_INVALID_VALUE -1

-#define MCD_WF_NB 36
+#define MCD_WF_NB 47

 #define MCD_WF_ID 0
 #define MCD_WF_FORM 1
@@ -47,6 +47,81 @@
 #define MCD_WF_Y 34
 #define MCD_WF_Z 35

+#define MCD_WF_Aspect 36
+#define MCD_WF_Case 37
+#define MCD_WF_Clitic 38
+#define MCD_WF_Definite 39
+#define MCD_WF_Gender 40
+#define MCD_WF_Mood 41
+#define MCD_WF_NameType 42
+#define MCD_WF_NounType 43
+#define MCD_WF_Number 44
+#define MCD_WF_Person 45
+#define MCD_WF_Tense 46
+
+/*Abbr
+AdpType
+AdvType
+Animacy
+Animacy[gram]
+ConjType
+Connegative
+Degree
+Derivation
+Dialect
+Echo
+Evident
+Foreign
+Form
+Gender[dat]
+Gender[erg]
+Gender[psor]
+HebBinyan
+HebExistential
+HebSource
+Hyph
+InfForm
+
+Number[abs]
+Number[dat]
+Number[erg]
+Number[psed]
+Number[psor]
+NumForm
+NumType
+NumValue
+PartForm
+PartType
+Person[abs]
+Person[dat]
+Person[erg]
+Person[psor]
+Polarity
+Polite
+Polite[abs]
+Polite[dat]
+Polite[erg]
+Position
+Poss
+Prefix
+PrepCase
+PrepForm
+PronType
+PunctSide
+PunctType
+Reflex
+Strength
+Style
+Subcat
+Typo
+Variant
+VerbForm
+VerbType
+Voice
+Xtra*/
+
+
+
 #include "dico.h"
 #include "word_emb.h"
 #include "dico_vec.h"
@@ -90,6 +165,9 @@
 #define mcd_get_y_col(m)        (m)->wf2col[MCD_WF_Y]
 #define mcd_get_z_col(m)        (m)->wf2col[MCD_WF_Z]

+
+
+
 #define mcd_set_form_col(m, v) (m)->wf[MCD_WF_FORM] = (v)


@@ -121,6 +199,7 @@ mcd *mcd_build_conll07(void);
 mcd *mcd_build_ifpls(void);
 mcd *mcd_build_wplgf(void);
 mcd *mcd_build_wplgfs(void);
+mcd *mcd_build_wpmlgfs(void);

 mcd      *mcd_read(char *mcd_filename, int verbose);
 void      mcd_link_to_dico(mcd *m, dico_vec *vocabs, int verbose);

--- a/maca_common/include/word.h
+++ b/maca_common/include/word.h
@@ -2,6 +2,7 @@
 #define __WORD__

 #include "mcd.h"
+#include "char16.h"

 #define WORD_INVALID_GOV 10000

@@ -12,10 +13,41 @@ typedef struct _word {
  int signature;                /* pos tags that this form can have (represented as a boolean string) */
  int label;
  char *form;
+  char16 *form_char16;
  int index;
  int is_root;
 } word;

+/*
+#define word_get_s1(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 1))? -1 : (w)->form[strlen((w)->form) - 1])
+#define word_get_s2(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 2))? -1 : (w)->form[strlen((w)->form) - 2])
+#define word_get_s3(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 3))? -1 : (w)->form[strlen((w)->form) - 3])
+#define word_get_s4(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 4))? -1 : (w)->form[strlen((w)->form) - 4])
+#define word_get_s5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[strlen((w)->form) - 5])
+#define word_get_s6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 6))? -1 : (w)->form[strlen((w)->form) - 6])
+*/
+#define word_get_s1(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 1))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 1]) /* last unit of form_char16; -1 when w or form_char16 is NULL or the form is shorter than 1 unit */
+#define word_get_s2(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 2))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 2]) /* 2nd unit from the end, same -1 convention */
+#define word_get_s3(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 3))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 3]) /* 3rd unit from the end */
+#define word_get_s4(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 4))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 4]) /* 4th unit from the end */
+#define word_get_s5(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 5))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 5]) /* 5th unit from the end */
+#define word_get_s6(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 6))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 6]) /* 6th unit from the end; note that each macro evaluates w several times and calls char16_strlen twice */
+
+/*#define word_get_p1(w) ((((w) == NULL) || ((w)->form == NULL)                           )? -1 : (w)->form[0])
+#define word_get_p2(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 1))? -1 : (w)->form[1])
+#define word_get_p3(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 2))? -1 : (w)->form[2])
+#define word_get_p4(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 3))? -1 : (w)->form[3])
+#define word_get_p5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 4))? -1 : (w)->form[4])
+#define word_get_p6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[5])
+*/
+
+#define word_get_p1(w) ((((w) == NULL) || ((w)->form_char16 == NULL)                           )? -1 : (w)->form_char16[0]) /* first unit of form_char16; -1 only when w or form_char16 is NULL, so an empty form yields its 0 terminator */
+#define word_get_p2(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 1))? -1 : (w)->form_char16[1]) /* 2nd unit. NOTE(review): the guard is "< 1" while index 1 is read, so a one-unit form yields the 0 terminator instead of -1; confirm whether "< 2" was intended */
+#define word_get_p3(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 2))? -1 : (w)->form_char16[2]) /* 3rd unit; same guard/index offset as word_get_p2 */
+#define word_get_p4(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 3))? -1 : (w)->form_char16[3]) /* 4th unit; same guard/index offset as word_get_p2 */
+#define word_get_p5(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 4))? -1 : (w)->form_char16[4]) /* 5th unit; same guard/index offset as word_get_p2 */
+#define word_get_p6(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 5))? -1 : (w)->form_char16[5]) /* 6th unit; same guard/index offset as word_get_p2 */
+
 #define word_get_id(w)             (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_ID])
 #define word_get_form(w)           (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_FORM])
 #define word_get_lemma(w)          (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LEMMA])

--- a/maca_common/src/char16.c
+++ b/maca_common/src/char16.c
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+
+typedef short char16;
+
+#define char_bit1(c) ((c) & 1) /* bit 1 (least significant) of c, as 0 or 1 */
+#define char_bit2(c) (((c) & 2) >> 1) /* bit 2 of c, as 0 or 1 */
+#define char_bit3(c) (((c) & 4) >> 2) /* bit 3 of c, as 0 or 1 */
+#define char_bit4(c) (((c) & 8) >> 3) /* bit 4 of c, as 0 or 1 */
+#define char_bit5(c) (((c) & 16) >> 4) /* bit 5 of c, as 0 or 1 */
+#define char_bit6(c) (((c) & 32) >> 5) /* bit 6 of c, as 0 or 1 */
+#define char_bit7(c) (((c) & 64) >> 6) /* bit 7 of c, as 0 or 1 */
+#define char_bit8(c) (((c) & 128) >> 7) /* bit 8 (most significant of a byte) of c, as 0 or 1 */
+#define length(c) ((!char_bit8((c)) || (char_bit8(c) && !char_bit7(c)))? 1 : 2) /* 1 when bit 8 is clear (ASCII byte) or when bit 8 is set and bit 7 clear (10xxxxxx, a UTF-8 continuation byte); 2 for every other byte. Evaluates c several times. */
+/*
+int length(char c)
+{
+  if(!char_bit8(c)) return 1;
+  if(char_bit8(c) && !char_bit7(c)) return 1;
+  if(char_bit7(c)) return 2;
+  if(char_bit6(c)) return 3;
+  if(char_bit5(4)) return 4;
+  
+}
+*/
+/*
+ * Returns the number of characters encoded in a 0-terminated UTF-8 string.
+ *
+ * Every character contributes exactly one byte that is not a continuation
+ * byte (10xxxxxx), so counting those bytes gives the character count for
+ * sequences of any length (1 to 4 bytes).  The previous implementation
+ * counted ASCII and continuation bytes instead, which is only right for
+ * 1- and 2-byte sequences (a 3-byte character such as U+20AC counted as 2).
+ *
+ * utf8_string : 0-terminated UTF-8 string, NULL is treated as empty
+ * returns     : number of characters, 0 for an empty or NULL string
+ */
+int utf8_strlen(char *utf8_string)
+{
+  int char_nb = 0;
+
+  if(utf8_string == NULL) return 0;
+
+  for(; *utf8_string; utf8_string++){
+    /* the mask keeps the two top bits of the byte; 10 marks a continuation byte */
+    if((*utf8_string & 0xC0) != 0x80)
+      char_nb++;
+  }
+  return char_nb;
+}
+
+char *char16toutf8(char16 *char16_string) /* NOTE: not implemented yet, the argument is ignored */
+{
+  return NULL; /* stub: every call returns NULL, callers must not dereference the result */
+}
+
+
+/* Returns the number of char16 units found before the terminating 0 unit. */
+int char16_strlen(char16 *string)
+{
+  char16 *cursor = string;
+
+  while(*cursor != 0)
+    cursor++;
+  return (int)(cursor - string);
+}
+
+/*
+ * Converts a 0-terminated UTF-8 byte string into a newly allocated,
+ * 0-terminated char16 string.
+ *
+ * Representation (unchanged in spirit from the first version): an ASCII
+ * byte or a continuation byte (10xxxxxx) becomes one unit holding that
+ * byte; any other byte is treated as the lead of a pair and is packed
+ * together with the byte that follows it as (lead << 8) | next.
+ *
+ * Bytes are now read as unsigned char.  The first version read them through
+ * plain char, so on platforms where char is signed the lead byte was sign
+ * extended, a negative value was shifted left (undefined behaviour) and the
+ * second byte was added as a negative number; the units therefore differed
+ * between signed-char and unsigned-char platforms.  The result is now the
+ * same everywhere.
+ *
+ * utf8_string : 0-terminated UTF-8 string, may be NULL
+ * returns     : malloc'ed char16 string to be released with free(), or NULL
+ *               when utf8_string is NULL or the allocation fails
+ */
+char16 *utf8tochar16(char *utf8_string)
+{
+  const unsigned char *bytes = (const unsigned char *)utf8_string;
+  size_t byte_nb;
+  size_t i, j;
+  char16 *char16_string;
+
+  if(utf8_string == NULL) return NULL;
+
+  byte_nb = strlen(utf8_string);
+  /* each unit consumes at least one byte, so byte_nb units plus the terminator always fit */
+  char16_string = (char16 *)malloc((byte_nb + 1) * sizeof(char16));
+  if(char16_string == NULL) return NULL;
+
+  for(i = 0, j = 0; i < byte_nb; i++, j++){
+    unsigned int unit = bytes[i];
+    if((unit & 0xC0) == 0xC0){
+      /* lead byte: pack it with the next byte; for a truncated string the
+	 next byte is the 0 terminator, which is still inside the buffer */
+      unit = (unit << 8) | bytes[++i];
+    }
+    char16_string[j] = (char16)unit;
+  }
+  char16_string[j] = 0;
+  return char16_string;
+}
+/*
+int main(void)
+{
+  int i;
+  char string[200];
+  char16 *char16_string;
+  strcpy(string, "élémentaire");
+
+  printf("string = %s\n", string);
+  printf("length = %d\n", (int)strlen(string));
+  printf("utf8 length = %d\n", (int)utf8_strlen(string));
+  for(i=0; i < strlen(string); i++){
+    printf("%d\t%c\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\tl=%d\n", i, string[i], (int)string[i], char_bit1(string[i]), char_bit2(string[i]), char_bit3(string[i]), char_bit4(string[i]), char_bit5(string[i]), char_bit6(string[i]), char_bit7(string[i]), char_bit8(string[i]), length(string[i])); 
+  }
+
+
+  char16_string = utf8tochar16(string);
+  printf("char16_strlen = %d\n", char16_strlen(char16_string));
+  
+}
+*/
--- a/maca_common/src/feat_model.c
+++ b/maca_common/src/feat_model.c
@@ -131,7 +131,6 @@ void catenate_int(char *string, int val)
 }


-
 feat_model *feat_model_new(char *name)
 {
  feat_model *fm = (feat_model *)memalloc(sizeof(feat_model));

--- a/maca_common/src/form2pos.c
+++ b/maca_common/src/form2pos.c
@@ -6,13 +6,13 @@

 form2pos *form2pos_new(int nbelem, int pos_nb, char *pos_list)
 {
-  form2pos *f2p = memalloc(sizeof(form2pos));
+  form2pos *f2p = (form2pos *)memalloc(sizeof(form2pos));
  char *token;
  
  f2p->nbelem = nbelem;
  f2p->pos_nb = pos_nb;
-  f2p->d_pos = dico_new("d_pos", pos_nb * 10);
-  f2p->d_signature = dico_new("d_signature", pos_nb * 10);
+  f2p->d_pos = dico_new((char *)"d_pos", pos_nb * 10);
+  f2p->d_signature = dico_new((char *)"d_signature", pos_nb * 10);
  f2p->h_form2signature = hash_new(nbelem * 4);
  token = strtok(pos_list, "\t");
  do{

--- a/maca_common/src/mcd.c
+++ b/maca_common/src/mcd.c
@@ -422,6 +422,63 @@ mcd *mcd_build_wplgfs(void)
  return m;
 }

+mcd *mcd_build_wpmlgfs(void) /* builds a 7-column mcd: FORM, POS, FEATS, LEMMA, GOV, LABEL, SENT_SEG (columns 0 to 6) */
+{
+  mcd *m = mcd_new(7);
+  int col;
+
+  col = 0; /* column 0: FORM, MCD_REPRESENTATION_VOCAB */
+  m->wf[col]=MCD_WF_FORM;
+  m->wf_str[col]=strdup("FORM");
+  m->representation[col]= MCD_REPRESENTATION_VOCAB;
+  m->filename[col] = strdup("_"); /* presumably "_" means that no file is attached to the column; confirm against mcd_read */
+  m->wf2col[MCD_WF_FORM] = col;
+  
+  col = 1; /* column 1: POS, MCD_REPRESENTATION_VOCAB */
+  m->wf[col]=MCD_WF_POS;
+  m->wf_str[col]=strdup("POS");
+  m->representation[col]= MCD_REPRESENTATION_VOCAB;
+  m->filename[col] = strdup("_");
+  m->wf2col[MCD_WF_POS] = col;
+
+  col = 2; /* column 2: FEATS, MCD_REPRESENTATION_VOCAB */
+  m->wf[col]=MCD_WF_FEATS;
+  m->wf_str[col]=strdup("FEATS");
+  m->representation[col]= MCD_REPRESENTATION_VOCAB;
+  m->filename[col] = strdup("_");
+  m->wf2col[MCD_WF_FEATS] = col;
+
+  col = 3; /* column 3: LEMMA, MCD_REPRESENTATION_VOCAB */
+  m->wf[col]=MCD_WF_LEMMA;
+  m->wf_str[col]=strdup("LEMMA");
+  m->representation[col]= MCD_REPRESENTATION_VOCAB;
+  m->filename[col] = strdup("_");
+  m->wf2col[MCD_WF_LEMMA] = col;
+
+  col = 4; /* column 4: GOV, MCD_REPRESENTATION_INT */
+  m->wf[col]=MCD_WF_GOV;
+  m->wf_str[col]=strdup("GOV");
+  m->representation[col]= MCD_REPRESENTATION_INT;
+  m->filename[col] = strdup("_");
+  m->wf2col[MCD_WF_GOV] = col;
+
+  col = 5; /* column 5: LABEL, MCD_REPRESENTATION_VOCAB */
+  m->wf[col]=MCD_WF_LABEL;
+  m->wf_str[col]=strdup("LABEL");
+  m->representation[col]= MCD_REPRESENTATION_VOCAB;
+  m->filename[col] = strdup("_");
+  m->wf2col[MCD_WF_LABEL] = col;
+  
+  col = 6; /* column 6: SENT_SEG, MCD_REPRESENTATION_INT */
+  m->wf[col]=MCD_WF_SENT_SEG;
+  m->wf_str[col]=strdup("SENT_SEG");
+  m->representation[col]= MCD_REPRESENTATION_INT;
+  m->filename[col] = strdup("_");
+  m->wf2col[MCD_WF_SENT_SEG] = col;
+  
+  return m; /* ownership of m and of its strdup'ed strings passes to the caller */
+}
+


 /* returns a dico_vec containing the different dictionnaries found in an mcd structure */

--- a/maca_common/src/trie.c
+++ b/maca_common/src/trie.c
@@ -7,7 +7,7 @@

 trie_state *trie_state_new(trie_trans *transitions, int is_accept)
 {
-  trie_state *state = memalloc(sizeof(trie_state));
+  trie_state *state = (trie_state *) memalloc(sizeof(trie_state));
  state->transitions = transitions;
  state->is_accept = is_accept;
  state->fail = 0;
@@ -24,7 +24,7 @@ void trie_state_free(trie_state *state)

 trie *trie_new(void)
 {
-  trie *t = memalloc(sizeof(trie));
+  trie *t = (trie *) memalloc(sizeof(trie));
  t->states = NULL;
  t->size = 0;
  t->states_nb = 0;
@@ -45,7 +45,7 @@ void trie_free(trie *t)

 trie_trans *trie_trans_new(int destination, int symbol, trie_trans *next)
 {
-  trie_trans *trans = memalloc(sizeof(trie_trans));
+  trie_trans *trans = (trie_trans *)memalloc(sizeof(trie_trans));
  trans->destination = destination;
  trans->symbol = symbol;
  trans->next = next;

--- a/maca_common/src/word.c
+++ b/maca_common/src/word.c
@@ -19,6 +19,7 @@ word *word_new(char *input)

  w->wf_array[MCD_WF_GOV] = WORD_INVALID_GOV;
  w->form = NULL;
+  w->form_char16 = NULL;

  w->index = -1;
  w->signature = -1;
@@ -59,11 +60,13 @@ word *word_parse_buffer(char *buffer, mcd *mcd_struct)
  w = word_new(buffer);
  token = strtok(buffer, "\t");
  do{
-    if((col < mcd_struct->nb_col) &&  (mcd_struct->wf[col] != -1) && (strcmp(token, "_"))){
+    /* if((col < mcd_struct->nb_col) &&  (mcd_struct->wf[col] != -1) && (strcmp(token, "_"))){ */
+    if((col < mcd_struct->nb_col) &&  (mcd_struct->wf[col] != -1)){
      w->wf_array[mcd_struct->wf[col]] = mcd_get_code(mcd_struct, token, col);
    }
    if(mcd_struct->wf[col] == MCD_WF_FORM){
      w->form = strdup(token);
+      w->form_char16 = utf8tochar16(w->form);
      w->U1 = isupper(token[0]) ? 1 : 0;
    }
    col++;
@@ -95,6 +98,7 @@ void word_free(word *w)
  if(w == NULL) return;
  if(w->input) free(w->input);
  if(w->form) free(w->form);
+  if(w->form_char16) free(w->form_char16);
  free(w);
 }


--- a/maca_graph_parser/array.c
+++ b/maca_graph_parser/array.c
@@ -2,7 +2,7 @@
 #include "array.h"

 array_t* array_new() {
-    array_t* array = malloc(sizeof(array_t));
+  array_t* array = (array_t *)malloc(sizeof(array_t));
    array->num_elements = 0;
    array->data = NULL;
    return array;
@@ -18,7 +18,7 @@ ARRAY_TYPE array_get(array_t* array, int element) {
 }

 void array_push(array_t* array, ARRAY_TYPE value) {
-    array->data = realloc(array->data, sizeof(ARRAY_TYPE) * (array->num_elements + 1));
+  array->data = (ARRAY_TYPE *)realloc(array->data, sizeof(ARRAY_TYPE) * (array->num_elements + 1));
    array->data[array->num_elements] = value;
    array->num_elements++;
 }

--- a/maca_graph_parser/maca_graph_parser.c
+++ b/maca_graph_parser/maca_graph_parser.c
@@ -75,7 +75,7 @@ void maca_graph_parser_print_ctx(maca_graph_parser_ctx *ctx)

 maca_graph_parser_ctx * maca_graph_parser_InitCTX()
 {
-    maca_graph_parser_ctx * ctx = calloc(sizeof(maca_graph_parser_ctx), 1);
+  maca_graph_parser_ctx * ctx = (maca_graph_parser_ctx *)calloc(sizeof(maca_graph_parser_ctx), 1);

    ctx->cfg=MACA_DEFAULT_CFG;
    ctx->verbose_flag = maca_verbose;

--- a/maca_graph_parser/maca_graph_parser_alphabet.c
+++ b/maca_graph_parser/maca_graph_parser_alphabet.c
@@ -36,7 +36,7 @@ void maca_graph_parser_alphabet_free(maca_graph_parser_alphabet *a)

 maca_graph_parser_alphabet *maca_graph_parser_alphabet_new(char *name)
 {
-  maca_graph_parser_alphabet *a = malloc(sizeof(maca_graph_parser_alphabet));
+  maca_graph_parser_alphabet *a = (maca_graph_parser_alphabet *)malloc(sizeof(maca_graph_parser_alphabet));
  if(a == NULL){
    fprintf(stderr, "memory allocation error\n");
    exit(1);
@@ -153,7 +153,7 @@ maca_graph_parser_alphabet **maca_graph_parser_alphabet_load4(char *filename)
  int i = 0;
  char symbol[1000];
  maca_graph_parser_alphabet *a = NULL;
-  maca_graph_parser_alphabet **alpha_array = malloc(4 * sizeof(maca_graph_parser_alphabet*));
+  maca_graph_parser_alphabet **alpha_array = (maca_graph_parser_alphabet **)malloc(4 * sizeof(maca_graph_parser_alphabet*));

  for(i=0; i < 4; i++)
    alpha_array[i] = NULL;
@@ -182,7 +182,7 @@ maca_graph_parser_alphabet **maca_graph_parser_alphabet_load5(char *filename)
  int i = 0;
  char symbol[1000];
  maca_graph_parser_alphabet *a = NULL;
-  maca_graph_parser_alphabet **alpha_array = malloc(5 * sizeof(maca_graph_parser_alphabet*));
+  maca_graph_parser_alphabet **alpha_array = (maca_graph_parser_alphabet **)malloc(5 * sizeof(maca_graph_parser_alphabet*));

  for(i=0; i < 5; i++)
    alpha_array[i] = NULL;

--- a/maca_lemmatizer/src/maca_lemmatizer.c
+++ b/maca_lemmatizer/src/maca_lemmatizer.c
@@ -125,6 +125,7 @@ int main(int argc, char *argv[])
  char *buffer_copy;
  char *form;
  char *pos;
+  char *feats;

  char *token;
  int column_nb;
@@ -136,11 +137,16 @@ int main(int argc, char *argv[])
  int form_column;
  int pos_column;
  int lemma_column;
+  int feats_column;
  FILE *f = NULL;

  ctx = context_read_options(argc, argv);
  maca_lemmatizer_check_options(ctx);

+
+  feats_column = ctx->mcd_struct->wf2col[MCD_WF_FEATS];
+
+  
  if(ctx->pos_column != -1)
    pos_column = ctx->pos_column;
  else
@@ -177,6 +183,7 @@ int main(int argc, char *argv[])
    form = NULL;
    pos = NULL;
    lemma = NULL;
+    feats = NULL;
    do{
      if(column_nb == lemma_column) /* lemma is present in the input file */
 	if(strcmp(token, "_")) /* and it is not an underscore */
@@ -188,6 +195,9 @@ int main(int argc, char *argv[])
      if(column_nb == pos_column){
 	pos = strdup(token);
      }
+      if(column_nb == feats_column){
+	feats = strdup(token);
+      }
      column_nb++;
    } while((token = strtok(NULL , "\t")));
    
@@ -215,11 +225,13 @@ int main(int argc, char *argv[])
    
    /* print_word(buffer, ctx->mcd_struct, lemma); */

-  /* printf("form = %s pos = %s (%s) lemma = %s\n", form, pos, form_pos, lemma);  */
+    printf("form = %s pos = %s (%s) feats = %s lemma = %s\n", form, pos, form_pos, feats, lemma); 
+    printf("form = %s pos = %s (%s) feats = %s lemma = %s\n", form, pos, form_pos, feats, lemma); 
    printf("\t%s\n", lemma);
    
    if(pos)free(pos);
    if(form)free(form);
+    if(feats)free(feats);
  }
  free(buffer_copy);
  free(lemma_array);

--- a/maca_lexer/src/extract_mwe_from_fplm.c
+++ b/maca_lexer/src/extract_mwe_from_fplm.c
@@ -29,7 +29,7 @@ dico *decompose_mwe_in_fplm_file(char *fplm_filename, FILE *output_file, int deb
  char token[1000];
  int l;
  int i, j;
-  dico *d_tokens = dico_new("TOKENS", 100000);
+  dico *d_tokens = dico_new((char *)"TOKENS", 100000);
  int token_code;  
  while(fgets(buffer, 10000, f)){
    fields_nb = sscanf(buffer, "%[^\t]\t%s\t%[^\t]\t%s\n", form, pos, lemma, morpho);
@@ -71,6 +71,6 @@ int main(int argc, char *argv[])
  dico *d_tokens;

  d_tokens = decompose_mwe_in_fplm_file(argv[1], stdout, 1);
-  dico_print("d_tokens.dico", d_tokens);
+  dico_print((char *)"d_tokens.dico", d_tokens);
  dico_free(d_tokens);
 }
--- a/maca_morpho/CMakeLists.txt
+++ b/maca_morpho/CMakeLists.txt
+# Sources of the static maca_morpho library.
+set(MACA_MORPHO_SOURCES
+  src/maca_morpho_feat_fct.c
+  src/maca_morpho_context.c
+  src/vectorize.c
+)
+
+# Library: headers live next to the sources, so src is put on the include path.
+include_directories(src)
+add_library(maca_morpho STATIC ${MACA_MORPHO_SOURCES})
+target_link_libraries(maca_morpho perceptron maca_common)
+
+# Executables: each one is linked against perceptron, maca_common and
+# maca_morpho (in that order) and installed into bin.
+add_executable(fplm2cff src/fplm2cff.c)
+target_link_libraries(fplm2cff perceptron maca_common maca_morpho)
+install(TARGETS fplm2cff DESTINATION bin)
+
+add_executable(predict src/predict.c)
+target_link_libraries(predict perceptron maca_common maca_morpho)
+install(TARGETS predict DESTINATION bin)
+
--- a/maca_morpho/src/fplm2cff.c
+++ b/maca_morpho/src/fplm2cff.c
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "maca_morpho_context.h"
+#include "feat_model.h"
+#include "feat_vec.h"
+#include "dico.h"
+#include "util.h"
+#include "vectorize.h"
+
+/*
+ * Splits a "feature=value" string at its first '='.
+ *
+ * feature_value : 0-terminated input, e.g. "Gender=Masc"; a NULL input
+ *                 yields two empty strings
+ * feature       : output buffer, receives the text before the first '='
+ *                 (the whole input when there is no '=')
+ * value         : output buffer, receives the text after the first '='
+ *                 (empty when there is no '=')
+ *
+ * Both output buffers must be able to hold strlen(feature_value) + 1 bytes;
+ * the function has no way to check their size.
+ */
+void decompose_feature_value(char *feature_value, char *feature, char *value)
+{
+  char *equal_sign;
+  size_t feature_length;
+
+  if(feature_value == NULL){
+    feature[0] = '\0';
+    value[0] = '\0';
+    return;
+  }
+
+  equal_sign = strchr(feature_value, '=');
+  feature_length = equal_sign ? (size_t)(equal_sign - feature_value) : strlen(feature_value);
+
+  memcpy(feature, feature_value, feature_length);
+  feature[feature_length] = '\0';
+  strcpy(value, equal_sign ? equal_sign + 1 : "");
+}
+
+
+/*
+ * fplm2cff: reads an fplm file (form, pos, lemma and morpho separated by
+ * tabs, one entry per line) and splits the morpho field, a '|' separated
+ * list of feature=value pairs, into its features and values.
+ *
+ * Lines that do not provide the four fields are skipped.  Every field is
+ * limited to 99 characters so that the fixed-size buffers cannot overflow.
+ * Returns 0 on success; exits with status 1 after printing the help
+ * message or when the fplm file cannot be opened.
+ */
+int main(int argc, char *argv[])
+{
+  context *ctx = context_read_options(argc, argv);
+  if(ctx->help){
+    context_general_help_message(ctx);
+    context_language_help_message(ctx);
+    context_fplm_help_message(ctx);
+    context_maca_data_path_help_message(ctx);
+    context_features_filename_help_message(ctx);
+    context_features_model_help_message(ctx);
+    exit(1);
+  }
+  /* fv and dico_features are not used yet: they belong to the vectorisation
+     step that is still commented out at the end of this file */
+  feat_vec *fv = feat_vec_new(10);
+  dico *dico_features = dico_new((char *)"dico_features", 1000);
+  /*  feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); */
+  char form[100];
+  char pos[100];
+  char lemma[100];
+  char morpho[100];
+  FILE *F_fplm = NULL;
+  char buffer[1000];
+  char feature[100];
+  char value[100];
+  char *token;
+
+  (void)fv;
+
+  F_fplm = myfopen(ctx->fplm_filename, "r");
+  if(F_fplm == NULL){
+    fprintf(stderr, "cannot open file %s\n", ctx->fplm_filename);
+    exit(1);
+  }
+
+  /* fgets alone drives the loop: testing feof here would drop a last line
+     that is not terminated by a newline */
+  while(fgets(buffer, sizeof(buffer), F_fplm)){
+    /* cut the line at its end-of-line marker, if there is one */
+    buffer[strcspn(buffer, "\r\n")] = '\0';
+
+    /* the widths keep each field inside its 100-byte buffer */
+    if(sscanf(buffer, "%99[^\t]\t%99[^\t]\t%99[^\t]\t%99[^\n]", form, pos, lemma, morpho) != 4)
+      continue;
+
+    /* token is a piece of morpho, so feature and value (100 bytes) can hold it */
+    for(token = strtok(morpho, "|"); token != NULL; token = strtok(NULL, "|")){
+      decompose_feature_value(token, feature, value);
+    }
+  }
+  fclose(F_fplm);
+  dico_free(dico_features);
+  return 0;
+}
+    
+    /*
+  while(strcmp(form, "end")){
+    fscanf(stdin, "%s", form);
+    printf("form = %s\n", form);
+    form2fv(form, fv, fm, dico_features, ADD_MODE);
+    //void      feat_vec_print_string(feat_vec *fv, dico *dico_features);
+    feat_vec_print(stdout, fv);
+  }
+  //dico_print_fh(stdout, dico_features);
+  if(ctx->features_filename)
+    dico_print(ctx->features_filename, dico_features);
+    */
+
+  
+  
--- a/maca_morpho/src/maca_morpho_context.c
+++ b/maca_morpho/src/maca_morpho_context.c
+#include<stdlib.h>
+#include<stdio.h>
+#include<string.h>
+#include<unistd.h>
+#include<getopt.h>
+#include "maca_morpho_context.h"
+#include "util.h"
+
+
+void context_set_linguistic_resources_filenames(context *ctx);
+
+/*
+ * Releases a context together with every string it owns.
+ * A NULL context is ignored.
+ */
+void context_free(context *ctx)
+{
+  if(ctx == NULL) return;
+
+  /* free(NULL) is a no-op, so unset fields need no test */
+  free(ctx->program_name);
+  free(ctx->fplm_filename);
+  free(ctx->cfw_filename);
+  free(ctx->language);
+  free(ctx->maca_data_path);
+  free(ctx->fm_filename);
+  free(ctx->features_filename);
+  free(ctx);
+}
+
+/*
+ * Allocates a context with its default values: all flags cleared, the
+ * language set to "fr" and every other string field set to NULL (including
+ * fm_filename, which used to be left uninitialised).
+ */
+context *context_new(void)
+{
+  context *ctx = (context *)memalloc(sizeof(context));
+
+  ctx->help = 0;
+  ctx->verbose = 0;
+  ctx->debug_mode = 0;
+  ctx->program_name = NULL;
+  ctx->fplm_filename = NULL;
+  ctx->language = strdup("fr");
+  ctx->maca_data_path = NULL;
+  ctx->fm_filename = NULL;
+  ctx->features_filename = NULL;
+  ctx->cfw_filename = NULL;
+  return ctx;
+}
+
+/*
+ * Prints the usage line and the options shared by all the programs on
+ * stderr.  Only options that context_read_options really handles are
+ * listed: -r --hratio used to be advertised although it is not parsed,
+ * and -d --debug was missing.
+ */
+void context_general_help_message(context *ctx)
+{
+    fprintf(stderr, "usage: %s [options]\n", ctx->program_name);
+    fprintf(stderr, "Options:\n");
+    fprintf(stderr, "\t-h --help             : print this message\n");
+    fprintf(stderr, "\t-v --verbose          : activate verbose mode\n");
+    fprintf(stderr, "\t-d --debug            : activate debug mode\n");
+}
+
+/* Writes the description of the -f / --fplm option on stderr. */
+void context_fplm_help_message(context *ctx)
+{
+  (void)ctx;
+  fputs("\t-f --fplm   <file>  : fplm (form pos lemma morpho) file\n", stderr);
+}
+
+/* Writes the description of the -L / --language option on stderr. */
+void context_language_help_message(context *ctx)
+{
+  (void)ctx;
+  fputs("\t-L --language  : identifier of the language to use\n", stderr);
+}
+
+/*
+ * Writes the description of the maca_data_path option on stderr.
+ * The short option is -D: that is the letter bound to --maca_data_path in
+ * context_read_options (the message used to announce -M).
+ */
+void context_maca_data_path_help_message(context *ctx){
+  fprintf(stderr, "\t-D --maca_data_path  : path to maca_data directory\n");
+}
+
+/* Writes the description of the -F / --fm option on stderr. */
+void context_fm_help_message(context *ctx)
+{
+  (void)ctx;
+  fputs("\t-F --fm <file> : feature model file name\n", stderr);
+}
+
+/* Writes the description of the -x / --feat option on stderr. */
+void context_features_filename_help_message(context *ctx)
+{
+  (void)ctx;
+  fputs("\t-x --feat <file> : features dictionary file name\n", stderr);
+}
+
+/* Writes the description of the -w / --weights option on stderr. */
+void context_weights_matrix_filename_help_message(context *ctx)
+{
+  (void)ctx;
+  fputs("\t-w --weights <file> : weight matrix (cfw) filename\n", stderr);
+}
+
+/*
+ * Writes the description of the feature model option on stderr.
+ * The long option is --fm, the name registered in context_read_options
+ * (the message used to announce --feat_model, which is not accepted).
+ */
+void context_features_model_help_message(context *ctx){
+  fprintf(stderr, "\t-F --fm <file> : feature model file name\n");
+}
+
+/* Stores a copy of value in *slot after releasing the string held there. */
+static void context_store_string(char **slot, const char *value)
+{
+  if(*slot) free(*slot);
+  *slot = strdup(value);
+}
+
+/*
+ * Builds a context from the command line.
+ *
+ * argc, argv : the arguments received by main
+ * returns    : a new context (release it with context_free) in which the
+ *              options found on the command line replace the defaults and
+ *              the default resource file names have been filled in
+ *
+ * Unknown options are silently ignored (opterr is 0).  --mcd is accepted
+ * but not used by this program.  -M is accepted as a synonym of -D.
+ */
+context *context_read_options(int argc, char *argv[])
+{
+  int c;
+  int option_index = 0;
+  context *ctx = context_new();
+
+  /* getopt_long needs an all-zero entry to find the end of the array */
+  static struct option long_options[] =
+    {
+      {"help",                no_argument,       0, 'h'},
+      {"verbose",             no_argument,       0, 'v'},
+      {"debug",               no_argument,       0, 'd'},
+      {"mcd",                 required_argument, 0, 'C'},
+      {"language",            required_argument, 0, 'L'},
+      {"fplm",                required_argument, 0, 'f'},
+      {"maca_data_path",      required_argument, 0, 'D'},
+      {"fm",                  required_argument, 0, 'F'},
+      {"feat",                required_argument, 0, 'x'},
+      {"weights",             required_argument, 0, 'w'},
+      {0,                     0,                 0, 0}
+    };
+
+  ctx->program_name = strdup(argv[0]);
+  /* make sure the field is defined before context_store_string tests it */
+  ctx->fm_filename = NULL;
+
+  optind = 0;
+  opterr = 0;
+
+  while ((c = getopt_long (argc, argv, "hvdf:L:M:D:F:x:w:", long_options, &option_index)) != -1){
+    switch (c)
+      {
+      case 'd':
+        ctx->debug_mode = 1;
+        break;
+      case 'h':
+        ctx->help = 1;
+        break;
+      case 'v':
+        ctx->verbose = 1;
+        break;
+      case 'f':
+        context_store_string(&ctx->fplm_filename, optarg);
+        break;
+      case 'L':
+        /* replaces (and releases) the default language set by context_new */
+        context_store_string(&ctx->language, optarg);
+        break;
+      case 'M': /* listed in the option string, used to be dropped silently */
+      case 'D':
+        context_store_string(&ctx->maca_data_path, optarg);
+        break;
+      case 'F':
+        context_store_string(&ctx->fm_filename, optarg);
+        break;
+      case 'x':
+        context_store_string(&ctx->features_filename, optarg);
+        break;
+      case 'w':
+        context_store_string(&ctx->cfw_filename, optarg);
+        break;
+      }
+  }
+
+  context_set_linguistic_resources_filenames(ctx);
+
+  return ctx;
+}
+
+/*
+ * Fills in the resource file names that were not given on the command line.
+ *
+ * The resource directory is <base>/<language>/bin/ where <base> is
+ * ctx->maca_data_path or, when that is not set, the MACAON_DIR environment
+ * variable (a warning is printed and an empty base is used when neither is
+ * available).  When ctx->fplm_filename is not set it becomes
+ * <base>/<language>/bin/DEFAULT_FPLM_FILENAME.
+ *
+ * The name is built in a buffer of exactly the required size; the former
+ * fixed 500-byte buffers overflowed with long paths.
+ */
+void context_set_linguistic_resources_filenames(context *ctx)
+{
+  const char *base_dir = ctx->maca_data_path;
+  size_t size;
+  char *filename;
+
+  if(base_dir == NULL){
+    base_dir = getenv("MACAON_DIR");
+    if(base_dir == NULL){
+      fprintf(stderr, "ATTENTION: the environment variable MACAON_DIR is not defined\n");
+      base_dir = "";
+    }
+  }
+
+  if(ctx->fplm_filename) return;
+
+  size = strlen(base_dir) + strlen("/") + strlen(ctx->language)
+    + strlen("/bin/") + strlen(DEFAULT_FPLM_FILENAME) + 1;
+  filename = (char *)malloc(size);
+  if(filename == NULL){
+    fprintf(stderr, "memory allocation error\n");
+    exit(1);
+  }
+  snprintf(filename, size, "%s/%s/bin/%s", base_dir, ctx->language, DEFAULT_FPLM_FILENAME);
+  ctx->fplm_filename = filename;
+}