From 1c19c11d9f61beb73fbff12aadde05b1b85d552a Mon Sep 17 00:00:00 2001
From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
Date: Thu, 12 Apr 2018 12:22:59 +0200
Subject: [PATCH] added several tools to manipulate corpora in maca_corpora as
 well as conll2mcf

---
 CMakeLists.txt                                |   2 +
 maca_common/CMakeLists.txt                    |   5 +-
 maca_common/include/conll_lib.h               | 102 ++++
 maca_common/include/hash_str.h                |  32 +
 maca_common/src/conll_lib.c                   | 561 ++++++++++++++++++
 maca_common/src/hash_str.c                    | 118 ++++
 maca_corpora/CMakeLists.txt                   |   2 +
 maca_corpora/exec/CMakeLists.txt              |  12 +
 maca_corpora/exec/ftb2fr.c                    | 237 ++++++++
 maca_corpora/exec/ptb2en.c                    | 275 +++++++++
 maca_corpora/lib/CMakeLists.txt               |  11 +
 maca_corpora/lib/include/ftb_lib.h            |  38 ++
 maca_corpora/lib/include/orfeo_lib.h          |  17 +
 maca_corpora/lib/src/ftb_lib.c                | 170 ++++++
 maca_corpora/lib/src/orfeo_lib.c              | 428 +++++++++++++
 maca_tools/CMakeLists.txt                     |  12 +-
 maca_tools/src/conll2mcf.c                    | 158 +++++
 maca_tools/src/conllu2mcf.c                   | 159 +++++
 .../src/simple_decoder_parser_arc_eager.c     |   3 +
 19 files changed, 2337 insertions(+), 5 deletions(-)
 create mode 100644 maca_common/include/conll_lib.h
 create mode 100644 maca_common/include/hash_str.h
 create mode 100644 maca_common/src/conll_lib.c
 create mode 100644 maca_common/src/hash_str.c
 create mode 100644 maca_corpora/CMakeLists.txt
 create mode 100644 maca_corpora/exec/CMakeLists.txt
 create mode 100644 maca_corpora/exec/ftb2fr.c
 create mode 100644 maca_corpora/exec/ptb2en.c
 create mode 100644 maca_corpora/lib/CMakeLists.txt
 create mode 100644 maca_corpora/lib/include/ftb_lib.h
 create mode 100644 maca_corpora/lib/include/orfeo_lib.h
 create mode 100644 maca_corpora/lib/src/ftb_lib.c
 create mode 100644 maca_corpora/lib/src/orfeo_lib.c
 create mode 100644 maca_tools/src/conll2mcf.c
 create mode 100644 maca_tools/src/conllu2mcf.c

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 80f3d0f..bbac66d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -32,6 +32,7 @@ endif()
 
 include_directories(maca_common/include)
 include_directories(perceptron/lib/include)
+include_directories(maca_corpora/lib/include)
 
 add_subdirectory(maca_common)
 add_subdirectory(maca_tools)
@@ -42,6 +43,7 @@ add_subdirectory(maca_tokenizer)
 add_subdirectory(maca_lexer)
 add_subdirectory(maca_trans_parser)
 add_subdirectory(maca_crf_tagger)
+add_subdirectory(maca_corpora)
 #add_subdirectory(maca_graph_parser)
 
 if(MACA_EXPORT)
diff --git a/maca_common/CMakeLists.txt b/maca_common/CMakeLists.txt
index e389629..de1eb4c 100644
--- a/maca_common/CMakeLists.txt
+++ b/maca_common/CMakeLists.txt
@@ -1,5 +1,7 @@
-set(SOURCES  src/util.c
+set(SOURCES
+ src/util.c
  src/hash.c
+ src/hash_str.c
  src/dico.c
  src/word_emb.c
  src/mcd.c
@@ -17,6 +19,7 @@ set(SOURCES  src/util.c
  src/fplm.c
  src/json_parser.c
  src/json_tree.c
+ src/conll_lib.c
 )
 
 #compiling library
diff --git a/maca_common/include/conll_lib.h b/maca_common/include/conll_lib.h
new file mode 100644
index 0000000..077ca95
--- /dev/null
+++ b/maca_common/include/conll_lib.h
@@ -0,0 +1,102 @@
+/*******************************************************************************
+    Copyright (C) 2010 by Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
+                      and Joseph Le Roux <joseph.le.roux@gmail.com>
+    conll_lib is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    conll_lib is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with conll_lib. If not, see <http://www.gnu.org/licenses/>.
+*******************************************************************************/
+
+#ifndef __CONLL_LIB__
+#define __CONLL_LIB__
+
+#include <stdio.h>
+#include "hash_str.h"
+
+#define MAX_WORDS_IN_SENTENCE 1000
+#define MAX_STR 10000
+#define MAX_LINE_LENGTH 50000
+
+#define INCORRECT_SENTENCE_NUM_VALUE -1
+#define INCORRECT_PARSE_NUM_VALUE -1
+#define INCORRECT_LOGPROB_VALUE 10
+#define INCORRECT_ORACLE_VALUE -1
+#define INCORRECT_CONF_MEAS -1
+#define INCORRECT_LEX_AFF -1
+
+typedef struct w
+{
+  unsigned id;   /* Token counter, starting at 1 for each new sentence.*/
+  char form[MAX_STR];   /* Word form or punctuation symbol.*/
+  char lemma[MAX_STR];  /* Lemma or stem (depending on particular data set) of word form,*/
+                 /* or an underscore if not available.*/
+  char cpostag[MAX_STR];/* Coarse-grained part-of-speech tag, where tagset depends on the language.*/
+  char postag[MAX_STR]; /* Fine-grained part-of-speech tag, where the tagset depends on the language,*/
+                 /* or identical to the coarse-grained part-of-speech tag if not available.*/
+  char feats[MAX_STR];  /* Unordered set of syntactic and/or morphological features (depending on the particular language)*/
+                 /*, separated by a vertical bar (|), or an underscore if not available.*/
+  int head; /* Head of the current token, which is either a value of ID or zero ('0').*/
+  char deprel[MAX_STR];  /* Dependency relation to the HEAD. The set of dependency relations depends on the particular language.*/
+                 /* Note that depending on the original treebank annotation, the dependency relation may be meaningful or simply 'ROOT'.*/
+  unsigned phead;/* Projective head of current token, which is either a value of ID or zero ('0'), or an underscore if not available. */
+                 /* The dependency structure resulting from the PHEAD column is guaranteed to be projective */
+                 /* whereas the structures resulting from the HEAD column will be non-projective for some sentences */
+  char pdeprel[MAX_STR]; /* Dependency relation to the PHEAD, or an underscore if not available. */
+                 /* Note that depending on the original treebank annotation, the dependency relation may be meaningful or simply 'ROOT'.*/
+  char language[MAX_STR]; /* Language identifier */
+
+  double score; /* score of the dependency, not in the 2007 conll format */
+  double lex_aff; /*  lexical affinity of the dependent and the governor, not in the 2007 conll format */
+
+  struct w * mother; /* governor of this word in the tree, NULL when it has none */
+  struct w * daughters[MAX_WORDS_IN_SENTENCE]; /* words that have this word as mother, in the order they were attached */
+  unsigned daughters_nb; /* number of entries of daughters in use */
+  double conf_meas; /* presumably a confidence measure (see INCORRECT_CONF_MEAS) - TODO confirm, not set in conll_lib.c */
+} conll_word;
+
+
+typedef struct
+{
+  conll_word * root; /* set by the line parser to the last word read whose head is 0 */
+  conll_word * words[MAX_WORDS_IN_SENTENCE]; /* words by position; conll_reset_sentence puts an artificial ROOT word in slot 0 */
+  unsigned l; /* sentence length: number of slots of words in use, slot 0 included */
+  unsigned num; /* sentence number */
+} conll_sentence;
+
+
+conll_sentence *conll_allocate_sentence(void);
+void            conll_renumber_sentence(conll_sentence *s);
+void            conll_reset_sentence(conll_sentence *s);
+void            conll_free_sentence(conll_sentence *s);
+int             conll_load_sentence(FILE *f, conll_sentence *s);
+void            conll_print_sentence(conll_sentence *s);
+void            conll_print_sentence_mcf(conll_sentence *s, int coarse_pos);
+void            conll_print_sentence_mcf2(conll_sentence *s, int print_id, int print_form, int print_lemma, int print_cpostag, int print_postag, int print_feats, int print_head, int print_deprel);
+void            conll_print_sentence_mcf3(conll_sentence *s, char *columns, int nb_col);
+void            conll_compact_sentence(conll_sentence *s);
+conll_word     *conll_allocate_word(unsigned id, char *form, char *lemma, char *cpostag, char *postag, char *feats, unsigned head, char *deprel);
+conll_word     *conll_copy_word(conll_word *w);
+void            conll_add_daughter(conll_word *daughter, conll_word *mother);
+void            conll_remove_daughter(conll_sentence *s, int i);
+void            conll_remove_word_rec(conll_sentence *s, int i);
+void            conll_remove_subtree(conll_sentence *s, int root);
+void            conll_add_word(conll_sentence *s, conll_word *w, int pos, conll_word *gov);
+void            conll_split_node_in_two(conll_sentence *s, int pos, conll_word *gov, conll_word *dep, int pos_gov, int pos_dep);
+void            conll_change_pos(conll_sentence *s, hash_str *h_pos);
+void            conll_change_cpos(conll_sentence *s, hash_str *h_cpos);
+void            conll_change_fct(conll_sentence *s, hash_str *h_fct);
+int             conll_is_num(char *s);
+void            conll_renumber_sentence_offset(conll_sentence *s, int offset);
+void            conll_compute_relative_index_of_heads(conll_sentence *s);
+
+
+
+#endif
diff --git a/maca_common/include/hash_str.h b/maca_common/include/hash_str.h
new file mode 100644
index 0000000..1429a11
--- /dev/null
+++ b/maca_common/include/hash_str.h
@@ -0,0 +1,32 @@
+#ifndef __HASH_STR__
+#define __HASH_STR__
+
+#define HASH_STR_INVALID_VAL NULL
+
+typedef struct _hash_str_cell /* one (key, val) couple inside the chain of a bucket */
+{
+  char *key; /* released with free() by hash_str_cell_free */
+  char *val; /* released with free() by hash_str_cell_free */
+  struct _hash_str_cell *next; /* next cell of the same bucket, NULL at the end of the chain */
+} hash_str_cell;
+
+typedef struct /* chained hash table mapping strings to strings */
+{
+  int size; /* number of buckets in array */
+  int nbelem; /* number of couples stored, incremented by hash_str_add */
+  hash_str_cell **array; /* one chain per bucket, NULL for an empty bucket */
+} hash_str;
+
+
+hash_str_cell *hash_str_cell_new(char *key, char *val, hash_str_cell *next);
+void hash_str_cell_free(hash_str_cell *c);
+
+hash_str *hash_str_new(int size);
+void hash_str_free(hash_str *h);
+hash_str_cell *hash_str_lookup(hash_str *h, char *key);
+char *hash_str_get_val(hash_str *h, char *key);
+void hash_str_add(hash_str *h, char *key, char *val);
+void hash_str_stats(hash_str *h);
+
+
+#endif
diff --git a/maca_common/src/conll_lib.c b/maca_common/src/conll_lib.c
new file mode 100644
index 0000000..f5e2722
--- /dev/null
+++ b/maca_common/src/conll_lib.c
@@ -0,0 +1,561 @@
+/*******************************************************************************
+    Copyright (C) 2010 by Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
+                      and Joseph Le Roux <joseph.le.roux@gmail.com>
+    conll_lib is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    conll_lib is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with conll_lib. If not, see <http://www.gnu.org/licenses/>.
+*******************************************************************************/
+
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+#include"conll_lib.h"
+
+int conll_parse_line(FILE *f, conll_sentence *s);
+
+/* Turns every head index of words 1 .. l-1 into an offset relative to the
+   position of the word itself (head - position); heads equal to 0 are kept. */
+void conll_compute_relative_index_of_heads(conll_sentence *s)
+{
+  unsigned pos;
+
+  /* slot 0 is never rewritten */
+  for(pos = 1; pos < s->l; pos++){
+    conll_word *cur = s->words[pos];
+    if(cur->head == 0) continue; /* roots keep 0 as index of head */
+    cur->head -= (int)pos;
+  }
+}
+
+
+/* Gives every word an id equal to its slot in s->words, then recomputes each
+   head from the mother pointer (0 for a word without mother). */
+void conll_renumber_sentence(conll_sentence *s)
+{
+  unsigned k;
+
+  /* all ids must be final before any head is derived from them */
+  for(k = 0; k < s->l; k++)
+    s->words[k]->id = k;
+
+  for(k = 0; k < s->l; k++){
+    conll_word *cur = s->words[k];
+    cur->head = cur->mother ? cur->mother->id : 0;
+  }
+}
+
+
+/* Empties the sentence: frees every word, clears the slots and the root
+   pointer, then installs a fresh artificial ROOT word in slot 0 (l becomes 1). */
+void conll_reset_sentence(conll_sentence *s)
+{
+  unsigned i;
+  for(i=0 ; i < s->l; i++){
+    free(s->words[i]);      /* free(NULL) is a no-op */
+    s->words[i] = NULL;
+  }
+  s->root = NULL;           /* it pointed at one of the words just freed */
+  s->words[0] = conll_allocate_word(0, "ROOT", "ROOT", "ROOT", "ROOT", "ROOT", -1, "ROOT");
+  s->l = 1;
+}
+
+void conll_free_sentence(conll_sentence *s) /* frees the sentence structure only */
+{
+  int i;
+  for(i=0 ; i < s->l; i++){
+    if(s->words[i]){
+      /* free(s->words[i]); -- disabled: the words themselves are not released (NOTE(review): leak? confirm) */
+    }
+  }
+  free(s);
+}
+
+conll_word *conll_copy_word(conll_word *w){ /* new word with the id, form, lemma, cpostag, postag, feats, head and deprel of w */
+  return conll_allocate_word(w->id, w->form, w->lemma, w->cpostag, w->postag, w->feats, w->head, w->deprel); /* language, mother and daughters are not copied */
+}
+
+/* Allocates a word holding the given CoNLL columns (strings longer than MAX_STR - 1 are
+   truncated).  All other fields start empty or zero; the word has no mother and no daughters. */
+conll_word *conll_allocate_word(unsigned id, char *form, char *lemma, char *cpostag, char *postag, char *feats, unsigned head, char *deprel)
+{
+  conll_word *w = malloc(sizeof *w);
+  if(w == NULL){ fprintf(stderr, "cannot allocate word\n"); exit(1); }
+  w->id = id; w->head = head; /* callers pass -1 as "no head" */
+  snprintf(w->form, sizeof w->form, "%s", form);
+  snprintf(w->lemma, sizeof w->lemma, "%s", lemma);
+  snprintf(w->cpostag, sizeof w->cpostag, "%s", cpostag);
+  snprintf(w->postag, sizeof w->postag, "%s", postag);
+  snprintf(w->feats, sizeof w->feats, "%s", feats);
+  snprintf(w->deprel, sizeof w->deprel, "%s", deprel);
+  w->phead = 0; w->score = w->lex_aff = w->conf_meas = 0; w->pdeprel[0] = w->language[0] = '\0'; /* used to be left indeterminate */
+  w->mother = NULL; w->daughters_nb = 0;
+  return w;
+}
+
+/* Allocates an empty sentence: length 0, no root, every word slot NULL and the
+   number set to INCORRECT_SENTENCE_NUM_VALUE.  Exits when allocation fails. */
+conll_sentence *conll_allocate_sentence(void)
+{
+  conll_sentence *s = malloc(sizeof *s);
+  unsigned i;
+
+  if(s == NULL){
+    fprintf(stderr, "cannot allocate sentence\n");
+    exit(1);
+  }
+  s->num = INCORRECT_SENTENCE_NUM_VALUE;
+  s->l = 0;
+  s->root = NULL; /* used to be left indeterminate */
+  for(i=0; i < MAX_WORDS_IN_SENTENCE; i++)
+    s->words[i] = NULL;
+  return s;
+}
+
+/* Reads the next sentence of f into s (the previous content is discarded).
+   Returns 0 when f is already at end of file and 1 otherwise; the sentence may
+   then hold nothing but the artificial ROOT word (s->l == 1). */
+int conll_load_sentence(FILE *f, conll_sentence *s)
+{
+  unsigned i;
+  if(feof(f)) return 0;
+
+  conll_reset_sentence(s);
+  while(conll_parse_line(f, s))
+    ; /* one line at a time until a blank line, a comment line or end of file */
+
+  if(s->l == 0) return 0;
+  /* build the tree structure */
+  s->words[0]->mother = NULL;
+  for(i=1; i < s->l; ++i){
+    conll_word *w = s->words[i];
+    w->mother = NULL; /* stays NULL when the head is missing or out of range */
+    if((w->head >= 0) && ((unsigned)w->head < s->l)) /* s->l itself is one past the last word */
+      conll_add_daughter(w, s->words[w->head]);
+  }
+  return 1;
+}
+
+
+
+/*----------------------------------------------------------------------------*/
+/* Reads one line of f and, when it describes a word, appends that word to s.
+   Returns 0 when the sentence is over (blank line, comment line, end of file or
+   read error) and 1 when the caller should go on with the next line. */
+int conll_parse_line(FILE *f, conll_sentence *s)
+{
+  char buff[MAX_LINE_LENGTH];
+  conll_word *w;
+  char head_str[100] = "_"; /* stays "_" when the line stops before column 7 */
+  char C9[MAX_STR];         /* columns 9 and 10 are read and discarded */
+  char C10[MAX_STR];
+  size_t first_col_len, i;
+
+#if MAX_STR < 10000
+#error "the field widths of the sscanf format below assume MAX_STR >= 10000"
+#endif
+
+  if(feof(f)) return 0;
+
+  if (fgets(buff, MAX_LINE_LENGTH, f) == NULL) {
+    return 0;
+  }
+
+  /* ignore empty line */
+  if(buff[0] == '\n'){
+    return 0;
+  }
+
+  /* specific to conll_u */
+
+  /* ignore comments  */
+  if(buff[0] == '#'){
+    return 0;
+  }
+
+  /* ignore amalgams (a '-' in the first column) and ellipsis (a '.' in the first
+     column); the scan stops at the first tab or at the end of the string */
+  first_col_len = strcspn(buff, "\t");
+  for(i = 0; i < first_col_len; i++){
+    if((buff[i] == '-') || (buff[i] == '.')) return 1;
+  }
+
+  /* end of specific to conll_u */
+
+  /* no free slot left: drop the word rather than write past the end of s->words */
+  if(s->l >= MAX_WORDS_IN_SENTENCE){
+    fprintf(stderr, "sentence longer than %d words: extra word ignored\n", MAX_WORDS_IN_SENTENCE);
+    return 1;
+  }
+
+  w = (conll_word *)malloc(sizeof(conll_word));
+  if(w == NULL){
+    fprintf(stderr, "cannot allocate word\n");
+    exit(1);
+  }
+
+  /* values kept by every field that the line does not provide */
+  w->id = s->l;
+  w->form[0] = w->lemma[0] = w->cpostag[0] = w->postag[0] = w->feats[0] = '\0';
+  w->deprel[0] = w->pdeprel[0] = w->language[0] = '\0';
+  w->head = -1; /* no head: missing or "_" head column */
+  w->phead = 0;
+  w->score = w->lex_aff = w->conf_meas = 0;
+  w->mother = NULL;
+  w->daughters_nb = 0;
+  s->words[s->l] = w;
+  s->l++;
+
+  /* read a dependency description */
+
+  /* 1 A a _ DT _ 3 det _ _ */
+  /* 2 severe severe _ JJ _ 3 amod _ _ */
+  /* 3 storm storm _ NN _ 4 nsubj _ _ */
+  /* 4 swept sweep _ VBD _ 26 ccomp _ _ */
+  /* 5 through through _ IN _ 4 prep _ _ */
+
+  /* every conversion is bounded by the size of its destination buffer */
+  sscanf(buff, "%u\t%9999[^\t]\t%9999[^\t]\t%9999[^\t]\t%9999[^\t]\t%9999[^\t]\t%99[^\t]\t%9999[^\t]\t%9999[^\t]\t%9999[^\t]\t%9999s",
+         &(w->id), w->form, w->lemma, w->cpostag, w->postag, w->feats, head_str, w->deprel, C9, C10, w->language);
+
+  if(strcmp(head_str, "_")){
+    w->head = atoi(head_str);
+    if(w->head == 0) s->root = w;
+  }
+
+  return 1;
+}
+
+/* Prints words 1 .. l-1 on stdout, one per line.  A line holds, in this order,
+   the columns whose print_* flag is non-zero, each followed by a tab, and ends
+   with 1 for the last word of the sentence and 0 for any other word. */
+void conll_print_sentence_mcf2(conll_sentence *s, int print_id, int print_form, int print_lemma, int print_cpostag, int print_postag, int print_feats, int print_head, int print_deprel)
+{
+  unsigned pos;
+
+  if(s->l <= 1) return;
+  for(pos = 1; pos < s->l; pos++){
+    const conll_word *cur = s->words[pos];
+    if(print_id)
+      fprintf(stdout, "%d\t", cur->id);
+    if(print_form)
+      fprintf(stdout, "%s\t", cur->form);
+    if(print_lemma)
+      fprintf(stdout, "%s\t", cur->lemma);
+    if(print_cpostag)
+      fprintf(stdout, "%s\t", cur->cpostag);
+    if(print_postag)
+      fprintf(stdout, "%s\t", cur->postag);
+    if(print_feats)
+      fprintf(stdout, "%s\t", cur->feats);
+    if(print_head)
+      fprintf(stdout, "%d\t", cur->head);
+    if(print_deprel)
+      fprintf(stdout, "%s\t", cur->deprel);
+
+    fputs((pos + 1 == s->l) ? "1\n" : "0\n", stdout);
+  }
+}
+
+/* Prints words 1 .. l-1 on stdout, one per line.  The first nb_col characters of
+   columns select what is printed, each value followed by a tab: I id, W form,
+   L lemma, C cpostag, P postag, F feats, H head, D deprel, G language. */
+void conll_print_sentence_mcf3(conll_sentence *s, char *columns, int nb_col)
+{
+  unsigned pos;
+  int col;
+
+  if(s->l <= 1) return;
+
+  for(pos = 1; pos < s->l; pos++){
+    const conll_word *cur = s->words[pos];
+
+    for(col = 0; col < nb_col; col++){
+      const char *text = NULL; /* string column selected by this code, if any */
+
+      switch(columns[col]){
+      case 'I': /* the two numeric columns are printed on the spot */
+        printf("%d\t", cur->id);
+        break;
+      case 'H':
+        printf("%d\t", cur->head);
+        break;
+      case 'W': text = cur->form;     break;
+      case 'L': text = cur->lemma;    break;
+      case 'C': text = cur->cpostag;  break;
+      case 'P': text = cur->postag;   break;
+      case 'F': text = cur->feats;    break;
+      case 'D': text = cur->deprel;   break;
+      case 'G': text = cur->language; break;
+      default:  /* any other code prints nothing */
+        break;
+      }
+      if(text != NULL)
+        printf("%s\t", text);
+    }
+
+    /* end of line marker: 1 for the last word of the sentence, 0 otherwise */
+    if(pos + 1 == s->l)
+      fputs("1\n", stdout);
+    else
+      fputs("0\n", stdout);
+  }
+}
+
+/* Prints words 1 .. l-1 on stdout as tab separated lines: form, part of speech
+   (cpostag when coarse_pos is non-zero, postag otherwise), lemma, id of the
+   mother minus id of the word (0 without mother), deprel, and 1 for the last
+   word of the sentence or 0 for any other word. */
+void conll_print_sentence_mcf(conll_sentence *s, int coarse_pos)
+{
+  unsigned pos;
+
+  if(s->l <= 1) return;
+
+  for(pos = 1; pos < s->l; pos++){
+    const conll_word *cur = s->words[pos];
+    const char *tag = coarse_pos ? cur->cpostag : cur->postag;
+    int last = (pos + 1 == s->l);
+
+    printf("%s", cur->form);
+    printf("\t%s", tag);
+    printf("\t%s", cur->lemma);
+    /* governor as an offset from the word itself */
+    if(cur->mother != NULL)
+      printf("\t%d", cur->mother->id - cur->id);
+    else
+      printf("\t0");
+
+    printf("\t%s", cur->deprel);
+    printf("\t%d\n", last);
+  }
+}
+
+
+/* Prints words 1 .. l-1 on stdout in a ten column CoNLL layout (the head is the
+   id of the mother, 0 without mother; columns 9 and 10 are "_"), then an empty
+   line.  Nothing at all is printed when the sentence has no such word. */
+void conll_print_sentence(conll_sentence *s)
+{
+  unsigned pos;
+
+  if(s->l <= 1) return;
+
+  for(pos = 1; pos < s->l; pos++){
+    const conll_word *cur = s->words[pos];
+
+    printf("%d\t%s\t%s", cur->id, cur->form, cur->lemma);
+    printf("\t%s\t%s\t%s", cur->cpostag, cur->postag, cur->feats);
+    /* the head comes from the tree, not from the head field */
+    if(cur->mother != NULL)
+      printf("\t%d", cur->mother->id);
+    else
+      printf("\t0");
+    printf("\t%s", cur->deprel);
+    printf("\t_\t_\n");
+  }
+
+  /* sentence separator */
+  putchar('\n');
+}
+
+/* Removes the NULL slots of s->words, keeping the remaining words in order and
+   shrinking s->l accordingly.  The slots vacated at the end are reset to NULL,
+   so no word stays reachable from two slots. */
+void conll_compact_sentence(conll_sentence *s)
+{
+  unsigned src, dst = 0;
+  for(src = 0; src < s->l; src++){
+    conll_word *w = s->words[src];
+    s->words[src] = NULL;
+    if(w != NULL) s->words[dst++] = w; /* dst never runs ahead of src */
+  }
+  s->l = dst;
+}
+
+/* Makes mother the mother of daughter and appends daughter to the daughters of
+   mother.  With a NULL mother only daughter->mother is cleared; a NULL daughter
+   is ignored. */
+void conll_add_daughter(conll_word *daughter, conll_word *mother)
+{
+  if(daughter == NULL) return;
+
+  daughter->mother = mother;
+  if(mother == NULL) return;
+
+  mother->daughters[mother->daughters_nb] = daughter;
+  mother->daughters_nb += 1;
+}
+
+void conll_remove_daughter(conll_sentence *s, int i) /* takes word i out of the list of daughters of its mother */
+{
+  int j,k;
+  conll_word *dep = s->words[i];
+  conll_word *gov;
+  if(dep){
+    gov = dep->mother;
+    if(gov){
+      for(j=0; j < gov->daughters_nb; j++){
+        if(gov->daughters[j] == dep){
+          for(k=j; k < gov->daughters_nb - 1; k++){ /* close the gap left by dep */
+            gov->daughters[k] = gov->daughters[k+1]; 
+          }
+          gov->daughters_nb--;
+        }
+      }
+    }
+  }
+} /* dep->mother itself is left untouched; an empty slot or a word without mother changes nothing */
+
+/* Deletes word i and every word that has it, directly or not, as mother: each one is
+   unlinked from its mother, freed, and its slot set to NULL.  An empty slot is left alone. */
+void conll_remove_word_rec(conll_sentence *s, int i)
+{
+  unsigned j;
+  conll_word *w = s->words[i];
+  if(w == NULL) return; /* otherwise "mother == w" below would match every motherless word */
+  for(j=1; j < s->l; j++)
+    if((s->words[j]) && (s->words[j]->mother == w)) conll_remove_word_rec(s, j);
+  conll_remove_daughter(s, i);
+  free(w);
+  s->words[i] = NULL;
+}
+
+void conll_remove_subtree(conll_sentence *s, int root) /* deletes word root together with all its descendants */
+{
+  conll_remove_word_rec(s, root); /* frees the words and leaves NULL slots behind */
+  conll_compact_sentence(s);      /* closes those slots; ids and heads are not renumbered here */
+}
+
+void conll_add_word(conll_sentence *s, conll_word *w, int index, conll_word *gov) /* stores w at slot index and attaches it to gov */
+{
+  int i;
+  if(s->words[index] != NULL){ /* occupied slot: move it and everything after it one slot to the right */
+    for(i=s->l; i>index; i--){
+      s->words[i] = s->words[i-1];
+    }
+    s->l++;
+  }
+  s->words[index] = w;
+  if(index >= s->l) s->l = index+1; /* storing beyond the current end extends the sentence */
+  if(gov != NULL)
+    conll_add_daughter(w, gov);
+} /* NOTE(review): nothing checks index or s->l against MAX_WORDS_IN_SENTENCE - confirm callers stay in range */
+ 
+/* Replaces the word at index by gov (stored at index_gov; inherits deprel, mother and daughters)
+   and dep (stored at index_dep as a daughter of gov).  The replaced word is unlinked and freed. */
+void conll_split_node_in_two(conll_sentence *s, int index, conll_word *gov, conll_word *dep, int index_gov, int index_dep)
+{
+  unsigned i;
+  conll_word *w = s->words[index];
+  conll_word *mother = w->mother;
+  strcpy(gov->deprel, w->deprel);
+  for(i=1; i < s->l; i++)
+    if(s->words[i] && (s->words[i]->mother == w)) conll_add_daughter(s->words[i], gov); /* empty slots are skipped */
+  conll_remove_daughter(s, index); /* the mother must not keep a pointer to freed memory */
+  free(w);
+  s->words[index] = NULL;
+  conll_add_word(s, gov, index_gov, mother);
+  conll_add_word(s, dep, index_dep, gov);
+}
+
+
+/*---------------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------------*/
+
+/* Replaces the cpostag of words 1 .. l-1 by the value that h_cpos associates with
+   it.  A cpostag without entry is kept as it is and reported on stderr. */
+void conll_change_cpos(conll_sentence *s, hash_str *h_cpos)
+{
+  unsigned pos;
+
+  for(pos = 1; pos < s->l; pos++){
+    conll_word *cur = s->words[pos];
+    char *mapped = hash_str_get_val(h_cpos, cur->cpostag);
+
+    if(mapped == NULL){
+      /* unknown tag: warn and move on */
+      fprintf(stderr, "ATTENTION: cpos %s inconnue\n", cur->cpostag);
+      continue;
+    }
+
+    strcpy(cur->cpostag, mapped);
+  }
+}
+
+/*---------------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------------*/
+/* Replaces the postag of words 1 .. l-1 by the value that h_pos associates with it.
+   A postag without entry is kept as it is and reported on stderr. */
+void conll_change_pos(conll_sentence *s, hash_str *h_pos)
+{
+  int i;
+  conll_word *w;
+  char *val;
+  for(i=1; i<s->l; i++){
+    w = s->words[i];
+    val = hash_str_get_val (h_pos, w->postag);
+    if(val){
+      strcpy(w->postag, val);
+    }
+    else{
+      /* report the tag that failed the lookup: postag (cpostag was printed before) */
+      fprintf(stderr, "ATTENTION: pos %s inconnue\n", w->postag);
+    }
+  }
+}
+
+/*---------------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------------*/
+
+
+
+/* Replaces the deprel of words 1 .. l-1 by the value that h_fct associates with
+   it.  A deprel without entry is kept as it is and reported on stderr. */
+void conll_change_fct(conll_sentence *s, hash_str *h_fct)
+{
+  unsigned k = 1;
+
+  /* slot 0 is not touched */
+  while(k < s->l){
+    conll_word *word = s->words[k++];
+    char *label = hash_str_get_val(h_fct, word->deprel);
+
+    if(label != NULL)
+      strcpy(word->deprel, label);
+    else
+      fprintf(stderr, "ATTENTION: fct %s inconnue\n", word->deprel);
+
+  }
+
+}
+
+/* Returns 1 when s is made of decimal digits and commas only and holds at least one
+   digit ("12", "3,5", "1,000"); returns 0 otherwise, in particular for NULL, "" and ",". */
+int conll_is_num(char *s)
+{
+  int digits = 0;
+  if(s == NULL) return 0;
+  for(; *s != '\0'; s++){
+    if((*s >= '0') && (*s <= '9')) digits++;
+    else if(*s != ',') return 0;
+  }
+  return digits > 0; /* an empty or comma-only string is not a number */
+}
+
+
+/* Sets the id of every word to its position in the sentence plus offset. */
+void conll_renumber_sentence_offset(conll_sentence *s, int offset)
+{
+  unsigned pos;
+  for(pos = 0; pos < s->l; pos++)
+    s->words[pos]->id = pos + offset;
+}
diff --git a/maca_common/src/hash_str.c b/maca_common/src/hash_str.c
new file mode 100644
index 0000000..4bc2752
--- /dev/null
+++ b/maca_common/src/hash_str.c
@@ -0,0 +1,118 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+#include"hash_str.h"
+#include"util.h"
+
+hash_str_cell *hash_str_cell_new(char *key, char *val, hash_str_cell *next) /* new chain cell placed in front of next */
+{
+  hash_str_cell *c = (hash_str_cell *)memalloc(sizeof(hash_str_cell));
+  c->val = val; /* key and val are stored as given, not copied; hash_str_cell_free later calls free() on both */
+  c->key = key;
+  c->next = next;
+  return c;
+}
+
+void hash_str_cell_free(hash_str_cell *c) /* frees the whole chain starting at c, keys and values included */
+{
+  hash_str_cell *rest;
+  for(; c != NULL; c = rest){
+    rest = c->next; /* read the link before the cell goes away */
+    free(c->key); free(c->val); free(c);
+  }
+}
+
+
+/* Creates an empty table with size buckets. */
+hash_str *hash_str_new(int size)
+{
+  hash_str *table = (hash_str *)memalloc(sizeof(hash_str));
+  int bucket;
+  table->array = (hash_str_cell **)memalloc(size * sizeof(hash_str_cell *));
+  for(bucket = 0; bucket < size; bucket++) table->array[bucket] = NULL;
+  table->size = size;
+  table->nbelem = 0;
+  return table;
+}
+
+void hash_str_free(hash_str *h) /* frees every chain (with its keys and values), the bucket array and the table */
+{
+  int i;
+  for(i=0; i < h->size; i++) hash_str_cell_free(h->array[i]);
+  free(h->array); /* the bucket array used to be leaked */
+  free(h);
+}
+
+/* Maps key to a bucket index in [0, size): first byte plus i*i times the byte at index i,
+   modulo size.  Bytes are read as unsigned so the index can never be negative. */
+int hash_str_func(char *key, int size)
+{
+  const unsigned char *bytes = (const unsigned char *)key;
+  unsigned long sum = bytes[0], i;
+  for(i = 1; bytes[0] != '\0' && bytes[i] != '\0'; i++) sum += i * i * bytes[i];
+  return (int)(sum % (unsigned long)size);
+}
+
+/* Returns the cell whose key equals key, or NULL when the table has none. */
+hash_str_cell *hash_str_lookup(hash_str *h, char *key)
+{
+  hash_str_cell *cell = h->array[hash_str_func(key, h->size)];
+
+  /* walk the chain of the bucket the key hashes to */
+  while(cell != NULL){
+    if(strcmp(key, cell->key) == 0)
+      break;
+    cell = cell->next;
+  }
+  return cell;
+}
+
+/* Returns the value stored under key, or HASH_STR_INVALID_VAL when key is absent. */
+char *hash_str_get_val(hash_str *h, char *key)
+{
+  hash_str_cell *hit = hash_str_lookup(h, key);
+
+  if(hit == NULL)
+    return HASH_STR_INVALID_VAL;
+  return hit->val;
+}
+
+void hash_str_add(hash_str *h, char *key, char *val) /* adds the couple (key, val) unless key is already present */
+{
+  int index;
+  /* printf("add couple (%s %s)\n", key, val); */
+
+  if(hash_str_lookup(h, key)) return; /* existing key: table unchanged; NOTE(review): key and val are then dropped, a leak if the caller allocated them */
+  index = hash_str_func(key, h->size);
+  h->array[index] = hash_str_cell_new(key, val, h->array[index]); /* the new cell becomes the head of its chain */
+  h->nbelem++;
+}
+
+int hash_str_cell_nb(hash_str_cell *c) /* number of cells in the chain starting at c */
+{
+  if(c == NULL) return 0; /* an empty chain has no cell */
+  return 1 + hash_str_cell_nb(c->next); /* one call per cell of the chain */
+}
+
+/* Prints on stdout, for every chain length from 0 to the longest one found, a
+   line "length count" giving the number of buckets whose chain has that length. */
+void hash_str_stats(hash_str *h)
+{
+  int longest = 0;
+  int i, len, nb;
+  int *histogram;
+
+  for(i=0; i < h->size; i++)
+    if((len = hash_str_cell_nb(h->array[i])) > longest)
+      longest = len;
+
+  nb = longest + 1;
+  histogram = (int *)memalloc(nb * sizeof(int));
+  for(i=0; i < nb; i++)
+    histogram[i] = 0;
+  for(i=0; i < h->size; i++)
+    histogram[hash_str_cell_nb(h->array[i])]++;
+  for(i=0; i < nb; i++)
+    printf("%d %d\n", i, histogram[i]);
+  free(histogram); /* the histogram used to be leaked on every call */
+}
diff --git a/maca_corpora/CMakeLists.txt b/maca_corpora/CMakeLists.txt
new file mode 100644
index 0000000..8d88845
--- /dev/null
+++ b/maca_corpora/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(lib)
+add_subdirectory(exec)
diff --git a/maca_corpora/exec/CMakeLists.txt b/maca_corpora/exec/CMakeLists.txt
new file mode 100644
index 0000000..d07dd6b
--- /dev/null
+++ b/maca_corpora/exec/CMakeLists.txt
@@ -0,0 +1,12 @@
+#compiling, linking and installing executables
+
+add_executable(ptb2en ptb2en.c)
+target_link_libraries(ptb2en maca_common)
+target_link_libraries(ptb2en maca_corpora)
+install (TARGETS ptb2en DESTINATION bin)
+
+add_executable(ftb2fr ftb2fr.c)
+target_link_libraries(ftb2fr maca_common)
+target_link_libraries(ftb2fr maca_corpora)
+install (TARGETS ftb2fr DESTINATION bin)
+
diff --git a/maca_corpora/exec/ftb2fr.c b/maca_corpora/exec/ftb2fr.c
new file mode 100644
index 0000000..6d422ba
--- /dev/null
+++ b/maca_corpora/exec/ftb2fr.c
@@ -0,0 +1,237 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+#include<strings.h>
+#include<math.h>
+#include<getopt.h>
+#include"conll_lib.h" 
+#include"hash_str.h" 
+#include"ftb_lib.h" 
+
+typedef struct options
+{
+  FILE * fd_parses;                    // input conll file, opened for reading by the -f option
+  int verbose_level;                   // value of the -v option
+  int snum;                            // maximum number of sentences to process (-n option)
+  char *filename;                      // name of the file given with -f
+  hash_str *h_pos;                     // part of speech conversion table, filled by reset_options
+  hash_str *h_fct;                     // dependency label conversion table, filled by reset_options
+} options;
+
+/* Rewrites the postag of words 1 .. l-1.  ADV becomes advneg when feats is
+   "s=neg" and adv otherwise; PONCT becomes poncts when feats is "s=s" and ponctw
+   otherwise; any other tag is replaced by its value in h_pos, or kept and
+   reported on stderr when h_pos has no entry for it. */
+void change_pos_fr(conll_sentence *s, hash_str *h_pos)
+{
+  unsigned pos;
+
+  for(pos = 1; pos < s->l; pos++){
+    conll_word *cur = s->words[pos];
+    const char *tag = NULL;
+
+    if(strcmp(cur->postag, "ADV") == 0){
+      /* decided by feats, the table is not consulted */
+      tag = (strcmp(cur->feats, "s=neg") == 0) ? "advneg" : "adv";
+    }
+    else if(strcmp(cur->postag, "PONCT") == 0){
+      /* decided by feats, the table is not consulted */
+      tag = (strcmp(cur->feats, "s=s") == 0) ? "poncts" : "ponctw";
+    }
+    else{
+      /* any other tag goes through the table */
+      tag = hash_str_get_val(h_pos, cur->postag);
+      if(tag == NULL){
+        fprintf(stderr, "ATTENTION: pos %s inconnue\n", cur->postag);
+        continue;
+      }
+    }
+
+    /* every tag that reaches this point replaces the old one */
+    strcpy(cur->postag, tag);
+  }
+}
+
+
+/* Gives the postag and the cpostag "poncts" to every word among 1 .. l-1 whose
+   form is ".".  op is not used. */
+void change_pos_and_cpos_of_dot(conll_sentence *s, options *op)
+{
+  unsigned pos;
+
+  for(pos = 1; pos < s->l; pos++){
+    conll_word *cur = s->words[pos];
+
+    if(cur == NULL) continue;
+    if(strcmp(cur->form, ".") != 0) continue;
+    strcpy(cur->postag, "poncts");
+    strcpy(cur->cpostag, "poncts");
+  }
+}
+
+/*---------------------------------------------------------------------------------*/
+
+options op;
+
+void print_options(options *op) /* writes the current option values to stderr */
+{
+  fprintf(stderr, "file name = %s\n", op->filename);
+  fprintf(stderr, "verbose level = %d\n", op->verbose_level);
+  fprintf(stderr, "maximum number of sentences to process = %d\n", op->snum);
+}
+
+/* Puts op in its default state: no input file, verbosity 0, a limit of 100000000
+   sentences, and freshly built conversion tables for parts of speech and labels. */
+void reset_options(options * op)
+{
+  static const char *pos_map[][2] = {
+    {"ADJ", "adj"},
+    {"ADJWH", "adj"},
+    {"ADV", "adv"},
+    {"ADVWH", "adv"},
+    {"CC", "coo"},
+    {"CLO", "clo"},
+    {"CLR", "clr"},
+    {"CLS", "cln"},
+    {"CS", "csu"},
+    {"DET", "det"},
+    {"DETWH", "det"},
+    {"ET", "etr"},
+    {"I", "pres"},
+    {"NC", "nc"},
+    {"NPP", "np"},
+    {"P", "prep"},
+    {"P+D", "prep"},
+    {"PONCT", ""},
+    {"P+PRO", "prep"},
+    {"PREF", "pref"},
+    {"PRO", "pro"},
+    {"PROREL", "prorel"},
+    {"PROWH", "pri"},
+    {"V", "v"},
+    {"VIMP", "v"},
+    {"VINF", "vinf"},
+    {"VPP", "vppart"},
+    {"VPR", "vprespart"},
+    {"VS", "v"}
+  };
+  static const char *fct_map[][2] = {
+    {"aff", "aff"}, {"a_obj", "a_obj"},
+    {"arg", "arg"}, {"ato", "ato"},
+    {"ats", "ats"}, {"aux_caus", "aux_caus"},
+    {"aux_pass", "aux_pass"}, {"aux_tps", "aux_tps"},
+    {"comp", "comp"}, {"coord", "coord"},
+    {"de_obj", "de_obj"}, {"dep", "dep"},
+    {"dep_coord", "dep_coord"}, {"det", "det"},
+    {"missinghead", "missinghead"}, {"mod", "mod"},
+    {"mod_rel", "mod_rel"}, {"obj", "obj"},
+    {"obj1", "obj"}, {"p_obj", "p_obj"},
+    {"ponct", "ponct"}, {"root", "root"},
+    {"suj", "suj"}
+  };
+  size_t k;
+
+  op->filename = NULL;
+  op->fd_parses = NULL;
+  op->verbose_level = 0;
+  op->snum = 100000000;
+
+  op->h_pos = hash_str_new(100);
+  for(k = 0; k < sizeof pos_map / sizeof pos_map[0]; k++)
+    hash_str_add(op->h_pos, strdup(pos_map[k][0]), strdup(pos_map[k][1]));
+
+  op->h_fct = hash_str_new(100);
+  for(k = 0; k < sizeof fct_map / sizeof fct_map[0]; k++)
+    hash_str_add(op->h_fct, strdup(fct_map[k][0]), strdup(fct_map[k][1]));
+}
+
+/*---------------------------------------------------------------------------------*/
+void  print_help_message(char *program_name) /* writes the usage text to stderr */
+{
+  fprintf(stderr, "%s usage: %s [options]\n", program_name, program_name);
+  fprintf(stderr, "OPTIONS :\n");
+  fprintf(stderr, "      -f <file>     : hypothesis conll file\n");
+  fprintf(stderr, "      -n <int>      : process n sentences (default is 100 000 000)\n");
+  fprintf(stderr, "      -v 1|2|3      : verbosity level\n");
+  fprintf(stderr, "      -h            : print this message\n");
+}
+
+/*---------------------------------------------------------------------------------*/
+
+/* Resets op, then applies the command line options -h, -f, -n and -v.  Exits with status 1 when
+   called without argument or when no -f file could be opened, and with status 0 after -h. */
+void parse_options(int argc, char *argv[], options * op)
+{
+  int c; /* getopt returns an int: a plain char can never equal -1 where char is unsigned */
+  reset_options(op);
+
+  if(argc ==1){
+    print_help_message(argv[0]);
+    exit(1);
+  }
+
+  while ((c = getopt (argc, argv, "hf:n:v:")) != -1)
+    switch (c)
+      {
+      case 'h':
+        print_help_message(argv[0]);
+        exit(0);
+      case 'f':
+        op->filename = strdup(optarg);
+        if((op->fd_parses = fopen(op->filename, "r")) == NULL){
+          fprintf(stderr, "cannot open hypothesis file %s : aborting\n", op->filename);
+          exit(1);
+        }
+        break;
+      case 'n':
+        op->snum = atoi(optarg);
+        break;
+      case 'v':
+        op->verbose_level = atoi(optarg);
+        break;
+      }
+  if (op->fd_parses == NULL){
+    fprintf(stderr, "error : cannot open parse file: aborting\n");
+    exit(1);
+  }
+}
+
+
+/*---------------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------------*/
+int main(int argc, char *argv[]) /* converts the sentences of the -f file and prints them on stdout */
+{
+  conll_sentence *s = conll_allocate_sentence(); /* one sentence object, reused for every sentence read */
+  int snum = 0; /* number of sentences kept so far */
+  int res;
+  
+  parse_options(argc, argv, &op); /* op is the file-scope options instance */
+  print_options(&op); 
+  for(res = conll_load_sentence(op.fd_parses, s); res && (snum < op.snum); res = conll_load_sentence(op.fd_parses, s)){
+    s->num = snum;
+
+    /* if(s->l > 200) continue; */
+    //    if(!sentence_ends_with_poncts(s)) continue;
+    if(ftb_number_of_roots_in_sentence(s) != 1) continue; /* skipped sentences are neither counted nor printed */
+    if(ftb_sentence_contains_missinghead(s)) continue;
+
+    snum++;
+    
+    /* change_pos_and_cpos_of_dot(s, &op); */
+    ftb_change_form_and_lemma_of_numbers(s);
+    change_pos_fr(s, op.h_pos);
+      /* change_label_of_last_dep(s);    */
+    ftb_retokenize_three_dots(s);
+    ftb_tokenize_dot(s, "titre", "poncts", "abbrev");
+    conll_renumber_sentence(s); /* ids and heads are recomputed after the retokenization calls above */
+    //    conll_compute_relative_index_of_heads(s);
+    conll_print_sentence(s);
+    //    print_sentence_no_newline(s);
+    
+  }
+  fprintf(stderr, "\n");
+  fclose(op.fd_parses);
+  conll_free_sentence(s);
+  return 0;
+}
diff --git a/maca_corpora/exec/ptb2en.c b/maca_corpora/exec/ptb2en.c
new file mode 100644
index 0000000..d5545d7
--- /dev/null
+++ b/maca_corpora/exec/ptb2en.c
@@ -0,0 +1,275 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+#include<strings.h>
+#include<math.h>
+#include<getopt.h>
+#include"conll_lib.h" 
+#include"hash_str.h" 
+
+typedef struct options   // command line options of the program
+{
+  FILE * fd_parses;                    // input stream, opened for reading from the -f file
+  int verbose_level;                   // value given with -v (0 by default)
+  int snum;                            // maximum number of sentences to process (-n)
+  char *filename;                      // copy of the -f argument
+  hash_str *h_pos;                     // part-of-speech tag pairs filled by reset_options()
+  hash_str *h_fct;                     // function label pairs filled by reset_options()
+} options;
+
+void tokenize_dot_ptb(conll_sentence *s, char *dep_postag, char *label)
+{
+  /* Split every token of s ending in '.' (other than "." and "...") in two: a copy of the
+     token without its final dot (the final dot of its lemma, if any, is dropped too) and a
+     new "." token whose postag and cpostag are dep_postag and whose label is label.
+     Both are handed to conll_split_node_in_two() with positions i and i+1.
+     s is modified in place. */
+  int i;
+
+  for(i=1; i<s->l; i++){
+    conll_word *w = s->words[i];
+    size_t form_len = strlen(w->form);
+
+    if((form_len <= 1) || !strcmp(w->form, "...") || (w->form[form_len - 1] != '.'))
+      continue;
+
+    size_t lemma_len = strlen(w->lemma);
+    conll_word *abbrev = conll_copy_word(w);
+    abbrev->form[strlen(abbrev->form) - 1] = '\0';
+    /* an empty lemma has no last character: testing it would read lemma[-1] */
+    if((lemma_len > 0) && (w->lemma[lemma_len - 1] == '.')) abbrev->lemma[strlen(abbrev->lemma) - 1] = '\0';
+    conll_word *dot = conll_allocate_word(i, ".", ".", dep_postag, dep_postag, "_", -1, label);
+    conll_split_node_in_two(s, i, abbrev, dot, i, i+1);
+  }
+}
+
+void change_pos_and_cpos_of_dot(conll_sentence *s, options *op)
+{
+  /* Give the tag "." to both postag and cpostag of every word of s whose form is ".".
+     Slots of s->words holding NULL are skipped. The op argument is accepted but
+     not used. */
+  int idx = 1;
+
+  while(idx < s->l){
+    conll_word *tok = s->words[idx++];
+
+    if(!tok || strcmp(tok->form, ".")) continue;
+    strcpy(tok->postag, ".");
+    strcpy(tok->cpostag, ".");
+  }
+}
+
+void print_sentence_no_newline_en(conll_sentence *s)
+{
+  int i;
+  conll_word *w;
+  /* Print the words of s (index 1 onwards) on stdout, one tab-separated line per word. */
+  if((s->l == 1) || (s->l == 0)) return;
+
+  for(i=1; i<s->l; i++){
+    w = s->words[i];
+    /* columns: form, postag, postag, lemma, head, deprel, end-of-sentence flag */
+    /* NOTE(review): postag is printed twice; the cpostag line below is commented out */
+    fprintf(stdout, "%s", w->form);
+    fprintf(stdout, "\t%s", w->postag);
+    fprintf(stdout, "\t%s", w->postag);
+    fprintf(stdout, "\t%s", w->lemma);
+    /* fprintf(stdout, "\t%s", w->cpostag); */
+
+    fprintf(stdout, "\t%d", w->head);
+
+    /*    if(w->mother == NULL)
+      fprintf(stdout, "\t0");
+    else
+    fprintf(stdout, "\t%d", w->mother->id - w->id);*/
+    /*
+    else{
+      if(strcmp(w->deprel, "root"))
+	fprintf(stdout, "\t%d", w->mother->id - w->id);
+      else
+	fprintf(stdout, "\t%d", w->mother->id - w->id);
+        fprintf(stdout, "\t%d", 0);
+	}*/
+    fprintf(stdout, "\t%s", w->deprel);
+    /* last column: 1 on the last word of the sentence, 0 on every other word */
+    /* if(!strcmp(w->deprel, "ponct") && !strcmp(w->postag, "poncts")) */
+     if(i == s->l -  1) 
+       fprintf(stdout, "\t1");
+    else
+      fprintf(stdout, "\t0");
+      
+
+    fprintf(stdout, "\n");
+
+    /* fprintf(stdout, "\t_\t_\n"); */
+    
+  }
+
+}
+
+
+/*---------------------------------------------------------------------------------*/
+
+options op; /* options of the running program, filled by parse_options() */
+
+void print_options(options *op)
+{
+  /* Report the option values on stderr, one "name = value" line each. */
+  fprintf(stderr, "file name = %s\nverbose level = %d\n", op->filename, op->verbose_level);
+  fprintf(stderr, "maximum number of sentences to process = %d\n", op->snum);
+}
+
+void reset_options(options * op) /* give every field of op its default value and fill the two tag tables */
+{
+  op->filename = NULL;
+  op->fd_parses = NULL;
+  op->verbose_level = 0;
+  op->snum = 100000000; /* same default as announced by the -n help text */
+  op->h_pos =  hash_str_new(100);
+  /* h_pos: upper-case part-of-speech tag paired with a lower-case tag ("PONCT" is paired with "") */
+  hash_str_add(op->h_pos, strdup("ADJ"), strdup("adj"));
+  hash_str_add(op->h_pos, strdup("ADJWH"), strdup("adj"));
+  hash_str_add(op->h_pos, strdup("ADV"), strdup("adv"));
+  hash_str_add(op->h_pos, strdup("ADVWH"), strdup("adv"));
+  hash_str_add(op->h_pos, strdup("CC"), strdup("coo"));
+  hash_str_add(op->h_pos, strdup("CLO"), strdup("clo"));
+  hash_str_add(op->h_pos, strdup("CLR"), strdup("clr"));
+  hash_str_add(op->h_pos, strdup("CLS"), strdup("cln"));
+  hash_str_add(op->h_pos, strdup("CS"), strdup("csu"));
+  hash_str_add(op->h_pos, strdup("DET"), strdup("det"));
+  hash_str_add(op->h_pos, strdup("DETWH"), strdup("det"));
+  hash_str_add(op->h_pos, strdup("ET"), strdup("etr"));
+  hash_str_add(op->h_pos, strdup("I"), strdup("pres"));
+  hash_str_add(op->h_pos, strdup("NC"), strdup("nc"));
+  hash_str_add(op->h_pos, strdup("NPP"), strdup("np"));
+  hash_str_add(op->h_pos, strdup("P"), strdup("prep"));
+  hash_str_add(op->h_pos, strdup("P+D"), strdup("prep"));
+  hash_str_add(op->h_pos, strdup("PONCT"), strdup(""));
+  hash_str_add(op->h_pos, strdup("P+PRO"), strdup("prep"));
+  hash_str_add(op->h_pos, strdup("PREF"), strdup("pref"));
+  hash_str_add(op->h_pos, strdup("PRO"), strdup("pro"));
+  hash_str_add(op->h_pos, strdup("PROREL"), strdup("prorel"));
+  hash_str_add(op->h_pos, strdup("PROWH"), strdup("pri"));
+  hash_str_add(op->h_pos, strdup("V"), strdup("v"));
+  hash_str_add(op->h_pos, strdup("VIMP"), strdup("v"));
+  hash_str_add(op->h_pos, strdup("VINF"), strdup("vinf"));
+  hash_str_add(op->h_pos, strdup("VPP"), strdup("vppart"));
+  hash_str_add(op->h_pos, strdup("VPR"), strdup("vprespart"));
+  hash_str_add(op->h_pos, strdup("VS"), strdup("v"));
+
+  op->h_fct =  hash_str_new(100);
+  /* h_fct: function label paired with itself, except "obj1" which is paired with "obj" */
+  hash_str_add(op->h_fct, strdup("aff"), strdup("aff"));
+  hash_str_add(op->h_fct, strdup("a_obj"), strdup("a_obj"));
+  hash_str_add(op->h_fct, strdup("arg"), strdup("arg"));
+  hash_str_add(op->h_fct, strdup("ato"), strdup("ato"));
+  hash_str_add(op->h_fct, strdup("ats"), strdup("ats"));
+  hash_str_add(op->h_fct, strdup("aux_caus"), strdup("aux_caus"));
+  hash_str_add(op->h_fct, strdup("aux_pass"), strdup("aux_pass"));
+  hash_str_add(op->h_fct, strdup("aux_tps"), strdup("aux_tps"));
+  hash_str_add(op->h_fct, strdup("comp"), strdup("comp"));
+  hash_str_add(op->h_fct, strdup("coord"), strdup("coord"));
+  hash_str_add(op->h_fct, strdup("de_obj"), strdup("de_obj"));
+  hash_str_add(op->h_fct, strdup("dep"), strdup("dep"));
+  hash_str_add(op->h_fct, strdup("dep_coord"), strdup("dep_coord"));
+  hash_str_add(op->h_fct, strdup("det"), strdup("det"));
+  hash_str_add(op->h_fct, strdup("missinghead"), strdup("missinghead"));
+  hash_str_add(op->h_fct, strdup("mod"), strdup("mod"));
+  hash_str_add(op->h_fct, strdup("mod_rel"), strdup("mod_rel"));
+  hash_str_add(op->h_fct, strdup("obj"), strdup("obj"));
+  hash_str_add(op->h_fct, strdup("obj1"), strdup("obj"));
+  hash_str_add(op->h_fct, strdup("p_obj"), strdup("p_obj"));
+  hash_str_add(op->h_fct, strdup("ponct"), strdup("ponct"));
+  hash_str_add(op->h_fct, strdup("root"), strdup("root"));
+  hash_str_add(op->h_fct, strdup("suj"), strdup("suj"));
+}
+
+/* Print the usage line of program_name followed by the list of options on stderr. */
+void print_help_message(char *program_name)
+{
+  fprintf(stderr, "%s usage: %s [options]\n", program_name, program_name);
+  fputs("OPTIONS :\n", stderr);
+  fputs("      -f <file>     : hypothesis conll file\n", stderr);
+  fputs("      -n <int>      : process n sentences (default is 100 000 000)\n", stderr);
+  fputs("      -v 1|2|3      : verbosity level\n", stderr);
+  fputs("      -h            : print this message\n", stderr);
+}
+
+/* Fill op from the command line, starting from the defaults set by reset_options():
+   -h prints the help and exits, -f opens the input file, -n and -v set snum and verbose_level. */
+void parse_options(int argc, char *argv[], options * op)
+{
+  int c; /* getopt() returns an int: in a plain char, -1 is never matched where char is unsigned */
+
+  reset_options(op);
+
+  if(argc ==1){
+    print_help_message(argv[0]);
+    exit(1);
+  }
+
+  while ((c = getopt (argc, argv, "hf:n:v:")) != -1)
+    switch (c)
+      {
+      case 'h':
+        print_help_message(argv[0]);
+        exit(0);
+      case 'f':
+        op->filename = strdup(optarg);
+        if((op->fd_parses = fopen(op->filename, "r")) == NULL){
+          fprintf(stderr, "cannot open hypothesis file %s : aborting\n", op->filename);
+          exit(1);
+        }
+        break;
+      case 'n':
+        op->snum = atoi(optarg);
+        break;
+      case 'v':
+        op->verbose_level = atoi(optarg);
+        break;
+      }
+  /* -f is mandatory: without it fd_parses still holds the NULL set by reset_options() */
+  if (op->fd_parses == NULL){
+    fprintf(stderr, "error : cannot open parse file: aborting\n");
+    exit(1);
+  }
+}
+
+
+/* Entry point: loads CoNLL sentences from the file opened by parse_options(), retags the "."  */
+/* tokens, splits word-final dots with tokenize_dot_ptb(), renumbers and prints each sentence. */
+int main(int argc, char *argv[])
+{
+  conll_sentence *s = conll_allocate_sentence();
+  int snum = 0; /* number of sentences processed so far, bounded by op.snum */
+  int res;
+  
+  parse_options(argc, argv, &op);
+  print_options(&op); 
+  for(res = conll_load_sentence(op.fd_parses, s); res && (snum < op.snum); res = conll_load_sentence(op.fd_parses, s)){
+    s->num = snum;
+    /* no sentence is filtered out here: every test below is disabled */
+    /* if(s->l > 200) continue; */
+    //    if(!sentence_ends_with_poncts(s)) continue;
+    //    if(number_of_roots_in_sentence(s) != 1) continue;
+    //if(sentence_contains_missinghead(s)) continue;
+
+    snum++;
+    
+     change_pos_and_cpos_of_dot(s, &op); 
+    //change_form_and_lemma_of_numbers(s);
+    //change_pos_fr(s, op.h_pos);
+      /* change_label_of_last_dep(s);    */
+    //retokenize_three_dots(s);
+    tokenize_dot_ptb(s, ".", "ABBREV");
+    conll_renumber_sentence(s);
+    //    conll_compute_relative_index_of_heads(s);
+    //    print_sentence_no_newline_en(s);
+    conll_print_sentence(s);
+    
+  }
+  fprintf(stderr, "\n");
+  fclose(op.fd_parses);
+  conll_free_sentence(s);
+  return 0;
+}
diff --git a/maca_corpora/lib/CMakeLists.txt b/maca_corpora/lib/CMakeLists.txt
new file mode 100644
index 0000000..a0ca33d
--- /dev/null
+++ b/maca_corpora/lib/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(SOURCES
+  src/ftb_lib.c
+  src/orfeo_lib.c
+)
+
+# build the static library maca_corpora from the files listed in SOURCES
+include_directories(src)
+add_library(maca_corpora STATIC ${SOURCES})
+# look for the library named m and link maca_corpora against the result
+find_library(M_LIB m)
+target_link_libraries(maca_corpora ${M_LIB})
diff --git a/maca_corpora/lib/include/ftb_lib.h b/maca_corpora/lib/include/ftb_lib.h
new file mode 100644
index 0000000..a8f238d
--- /dev/null
+++ b/maca_corpora/lib/include/ftb_lib.h
@@ -0,0 +1,38 @@
+/*******************************************************************************
+    Copyright (C) 2010 by Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
+                      and Joseph Le Roux <joseph.le.roux@gmail.com>
+    conll_lib is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    conll_lib is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with conll_lib. If not, see <http://www.gnu.org/licenses/>.
+*******************************************************************************/
+
+#ifndef FTB_LIB_H
+#define FTB_LIB_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "conll_lib.h"
+/* ftb_* helpers working on a conll_sentence; words are scanned from index 1 */
+
+int ftb_sentence_ends_with_poncts(conll_sentence *s);         /* 1 when the last word has postag "PONCT" and feats "s=s" */
+int ftb_number_of_roots_in_sentence(conll_sentence *s);       /* number of words labelled "root" */
+int ftb_sentence_contains_missinghead(conll_sentence *s);     /* 1 when some word is labelled "missinghead" */
+void ftb_change_form_and_lemma_of_numbers(conll_sentence *s); /* form and lemma become "_NUM_" when conll_is_num() accepts the form */
+void ftb_change_label_of_last_dep(conll_sentence *s);         /* last word relabelled "eos" unless it is labelled "root" */
+void ftb_retokenize_three_dots(conll_sentence *s);            /* three consecutive "." tokens are merged into one "..." token */
+void ftb_tokenize_dot(conll_sentence *s, char *gov_postag, char *dep_postag, char *label); /* splits the final dot off tokens such as "M." */
+void ftb_print_sentence_no_newline(conll_sentence *s);        /* tab-separated dump on stdout, one word per line */
+int ftb_get_root_index(conll_sentence *s);                    /* index of the first word labelled "root", -1 if none */
+void ftb_change_root_head(conll_sentence *s, int new_head_index); /* sets the head of the first word labelled "root" */
+void ftb_compute_relative_index_of_heads(conll_sentence *s);  /* NOTE(review): no definition in ftb_lib.c as added by this patch */
+#endif
diff --git a/maca_corpora/lib/include/orfeo_lib.h b/maca_corpora/lib/include/orfeo_lib.h
new file mode 100644
index 0000000..5f32182
--- /dev/null
+++ b/maca_corpora/lib/include/orfeo_lib.h
@@ -0,0 +1,17 @@
+#ifndef ORFEO_LIB_H
+#define ORFEO_LIB_H
+
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+#include<strings.h>
+#include"conll_lib.h" 
+
+/* splits "au+revoir" and the "<adverb>_que" compounds of s into two words */
+void orfeo_traite_mots_composes(conll_sentence *s);
+/* sets the lemma "NUM" on the words of s recognised as numbers */
+void orfeo_traite_nombres(conll_sentence *s);
+/* splits contractions such as "du", "aux" or "duquel" into a preposition and a second word */
+void orfeo_traite_amalgames(conll_sentence *s);
+
+#endif
diff --git a/maca_corpora/lib/src/ftb_lib.c b/maca_corpora/lib/src/ftb_lib.c
new file mode 100644
index 0000000..fbf6630
--- /dev/null
+++ b/maca_corpora/lib/src/ftb_lib.c
@@ -0,0 +1,170 @@
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "conll_lib.h"
+#include "ftb_lib.h"
+
+void ftb_change_root_head(conll_sentence *s, int new_head_index)
+{
+  /* Give new_head_index as head to the first word of s (scanning from index 1)
+     whose deprel is "root". Later "root" words, if any, are left alone, and
+     nothing changes when no word carries that label. */
+  int pos = 1;
+
+  while((pos < s->l) && strcmp(s->words[pos]->deprel, "root"))
+    pos++;
+
+  if(pos < s->l)
+    s->words[pos]->head = new_head_index;
+}
+
+int ftb_get_root_index(conll_sentence *s)
+{
+  /* Index of the first word of s (scanning from 1) whose deprel is "root", -1 if none. */
+  int found = -1;
+  int pos;
+
+  for(pos = 1; (found == -1) && (pos < s->l); pos++){
+    if(strcmp(s->words[pos]->deprel, "root") == 0)
+      found = pos;
+  }
+  return found;
+}
+
+
+int ftb_sentence_ends_with_poncts(conll_sentence *s)
+{
+  /* Returns 1 when the last word of s has postag "PONCT" and feats "s=s", 0 otherwise
+     (also 0 when s->l is below 1: there is no last word, and words[-1] must not be read). */
+  conll_word *w;
+  if(s->l < 1) return 0;
+  w = s->words[s->l-1];
+  return (!strcmp(w->postag, "PONCT") && !strcmp(w->feats, "s=s")) ? 1 : 0;
+}
+
+int ftb_number_of_roots_in_sentence(conll_sentence *s)
+{
+  /* Count the words of s, at indices 1 to s->l - 1, whose deprel is "root".
+     The words are visited from the last one down to the first one, which
+     does not change the total. */
+  int pos = s->l;
+  int total = 0;
+
+  while(--pos >= 1)
+    total += (strcmp(s->words[pos]->deprel, "root") == 0);
+
+  return total;
+}
+
+int ftb_sentence_contains_missinghead(conll_sentence *s)
+{
+  /* 1 as soon as one word of s (from index 1) has the deprel "missinghead", else 0. */
+  int pos = 1;
+  int seen = 0;
+
+  while(!seen && (pos < s->l)){
+    seen = !strcmp(s->words[pos]->deprel, "missinghead");
+    pos++;
+  }
+  return seen;
+}
+
+
+void ftb_change_form_and_lemma_of_numbers(conll_sentence *s)
+{
+  /* Overwrite form and lemma with "_NUM_" for every word whose form conll_is_num() accepts.
+     NOTE(review): assumes both buffers can hold 6 bytes - confirm in conll_lib.h. */
+  int pos = 0;
+
+  while(++pos < s->l){
+    conll_word *tok = s->words[pos];
+
+    if(!conll_is_num(tok->form)) continue;
+    strcpy(tok->form, "_NUM_");
+    strcpy(tok->lemma, "_NUM_");
+  }
+}
+/* Relabel the last word of s "eos" unless its label is "root"; does nothing when s->l is below 1. */
+void ftb_change_label_of_last_dep(conll_sentence *s)
+{
+  conll_word *last = (s->l > 0) ? s->words[s->l - 1] : NULL; /* never index words[-1] */
+  if(last && strcmp(last->deprel, "root")) strcpy(last->deprel, "eos");
+}
+
+/* Merge each run of three consecutive "." tokens of s into a single "..." token: the first
+   token gets the form and lemma "...", the two others are handed to conll_remove_word_rec()
+   and the sentence is compacted once at the end.
+   NOTE(review): removed slots presumably stay NULL until conll_compact_sentence() runs, which
+   is why all three positions are tested for NULL - confirm against conll_lib. */
+void ftb_retokenize_three_dots(conll_sentence *s)
+{
+  int i;
+  int l = s->l; /* loop bound read once, before any removal */
+
+  for(i=1; i < l-2; i++){
+    conll_word *w0 = s->words[i], *w1 = s->words[i+1], *w2 = s->words[i+2];
+
+    if(!w0 || !w1 || !w2) continue;
+    if(strcmp(w0->form, ".") || strcmp(w1->form, ".") || strcmp(w2->form, ".")) continue;
+    strcpy(w0->form, "...");
+    strcpy(w0->lemma, "...");
+    conll_remove_word_rec(s, i+1);
+    conll_remove_word_rec(s, i+2);
+  }
+  conll_compact_sentence(s);
+}
+
+/*---------------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------------*/
+
+void ftb_tokenize_dot(conll_sentence *s, char *gov_postag, char *dep_postag, char *label)
+{
+  /* Split every token of s ending in '.' (other than "." and "...") in two: a copy of the
+     token without its final dot, retagged gov_postag, and a new "." token tagged dep_postag
+     and labelled label. Both go to conll_split_node_in_two() with positions i and i+1. */
+  int i;
+
+  for(i=1; i<s->l; i++){
+    conll_word *w = s->words[i];
+    size_t form_len = strlen(w->form);
+
+    if((form_len <= 1) || !strcmp(w->form, "...") || (w->form[form_len - 1] != '.'))
+      continue;
+
+    size_t lemma_len = strlen(w->lemma);
+    conll_word *abbrev = conll_copy_word(w);
+    abbrev->form[strlen(abbrev->form) - 1] = '\0';
+    strcpy(abbrev->postag, gov_postag);
+    strcpy(abbrev->cpostag, gov_postag);
+    /* an empty lemma has no last character: testing it would read lemma[-1] */
+    if((lemma_len > 0) && (w->lemma[lemma_len - 1] == '.')) abbrev->lemma[strlen(abbrev->lemma) - 1] = '\0';
+    conll_word *dot = conll_allocate_word(i, ".", ".", dep_postag, dep_postag, "NULL", -1, label);
+    conll_split_node_in_two(s, i, abbrev, dot, i, i+1);
+  }
+}
+
+
+void ftb_print_sentence_no_newline(conll_sentence *s)
+{
+  /* Write the words of s (index 1 onwards) on stdout, one line per word, as tab-separated
+     columns: form, postag, feats, lemma, head, deprel, then a flag worth 1 on the last
+     word and 0 on the others. No empty line is printed after the sentence, and nothing
+     is printed at all when s->l is 0 or 1. */
+  int pos;
+  int last = s->l - 1;
+
+  if(last < 1)
+    return;
+
+  for(pos = 1; pos <= last; pos++){
+    conll_word *tok = s->words[pos];
+    const char *eos_flag = (pos == last) ? "\t1" : "\t0";
+
+    fprintf(stdout, "%s", tok->form);
+    fprintf(stdout, "\t%s\t%s", tok->postag, tok->feats);
+    fprintf(stdout, "\t%s\t%d", tok->lemma, tok->head);
+    fprintf(stdout, "\t%s", tok->deprel);
+    fprintf(stdout, "%s\n", eos_flag);
+  }
+}
diff --git a/maca_corpora/lib/src/orfeo_lib.c b/maca_corpora/lib/src/orfeo_lib.c
new file mode 100644
index 0000000..18d8906
--- /dev/null
+++ b/maca_corpora/lib/src/orfeo_lib.c
@@ -0,0 +1,428 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+#include<strings.h>
+#include<math.h>
+#include<getopt.h>
+#include"conll_lib.h" 
+#include"hash_str.h" 
+
+
+void traite_au_revoir(conll_sentence *s, int pos)
+{
+  /* Build the words "au" (prep) and "revoir" (nc, label OBJ) and hand them to
+     conll_split_node_in_two() for node pos, with positions pos and pos+1. */
+  conll_word *first = conll_allocate_word(-1, "au", "au", "prep", "prep", "NULL", -1, "NULL");
+  conll_word *second = conll_allocate_word(-1, "revoir", "revoir", "nc", "nc", "NULL", -1, "OBJ");
+  conll_split_node_in_two(s, pos, first, second, pos, pos+1); }
+
+void traite_ADV_que(conll_sentence *s, char *form, int pos)
+{
+  /* Split a compound such as "bien_que" located at position pos of s.
+     s    : sentence, modified through conll_split_node_in_two()
+     form : the compound; what precedes its first '_' becomes an ADV word (label MORPH),
+            what follows it becomes a CSU word of lemma "que" (empty when form has no '_')
+     pos  : index of the compound in s
+     Both words are handed to conll_split_node_in_two(), the CSU word with position pos+1
+     and the ADV word with position pos. */
+  char form_adv[30];
+  char form_que[30];
+  const char *sep = strchr(form, '_');
+  size_t adv_len = sep ? (size_t)(sep - form) : strlen(form);
+
+  /* bounded copies: a part longer than 29 bytes is truncated instead of overflowing its buffer */
+  if(adv_len >= sizeof form_adv) adv_len = sizeof form_adv - 1;
+  memcpy(form_adv, form, adv_len);
+  form_adv[adv_len] = '\0';
+  snprintf(form_que, sizeof form_que, "%s", sep ? sep + 1 : "");
+
+  conll_word *que = conll_allocate_word(-1, form_que, "que", "CSU", "CSU", "NULL", -1, "NULL");
+  conll_word *adv = conll_allocate_word(-1, form_adv, form_adv, "ADV", "ADV", "NULL", -1, "MORPH");
+
+  conll_split_node_in_two(s, pos, que, adv, pos+1, pos);
+}
+
+int chaine_possede_un_underscore(char *m)
+{
+  /* 1 when the string m contains at least one '_', 0 otherwise. */
+  const char *p;
+
+  for(p = m; *p != '\0'; p++){
+    if(*p == '_')
+      return 1;
+  }
+  return 0;
+}
+
+
+int chaine_possede_un_plus(char *m)
+{
+  /* 1 when the string m contains at least one '+', 0 otherwise. */
+  const char *p;
+
+  for(p = m; *p != '\0'; p++){
+    if(*p == '+')
+      return 1;
+  }
+  return 0;
+}
+
+
+
+int chaine_possede_un_moins(char *m)
+{
+  /* 1 when the string m contains at least one '-', 0 otherwise. */
+  const char *p;
+
+  for(p = m; *p != '\0'; p++){
+    if(*p == '-')
+      return 1;
+  }
+  return 0;
+}
+
+
+int chaine_possede_un_plus_ou_un_moins(char *m)
+{
+  /* 1 when the string m contains at least one '+' or one '-', 0 otherwise. */
+  const char *p;
+
+  for(p = m; *p != '\0'; p++){
+    if((*p == '+') || (*p == '-'))
+      return 1;
+  }
+  return 0;
+}
+/* Split the compounds of s: "au+revoir" goes to traite_au_revoir(), the "<adverb>_que" and
+   "<adverb>_qu'" forms listed below (lower case or capitalised) go to traite_ADV_que(). */
+void orfeo_traite_mots_composes(conll_sentence *s)
+{
+  int i;
+  conll_word *w;
+
+  for(i=1; i<s->l; i++){
+    w = s->words[i];
+    if(chaine_possede_un_plus(w->form)){
+      if(!strcmp(w->form, "au+revoir")) traite_au_revoir(s, i);
+    }
+    else if(chaine_possede_un_underscore(w->form)){
+      if(!strcmp(w->form, "bien_que") || !strcmp(w->form, "bien_qu'") || !strcmp(w->form, "Bien_que") || !strcmp(w->form, "Bien_qu'")
+         || !strcmp(w->form, "ainsi_que") || !strcmp(w->form, "ainsi_qu'") || !strcmp(w->form, "Ainsi_que") || !strcmp(w->form, "Ainsi_qu'")
+         || !strcmp(w->form, "autant_que") || !strcmp(w->form, "autant_qu'") || !strcmp(w->form, "Autant_que") || !strcmp(w->form, "Autant_qu'")
+         || !strcmp(w->form, "alors_que") || !strcmp(w->form, "alors_qu'") || !strcmp(w->form, "Alors_que") || !strcmp(w->form, "Alors_qu'")
+         || !strcmp(w->form, "maintenant_que") || !strcmp(w->form, "maintenant_qu'") || !strcmp(w->form, "Maintenant_que") || !strcmp(w->form, "Maintenant_qu'")
+         || !strcmp(w->form, "encore_que") || !strcmp(w->form, "encore_qu'") || !strcmp(w->form, "Encore_que") || !strcmp(w->form, "Encore_qu'")
+         || !strcmp(w->form, "plus_que") || !strcmp(w->form, "plus_qu'") || !strcmp(w->form, "Plus_que") || !strcmp(w->form, "Plus_qu'")
+         || !strcmp(w->form, "tant_que") || !strcmp(w->form, "tant_qu'") || !strcmp(w->form, "Tant_que") || !strcmp(w->form, "Tant_qu'"))
+        traite_ADV_que(s, w->form, i);
+    }
+  }
+}
+/*---------------------------------------------------------------------------------*/
+
+
+/*---------------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------------*/
+
+int chaine_est_un_chiffre(char *s)
+{
+  /* 1 when s is exactly one of the words tested below (French number words plus the
+     linking word "et"), 0 otherwise. The comparison is case sensitive. */
+  return !strcmp(s, "et")
+    || !strcmp(s, "zéro")
+    || !strcmp(s, "un")
+    || !strcmp(s, "deux")
+    || !strcmp(s, "trois")
+    || !strcmp(s, "quatre")
+    || !strcmp(s, "cinq")
+    || !strcmp(s, "six")
+    || !strcmp(s, "sept")
+    || !strcmp(s, "huit")
+    || !strcmp(s, "neuf")
+    || !strcmp(s, "dix")
+    || !strcmp(s, "onze")
+    || !strcmp(s, "douze")
+    || !strcmp(s, "treize")
+    || !strcmp(s, "quatorze")
+    || !strcmp(s, "quinze")
+    || !strcmp(s, "seize")
+    || !strcmp(s, "vingt")
+    || !strcmp(s, "vingts")
+    || !strcmp(s, "trente")
+    || !strcmp(s, "quarante")
+    || !strcmp(s, "cinquante")
+    || !strcmp(s, "soixante")
+    || !strcmp(s, "cent")
+    || !strcmp(s, "cents")
+    || !strcmp(s, "mille")
+    || !strcmp(s, "milles")
+    || !strcmp(s, "million")
+    || !strcmp(s, "millions")
+    || !strcmp(s, "milliard")
+    || !strcmp(s, "milliards");
+}
+
+
+int chaine_est_un_chiffre_sauf_un(char *s)
+{
+  /* 1 when s is exactly one of the French number words tested below, 0 otherwise.
+     "un", "et" and "vingts" are not in this list. The comparison is case sensitive. */
+  return !strcmp(s, "zéro")
+    || !strcmp(s, "deux")
+    || !strcmp(s, "trois")
+    || !strcmp(s, "quatre")
+    || !strcmp(s, "cinq")
+    || !strcmp(s, "six")
+    || !strcmp(s, "sept")
+    || !strcmp(s, "huit")
+    || !strcmp(s, "neuf")
+    || !strcmp(s, "dix")
+    || !strcmp(s, "onze")
+    || !strcmp(s, "douze")
+    || !strcmp(s, "treize")
+    || !strcmp(s, "quatorze")
+    || !strcmp(s, "quinze")
+    || !strcmp(s, "seize")
+    || !strcmp(s, "vingt")
+    || !strcmp(s, "trente")
+    || !strcmp(s, "quarante")
+    || !strcmp(s, "cinquante")
+    || !strcmp(s, "soixante")
+    || !strcmp(s, "cent")
+    || !strcmp(s, "cents")
+    || !strcmp(s, "mille")
+    || !strcmp(s, "milles")
+    || !strcmp(s, "million")
+    || !strcmp(s, "millions")
+    || !strcmp(s, "milliard")
+    || !strcmp(s, "milliards");
+}
+
+
+
+/*---------------------------------------------------------------------------------*/
+
+int chaine_composee_de_digits(char *orig)
+{
+  /* 1 when orig is made only of decimal digits and commas and holds at least one digit,
+     0 otherwise: the empty string and comma-only strings ("," or ",,") are not numbers. */
+  int digits = 0;
+  const char *p;
+
+  for(p = orig; *p != '\0'; p++){
+    if((*p >= '0') && (*p <= '9')) digits++;
+    else if(*p != ',') return 0;
+  }
+  return digits > 0;
+}
+ 
+
+int chaine_est_un_nombre(char *orig)
+{
+  /* 1 when orig is accepted by chaine_composee_de_digits() or chaine_est_un_chiffre_sauf_un(),
+     or is a list of chaine_est_un_chiffre() words joined by '+' or '-'; 0 otherwise. */
+  char *c, *s;
+  int parts = 0;
+
+  if(chaine_composee_de_digits(orig)) return 1;
+  if(chaine_est_un_chiffre_sauf_un(orig)) return 1;
+  if(!chaine_possede_un_plus_ou_un_moins(orig)) return 0;
+  if((s = strdup(orig)) == NULL) return 0;
+  /* strtok() yields no token for separator-only strings ("-", "+", "--"): they are not numbers */
+  for(c = strtok(s, "+-"); c; c = strtok(NULL, "+-"), parts++){
+    if(!chaine_est_un_chiffre(c)){
+      free(s);
+      return 0;
+    }
+  }
+  free(s);
+  return parts > 0;
+}
+
+/*---------------------------------------------------------------------------------*/
+
+void orfeo_traite_nombres(conll_sentence *s)
+{
+  /* Set the lemma to "NUM" for each word of s (index 1 onwards) whose form is
+     accepted by chaine_est_un_nombre(); the form itself is kept. */
+  int pos = 1;
+
+  while(pos < s->l){
+    conll_word *tok = s->words[pos];
+
+    if(chaine_est_un_nombre(tok->form)) strcpy(tok->lemma, "NUM");
+    pos++;
+  }
+}
+
+
+/*---------------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------------*/
+
+void traite_amalgame_du(conll_sentence *s, int i)
+{
+  /* "du" -> "de" + "le" (DET, SPEC): done only when word i has a mother and at least one daughter. */
+  conll_word *prep = s->words[i];
+  conll_word *first_dep, *added;
+  if(!prep->mother || !(prep->daughters_nb > 0)) return;
+  first_dep = prep->daughters[0];
+  strcpy(prep->form, "de");
+  strcpy(prep->lemma, "de");
+  added = conll_allocate_word(i, "le", "le", "DET", "DET", "NULL", 0, "SPEC");
+  conll_add_word(s, added, i+1, first_dep);
+}
+
+void traite_amalgame_des(conll_sentence *s, int i)
+{
+  /* "des" -> "de" + "les" (DET, SPEC): done only when word i has a mother and at least one daughter. */
+  conll_word *prep = s->words[i];
+  conll_word *first_dep, *added;
+  if(!prep->mother || !(prep->daughters_nb > 0)) return;
+  first_dep = prep->daughters[0];
+  strcpy(prep->form, "de");
+  strcpy(prep->lemma, "de");
+  added = conll_allocate_word(i, "les", "le", "DET", "DET", "NULL", 0, "SPEC");
+  conll_add_word(s, added, i+1, first_dep);
+}
+
+void traite_amalgame_au(conll_sentence *s, int i)
+{
+  /* "au" -> "à" + "le" (DET, SPEC): done only when word i has a mother and at least one daughter. */
+  conll_word *prep = s->words[i];
+  conll_word *first_dep, *added;
+
+  if(!prep->mother || !(prep->daughters_nb > 0)) return;
+  first_dep = prep->daughters[0];
+  strcpy(prep->form, "à");
+  strcpy(prep->lemma, "à");
+  added = conll_allocate_word(i, "le", "le", "DET", "DET", "NULL", 0, "SPEC");
+  conll_add_word(s, added, i+1, first_dep);
+}
+
+void traite_amalgame_aux(conll_sentence *s, int i)
+{
+  /* "aux" -> "à" + "les" (DET, SPEC): done only when word i has a mother and at least one daughter. */
+  conll_word *prep = s->words[i];
+  conll_word *first_dep, *added;
+  if(!prep->mother || !(prep->daughters_nb > 0)) return;
+  first_dep = prep->daughters[0];
+  strcpy(prep->form, "à");
+  strcpy(prep->lemma, "à");
+  added = conll_allocate_word(i, "les", "le", "DET", "DET", "NULL", 0, "SPEC");
+  conll_add_word(s, added, i+1, first_dep);
+}
+
+void traite_amalgame_auquel(conll_sentence *s, int i)
+{
+  /* "auquel" -> "à" + "lequel" (PRQ): done only when word i has a mother and at least one daughter. */
+  conll_word *prep = s->words[i];
+  conll_word *first_dep, *added;
+  if(!prep->mother || !(prep->daughters_nb > 0)) return;
+  first_dep = prep->daughters[0];
+  strcpy(prep->form, "à");
+  strcpy(prep->lemma, "à");
+  added = conll_allocate_word(i, "lequel", "lequel", "PRQ", "PRQ", "NULL", 0, "PRQ");
+  conll_add_word(s, added, i+1, first_dep);
+}
+
+void traite_amalgame_auxquels(conll_sentence *s, int i)
+{
+  /* "auxquels" -> "à" + "lesquels" (PRQ): done only when word i has a mother and at least one daughter. */
+  conll_word *prep = s->words[i];
+  conll_word *first_dep, *added;
+  if(!prep->mother || !(prep->daughters_nb > 0)) return;
+  first_dep = prep->daughters[0];
+  strcpy(prep->form, "à");
+  strcpy(prep->lemma, "à");
+  added = conll_allocate_word(i, "lesquels", "lequel", "PRQ", "PRQ", "NULL", 0, "PRQ");
+  conll_add_word(s, added, i+1, first_dep);
+}
+
+void traite_amalgame_auxquelles(conll_sentence *s, int i)
+{
+  /* "auxquelles" -> "à" + "lesquelles" (PRQ): done only when word i has a mother and at least one daughter. */
+  conll_word *prep = s->words[i];
+  conll_word *first_dep, *added;
+  if(!prep->mother || !(prep->daughters_nb > 0)) return;
+  first_dep = prep->daughters[0];
+  strcpy(prep->form, "à");
+  strcpy(prep->lemma, "à");
+  added = conll_allocate_word(i, "lesquelles", "lequel", "PRQ", "PRQ", "NULL", 0, "PRQ");
+  conll_add_word(s, added, i+1, first_dep);
+}
+
+void traite_amalgame_duquel(conll_sentence *s, int i)
+{
+  /* "duquel" -> "de" + "lequel" (PRQ): done only when word i has a mother and at least one daughter. */
+  conll_word *prep = s->words[i];
+  conll_word *first_dep, *added;
+  if(!prep->mother || !(prep->daughters_nb > 0)) return;
+  first_dep = prep->daughters[0];
+  strcpy(prep->form, "de");
+  strcpy(prep->lemma, "de");
+  added = conll_allocate_word(i, "lequel", "lequel", "PRQ", "PRQ", "NULL", 0, "PRQ");
+  conll_add_word(s, added, i+1, first_dep);
+}
+
+void traite_amalgame_desquels(conll_sentence *s, int i)
+{
+  /* "desquels" -> "de" + "lesquels" (PRQ): done only when word i has a mother and at least one daughter. */
+  conll_word *prep = s->words[i];
+  conll_word *first_dep, *added;
+  if(!prep->mother || !(prep->daughters_nb > 0)) return;
+  first_dep = prep->daughters[0];
+  strcpy(prep->form, "de");
+  strcpy(prep->lemma, "de");
+  added = conll_allocate_word(i, "lesquels", "lequel", "PRQ", "PRQ", "NULL", 0, "PRQ");
+  conll_add_word(s, added, i+1, first_dep);
+}
+
+void traite_amalgame_desquelles(conll_sentence *s, int i)
+{
+  /* "desquelles" -> "de" + "lesquelles" (PRQ): done only when word i has a mother and at least one daughter. */
+  conll_word *prep = s->words[i];
+  conll_word *first_dep, *added;
+  if(!prep->mother || !(prep->daughters_nb > 0)) return;
+  first_dep = prep->daughters[0];
+  strcpy(prep->form, "de");
+  strcpy(prep->lemma, "de");
+  added = conll_allocate_word(i, "lesquelles", "lequel", "PRQ", "PRQ", "NULL", 0, "PRQ");
+  conll_add_word(s, added, i+1, first_dep);
+}
+
+void orfeo_traite_amalgames(conll_sentence *s)
+{
+  /* Scan the words of s from index 1 and call, for each form listed below, the
+     traite_amalgame_* function in charge of that contraction. */
+  static const struct { const char *form; void (*split)(conll_sentence *, int); } rules[] = {
+    {"du", traite_amalgame_du}, {"des", traite_amalgame_des},
+    {"au", traite_amalgame_au}, {"aux", traite_amalgame_aux},
+    {"auquel", traite_amalgame_auquel}, {"auxquels", traite_amalgame_auxquels},
+    {"auxquelles", traite_amalgame_auxquelles}, {"duquel", traite_amalgame_duquel},
+    {"desquels", traite_amalgame_desquels}, {"desquelles", traite_amalgame_desquelles}
+  };
+  int i;
+  size_t k;
+
+  for(i=1; i<s->l; i++){
+    const char *form = s->words[i]->form;
+    for(k = 0; k < sizeof(rules) / sizeof(rules[0]); k++){
+      if(strcmp(form, rules[k].form)) continue;
+      rules[k].split(s, i);
+      break;
+    }
+  }
+}
+
+
+
+
+
+
+
+
+
+
diff --git a/maca_tools/CMakeLists.txt b/maca_tools/CMakeLists.txt
index bbf7680..df0ee2e 100644
--- a/maca_tools/CMakeLists.txt
+++ b/maca_tools/CMakeLists.txt
@@ -5,14 +5,18 @@ target_link_libraries(scenes_roots2fann maca_common)
 install (TARGETS scenes_roots2fann DESTINATION bin)
 
 add_executable(mcf2conll ./src/mcf2conll.c)
-target_link_libraries(mcf2conll perceptron)
-target_link_libraries(mcf2conll transparse)
 target_link_libraries(mcf2conll maca_common)
 install (TARGETS mcf2conll DESTINATION bin)
 
+add_executable(conll2mcf ./src/conll2mcf.c)
+target_link_libraries(conll2mcf maca_common)
+install (TARGETS conll2mcf DESTINATION bin)
+
+add_executable(conllu2mcf ./src/conllu2mcf.c)
+target_link_libraries(conllu2mcf maca_common)
+install (TARGETS conllu2mcf DESTINATION bin)
+
 add_executable(mcf2orfeo ./src/mcf2orfeo.c)
-target_link_libraries(mcf2orfeo perceptron)
-target_link_libraries(mcf2orfeo transparse)
 target_link_libraries(mcf2orfeo maca_common)
 install (TARGETS mcf2orfeo DESTINATION bin)
 
diff --git a/maca_tools/src/conll2mcf.c b/maca_tools/src/conll2mcf.c
new file mode 100644
index 0000000..17c13a5
--- /dev/null
+++ b/maca_tools/src/conll2mcf.c
@@ -0,0 +1,158 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+#include<strings.h>
+#include<math.h>
+#include<getopt.h>
+#include"conll_lib.h" 
+#include"hash_str.h" 
+
+#define NB_COL 7
+
+typedef struct options                 // command-line settings of conll2mcf
+{
+  FILE * fd_parses;                    // stream the sentences are loaded from (stdin unless -f is given)
+  int verbose_level;                   // value given with -v, 0 by default
+  int snum;                            // maximum number of sentences to process (-n)
+  char *filename;                      // strdup'ed copy of the -f argument, NULL when -f is absent
+  char columns[NB_COL];                // one selector character per output column (-1 to -7), '0' by default
+} options;
+
+/*---------------------------------------------------------------------------------*/
+
+options op;   /* program-wide option set; main() passes its address to parse_options() and print_options() */
+
+void print_options(options *op)
+{
+  /* Report the options on stderr; filename is NULL without -f and NULL must not reach %s. */
+  fprintf(stderr, "file name = %s\n", op->filename ? op->filename : "(stdin)");
+  fprintf(stderr, "verbose level = %d\nmaximum number of sentences to process = %d\n", op->verbose_level, op->snum);
+}
+
+void reset_options(options * op)
+{
+  /* Install the defaults: input from stdin, no file name, verbosity 0,
+     a limit of 100 000 000 sentences and '0' in every output column. */
+  memset(op->columns, '0', sizeof op->columns);
+  op->snum = 100000000;
+  op->verbose_level = 0;
+  op->fd_parses = stdin;
+  op->filename = NULL;
+}
+
+/*---------------------------------------------------------------------------------*/
+void print_help_message(char *program_name)
+{
+  /* Write the usage text to stderr; program_name appears twice in the banner. */
+  fprintf(stderr, "%s usage: %s [options]\n", program_name, program_name);
+  /* The rest of the help is constant and is emitted as one concatenated literal. */
+  fputs("OPTIONS :\n"
+        "      -f <file>     : hypothesis conll file\n"
+        "      -n <int>      : process n sentences (default is 100 000 000)\n"
+        "      -v 1|2|3      : verbosity level\n"
+        "      -h            : print this message\n"
+        "      -1            : content of column 1 in the mcf file produced\n"
+        "      -2            : content of column 2 in the mcf file produced\n"
+        "      -3            : content of column 3 in the mcf file produced\n"
+        "      -4            : content of column 4 in the mcf file produced\n"
+        "      -5            : content of column 5 in the mcf file produced\n"
+        "      -6            : content of column 6 in the mcf file produced\n"
+        "      -7            : content of column 7 in the mcf file produced\n"
+        "                    : values of options -1 to -7 must be one of\n"
+        "                    : I for id\n"
+        "                    : W for form\n"
+        "                    : L for lemma\n"
+        "                    : C for coarse part of speech\n"
+        "                    : P for part of speech\n"
+        "                    : F for features\n"
+        "                    : H for head\n"
+        "                    : D for deprel\n", stderr);
+}
+
+
+
+
+/*---------------------------------------------------------------------------------*/
+
+void parse_options(int argc, char *argv[], options * op)
+{
+  /* Fill *op from the command line. Defaults are installed first by
+     reset_options(); each option reported by getopt() then overrides one
+     field. -h prints the help and exits with status 0; an -f file that
+     cannot be opened ends the program with status 1. The letters
+     I W L C P F H D and the options -8 / -9 are accepted by the option
+     string but have no effect. */
+
+  /* getopt() returns an int: stored in a plain char, the comparison with -1
+     can never succeed where char is unsigned and the loop would not end. */
+  int c;
+
+  reset_options(op);
+
+  while ((c = getopt (argc, argv, "hIWLCPFHDf:n:v:1:2:3:4:5:6:7:8:9:")) != -1)
+    switch (c)
+      {
+      case 'h':
+	print_help_message(argv[0]);
+	exit(0);
+      case 'f':
+	if(op->filename){	/* a repeated -f replaces the file opened earlier */
+	  fclose(op->fd_parses);
+	  free(op->filename);
+	}
+	op->filename = strdup(optarg);
+	if(op->filename == NULL){
+	  fprintf(stderr, "cannot allocate memory for file name %s : aborting\n", optarg);
+	  exit(1);
+	}
+	if((op->fd_parses = fopen(op->filename, "r")) == NULL){
+	  fprintf(stderr, "cannot open hypothesis file %s : aborting\n", op->filename);
+	  exit(1);
+	}
+	break;
+      case '1':
+      case '2':
+      case '3':
+      case '4':
+      case '5':
+      case '6':
+      case '7':
+	/* option -k sets the selector of output column k, kept at index k-1 */
+	op->columns[c - '1'] = optarg[0];
+	break;
+      case 'n':
+	op->snum = atoi(optarg);
+	break;
+      case 'v':
+	op->verbose_level = atoi(optarg);
+	break;
+      default:
+	/* nothing to store: either getopt() returned '?' after reporting the
+	   problem itself, or the option is one of the accepted no-op letters */
+	break;
+      }
+}
+
+/*---------------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------------*/
+int main(int argc, char *argv[])
+{
+  /* Load sentences from the selected input one at a time and print each
+     one with conll_print_sentence_mcf3(), stopping at the -n limit or as
+     soon as conll_load_sentence() returns 0. */
+  conll_sentence *sentence = conll_allocate_sentence();
+  int done = 0;
+
+  parse_options(argc, argv, &op);
+  print_options(&op);
+
+  while(conll_load_sentence(op.fd_parses, sentence) && (done < op.snum)){
+    sentence->num = done++;   /* sentences are numbered from 0 */
+    conll_compute_relative_index_of_heads(sentence);
+    conll_print_sentence_mcf3(sentence, op.columns, NB_COL);
+  }
+  if(op.filename != NULL)     /* only a file opened for -f is closed; stdin is left alone */
+    fclose(op.fd_parses);
+  conll_free_sentence(sentence);
+  return 0;
+}
diff --git a/maca_tools/src/conllu2mcf.c b/maca_tools/src/conllu2mcf.c
new file mode 100644
index 0000000..faabdb0
--- /dev/null
+++ b/maca_tools/src/conllu2mcf.c
@@ -0,0 +1,159 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+#include<strings.h>
+#include<math.h>
+#include<getopt.h>
+#include"conll_lib.h" 
+#include"hash_str.h" 
+
+#define NB_COL 7
+
+typedef struct options                 // command-line settings of conllu2mcf
+{
+  FILE * fd_parses;                    // stream the sentences are loaded from (stdin unless -f is given)
+  int verbose_level;                   // value given with -v, 0 by default
+  int snum;                            // maximum number of sentences to process (-n)
+  char *filename;                      // strdup'ed copy of the -f argument, NULL when -f is absent
+  char columns[NB_COL];                // one selector character per output column (-1 to -7), '0' by default
+} options;
+
+/*---------------------------------------------------------------------------------*/
+
+options op;   /* program-wide option set; main() passes its address to parse_options() and print_options() */
+
+void print_options(options *op)
+{
+  /* Report the options on stderr; filename is NULL without -f and NULL must not reach %s. */
+  fprintf(stderr, "file name = %s\n", op->filename ? op->filename : "(stdin)");
+  fprintf(stderr, "verbose level = %d\nmaximum number of sentences to process = %d\n", op->verbose_level, op->snum);
+}
+
+void reset_options(options * op)
+{
+  /* Install the defaults: input from stdin, no file name, verbosity 0,
+     a limit of 100 000 000 sentences and '0' in every output column. */
+  memset(op->columns, '0', sizeof op->columns);
+  op->snum = 100000000;
+  op->verbose_level = 0;
+  op->fd_parses = stdin;
+  op->filename = NULL;
+}
+
+/*---------------------------------------------------------------------------------*/
+void print_help_message(char *program_name)
+{
+  /* Write the usage text to stderr; program_name appears twice in the banner. */
+  fprintf(stderr, "%s usage: %s [options]\n", program_name, program_name);
+  /* The rest of the help is constant and is emitted as one concatenated literal. */
+  fputs("OPTIONS :\n"
+        "      -f <file>     : hypothesis conll file\n"
+        "      -n <int>      : process n sentences (default is 100 000 000)\n"
+        "      -v 1|2|3      : verbosity level\n"
+        "      -h            : print this message\n"
+        "      -1            : content of column 1 in the mcf file produced\n"
+        "      -2            : content of column 2 in the mcf file produced\n"
+        "      -3            : content of column 3 in the mcf file produced\n"
+        "      -4            : content of column 4 in the mcf file produced\n"
+        "      -5            : content of column 5 in the mcf file produced\n"
+        "      -6            : content of column 6 in the mcf file produced\n"
+        "      -7            : content of column 7 in the mcf file produced\n"
+        "                    : values of options -1 to -7 must be one of\n"
+        "                    : I for id\n"
+        "                    : W for form\n"
+        "                    : L for lemma\n"
+        "                    : C for coarse part of speech\n"
+        "                    : P for part of speech\n"
+        "                    : F for features\n"
+        "                    : H for head\n"
+        "                    : D for deprel\n"
+        "                    : G for language\n", stderr);
+}
+
+
+
+
+/*---------------------------------------------------------------------------------*/
+
+void parse_options(int argc, char *argv[], options * op)
+{
+  /* Fill *op from the command line. Defaults are installed first by
+     reset_options(); each option reported by getopt() then overrides one
+     field. -h prints the help and exits with status 0; an -f file that
+     cannot be opened ends the program with status 1. The letters
+     I W L C P F H D and the options -8 / -9 are accepted by the option
+     string but have no effect. */
+
+  /* getopt() returns an int: stored in a plain char, the comparison with -1
+     can never succeed where char is unsigned and the loop would not end. */
+  int c;
+
+  reset_options(op);
+
+  while ((c = getopt (argc, argv, "hIWLCPFHDf:n:v:1:2:3:4:5:6:7:8:9:")) != -1)
+    switch (c)
+      {
+      case 'h':
+	print_help_message(argv[0]);
+	exit(0);
+      case 'f':
+	if(op->filename){	/* a repeated -f replaces the file opened earlier */
+	  fclose(op->fd_parses);
+	  free(op->filename);
+	}
+	op->filename = strdup(optarg);
+	if(op->filename == NULL){
+	  fprintf(stderr, "cannot allocate memory for file name %s : aborting\n", optarg);
+	  exit(1);
+	}
+	if((op->fd_parses = fopen(op->filename, "r")) == NULL){
+	  fprintf(stderr, "cannot open hypothesis file %s : aborting\n", op->filename);
+	  exit(1);
+	}
+	break;
+      case '1':
+      case '2':
+      case '3':
+      case '4':
+      case '5':
+      case '6':
+      case '7':
+	/* option -k sets the selector of output column k, kept at index k-1 */
+	op->columns[c - '1'] = optarg[0];
+	break;
+      case 'n':
+	op->snum = atoi(optarg);
+	break;
+      case 'v':
+	op->verbose_level = atoi(optarg);
+	break;
+      default:
+	/* nothing to store: either getopt() returned '?' after reporting the
+	   problem itself, or the option is one of the accepted no-op letters */
+	break;
+      }
+}
+
+/*---------------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------------*/
+int main(int argc, char *argv[])
+{
+  /* Load sentences from the selected input one at a time and print each
+     one with conll_print_sentence_mcf3(), stopping at the -n limit or as
+     soon as conll_load_sentence() returns 0. */
+  conll_sentence *sentence = conll_allocate_sentence();
+  int done = 0;
+
+  parse_options(argc, argv, &op);
+  print_options(&op);
+
+  while(conll_load_sentence(op.fd_parses, sentence) && (done < op.snum)){
+    sentence->num = done++;   /* sentences are numbered from 0 */
+    conll_compute_relative_index_of_heads(sentence);
+    conll_print_sentence_mcf3(sentence, op.columns, NB_COL);
+  }
+  if(op.filename != NULL)     /* only a file opened for -f is closed; stdin is left alone */
+    fclose(op.fd_parses);
+  conll_free_sentence(sentence);
+  return 0;
+}
diff --git a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c
index f4646dd..a36bc54 100644
--- a/maca_trans_parser/src/simple_decoder_parser_arc_eager.c
+++ b/maca_trans_parser/src/simple_decoder_parser_arc_eager.c
@@ -9,6 +9,7 @@
 #include"config2feat_vec.h"
 #include"feature_table.h"
 #include"dico.h"
+#include"word.h"
 
 void print_word_buffer_old(config *c, dico *dico_labels, mcd *mcd_struct)
 {
@@ -150,7 +151,9 @@ void simple_decoder_parser_arc_eager(context *ctx)
 
     if((word_get_sent_seg(stack_top(config_get_stack(c))) == 1) && (mvt_get_type(mvt_stack_top(config_get_history(c))) != MVT_PARSER_EOS)){
       word_set_sent_seg(stack_top(config_get_stack(c)), -1);
+      word_set_gov(stack_top(config_get_stack(c)), 0);
       movement_parser_eos(c);
+
       while(movement_parser_reduce(c));
       while(movement_parser_root(c, root_label));
       if(ctx->debug_mode) printf("force EOS\n");
-- 
GitLab