diff --git a/UD_fr-GSD/data/Makefile b/UD_fr-GSD/data/Makefile
index 382877018c97217fc3c6fa2014890b030ce2d3dc..1a62b88521194393a30f64cf424f673f486b4478 100644
--- a/UD_fr-GSD/data/Makefile
+++ b/UD_fr-GSD/data/Makefile
@@ -6,6 +6,7 @@ DEV=$(UD_DIR)/dev.conllu
 MCD=wpmlgfs.mcd
 CONLLUMCD=conllu.mcd
 CONLLU2MCF=$(TOOLS)/conllu2mcf.py
+CONLL2TXT=$(TOOLS)/conll2text.py
 
 #This part is for lemmatizer rules and excpetions computation
 THRESHOLD=10
@@ -23,18 +24,22 @@ $(CONLLUMCD):
 
 train.mcf: $(TRAIN) $(CONLLUMCD)
 	$(CONLLU2MCF) $< $(CONLLUMCD) $@ $(MCD)
+	$(TOOLS)/conllu2splits.py $< $(CONLLUMCD) > splits.txt
+	$(CONLL2TXT) $< $$'\n' > train.txt
 	$(TOOLS)/mcfRemovePonct.py $@ $(MCD) > train_noponct.mcf
 	$(TOOLS)/mcfShuffleAndMakeDev.py $@ $(MCD) 0.2 dummy.mcf train_tiny.mcf
 	rm dummy.mcf
 
 test.mcf: $(TEST)
 	$(CONLLU2MCF) $< $(CONLLUMCD) $@ $(MCD)
+	$(CONLL2TXT) $< $$'\n' > test.txt
 	$(TOOLS)/mcfRemovePonct.py $@ $(MCD) > test_noponct.mcf
 	$(TOOLS)/mcfShuffleAndMakeDev.py $@ $(MCD) 0.6 dummy.mcf test_tiny.mcf
 	rm dummy.mcf
 
 dev.mcf: $(DEV)
 	$(CONLLU2MCF) $< $(CONLLUMCD) $@ $(MCD)
+	$(CONLL2TXT) $< $$'\n' > dev.txt
 	$(TOOLS)/mcfRemovePonct.py $@ $(MCD) > dev_noponct.mcf
 	$(TOOLS)/mcfShuffleAndMakeDev.py $@ $(MCD) 0.2 dummy.mcf dev_tiny.mcf
 	rm dummy.mcf
@@ -52,6 +57,7 @@ $(RULES_FILENAME): $(FPLM_FILENAME)
 
 clean:
 	- rm *\.mcf
+	- rm *\.txt
 	- rm *\.conll*
 	- rm conll*\.mcd
 	- rm $(RULES_FILENAME)
diff --git a/UD_fr-GSD/data/wpmlgfs.mcd b/UD_fr-GSD/data/wpmlgfs.mcd
index 3656ca67232e55af51ee6f68fb3c7de04de7b2a6..6bfb75240101b2b2a65b69c548727f5fd30f58ed 100644
--- a/UD_fr-GSD/data/wpmlgfs.mcd
+++ b/UD_fr-GSD/data/wpmlgfs.mcd
@@ -1,7 +1,9 @@
-0 FORM
-1 POS
-2 MORPHO
-3 LEMMA
-4 GOV
-5 LABEL
-6 EOS
+0 ID
+1 FORM
+2 POS
+3 MORPHO
+4 LEMMA
+5 GOV
+6 LABEL
+7 EOS
+8 TEXT
diff --git a/UD_fr-GSD/eval/eval.sh b/UD_fr-GSD/eval/eval.sh
index 762d8f9128b82e846b55b3ea5d3d489c105c04f8..17e85d89de3a769daddba87e1a389b983900213b 100755
--- a/UD_fr-GSD/eval/eval.sh
+++ b/UD_fr-GSD/eval/eval.sh
@@ -3,6 +3,6 @@
 LANG=UD_fr-GSD
 MCF=../data/test.mcf
 MCD=../data/wpmlgfs.mcd
-ARGS="--keepPunct EOS --relative LABEL GOV --ignore FORM"
+ARGS="--keepPunct EOS --relative LABEL GOV --ignore FORM --ignore TEXT"
 
 exec ../../scripts/eval.py $LANG $MCF $MCD $* $ARGS
diff --git a/UD_fr-GSD/tagger/machine.tm b/UD_fr-GSD/tagger/machine.tm
index 8d2fd44a553d55c300d2d6a05e2928bec55bf0ca..c494ebdb843a3f43fd377c86a0faf0b7a340371b 100644
--- a/UD_fr-GSD/tagger/machine.tm
+++ b/UD_fr-GSD/tagger/machine.tm
@@ -1,19 +1,19 @@
-Name : Tagger with error correction
+Name : Tagger Machine
 Dicts : tagger.dicts
 %CLASSIFIERS
 strategy strategy.cla
 tagger tagger.cla
+tokenizer tokenizer.cla
 signature signature.cla
-error_tagger error_tagger.cla
 %STATES
 strategy strategy
+tokenizer tokenizer
 signature signature
 tagger tagger
-error_tagger error_tagger
 %TRANSITIONS
 strategy signature MOVE signature
 strategy tagger MOVE tagger
-tagger error_tagger *
-error_tagger tagger BACK
-error_tagger strategy *
+strategy tokenizer MOVE tokenizer
+tagger strategy *
 signature strategy *
+tokenizer strategy *
diff --git a/UD_fr-GSD/tagger/normal.tm b/UD_fr-GSD/tagger/normal.tm
index d28513e078d7b9f13acd0ebbc9a4136a8b1c2133..c494ebdb843a3f43fd377c86a0faf0b7a340371b 100644
--- a/UD_fr-GSD/tagger/normal.tm
+++ b/UD_fr-GSD/tagger/normal.tm
@@ -3,13 +3,17 @@ Dicts : tagger.dicts
 %CLASSIFIERS
 strategy strategy.cla
 tagger tagger.cla
+tokenizer tokenizer.cla
 signature signature.cla
 %STATES
 strategy strategy
+tokenizer tokenizer
 signature signature
 tagger tagger
 %TRANSITIONS
 strategy signature MOVE signature
 strategy tagger MOVE tagger
+strategy tokenizer MOVE tokenizer
 tagger strategy *
 signature strategy *
+tokenizer strategy *
diff --git a/UD_fr-GSD/tagger/strategy.cla b/UD_fr-GSD/tagger/strategy.cla
index 12765cd204125534b3d99081d44feeea5ff31078..4013fa9851e97466104f86a8b2f1aba532ecbcac 100644
--- a/UD_fr-GSD/tagger/strategy.cla
+++ b/UD_fr-GSD/tagger/strategy.cla
@@ -1,4 +1,4 @@
 Name : Strategy
 Type : Information
-Oracle : strategy_tagger
+Oracle : strategy_tokenizer,tagger
 Oracle Filename : none
diff --git a/UD_fr-GSD/tagger/tagger.dicts b/UD_fr-GSD/tagger/tagger.dicts
index 1130185fc416f423a0c1d832d7d820b966f884ee..85e77993945dec797f69cb6f5deb9c8773560db1 100644
--- a/UD_fr-GSD/tagger/tagger.dicts
+++ b/UD_fr-GSD/tagger/tagger.dicts
@@ -18,6 +18,15 @@ Tagger_sgn     10        Embeddings
 Tagger_actions 05        Embeddings
 Tagger_entropy 05        Embeddings
 #########################################################################
+Tokenizer_bool    02        Embeddings
+Tokenizer_int     05        Embeddings
+Tokenizer_letters 30        Embeddings
+Tokenizer_pos     15        Embeddings
+Tokenizer_form    30        Embeddings
+Tokenizer_sgn     10        Embeddings
+Tokenizer_actions 05        Embeddings
+Tokenizer_entropy 05        Embeddings
+#########################################################################
 Error_Tagger_bool    02        Embeddings
 Error_Tagger_int     05        Embeddings
 Error_Tagger_letters 30        Embeddings
diff --git a/UD_fr-GSD/tagger/tagger.fm b/UD_fr-GSD/tagger/tagger.fm
index 22f91fcf02536fc31b026966ee02a418d9983311..4e4ff5b2fdc2edea835c185df954dd7320f76c87 100644
--- a/UD_fr-GSD/tagger/tagger.fm
+++ b/UD_fr-GSD/tagger/tagger.fm
@@ -19,8 +19,6 @@ b.0#FORM.U
 #b.1#FORM.U
 # UPPERCASE
 b.0#FORM.LEN
-# EOS
-b.-2#EOS
 # SUFFIXES
 b.0#FORM.PART.-4.-4
 b.0#FORM.PART.-3.-3
diff --git a/UD_fr-GSD/tagger/test.bd b/UD_fr-GSD/tagger/test.bd
index 342ce5bfd24c68965b9c63422a6279209f9aff02..11f50fa71c84935b56d7a29fd5806c612f5dc74b 100644
--- a/UD_fr-GSD/tagger/test.bd
+++ b/UD_fr-GSD/tagger/test.bd
@@ -1,5 +1,7 @@
 #Name  ref/hyp dict    Policy   Must print?#
 ############################################
-FORM   ref     form    Final    1
+ID     hyp     none    FromZero 1
+FORM   hyp     form    Final    1
 POS    hyp     pos     Final    1
 SGN    hyp     sgn     Final    0
+TEXT   ref     none    Final    0
diff --git a/UD_fr-GSD/tagger/tokenizer.as b/UD_fr-GSD/tagger/tokenizer.as
new file mode 100644
index 0000000000000000000000000000000000000000..eb137555c630a43d4417d9ce7797a68b0b68e710
--- /dev/null
+++ b/UD_fr-GSD/tagger/tokenizer.as
@@ -0,0 +1,16 @@
+Default : IGNORECHAR
+SPLITWORD des@de@les
+SPLITWORD du@de@le
+SPLITWORD au@à@le
+SPLITWORD Au@à@le
+SPLITWORD aux@à@les
+SPLITWORD auxquelles@à@lesquelles
+SPLITWORD Des@de@les
+SPLITWORD auquel@à@lequel
+SPLITWORD Du@de@le
+SPLITWORD Aux@à@les
+SPLITWORD duquel@de@lequel
+SPLITWORD auxquels@à@lesquels
+SPLITWORD desquelles@de@lesquelles
+ADDCHARTOWORD
+ENDWORD
diff --git a/UD_fr-GSD/tagger/tokenizer.cla b/UD_fr-GSD/tagger/tokenizer.cla
new file mode 100644
index 0000000000000000000000000000000000000000..e0a1578142a5e5c9f4c446f3c3c7dda93dfb92e8
--- /dev/null
+++ b/UD_fr-GSD/tagger/tokenizer.cla
@@ -0,0 +1,6 @@
+Name : Tokenizer
+Type : Prediction
+Oracle : tokenizer
+Feature Model : tokenizer.fm
+Action Set : tokenizer.as
+Topology : (500,RELU,0.3)
diff --git a/UD_fr-GSD/tagger/tokenizer.fm b/UD_fr-GSD/tagger/tokenizer.fm
new file mode 100644
index 0000000000000000000000000000000000000000..e85005759543eee3de06d04bea68eb3603a04614
--- /dev/null
+++ b/UD_fr-GSD/tagger/tokenizer.fm
@@ -0,0 +1,37 @@
+# Classic features
+# FORM
+b.0#FORM.fasttext
+b.-1#FORM.fasttext
+b.-2#FORM.fasttext
+# POS
+b.-1#POS
+b.-2#POS
+b.-3#POS
+# SIGNATURES
+b.-1#SGN
+b.0#SGN
+# UPPERCASE
+b.0#FORM.U
+# LENGTH
+b.0#FORM.LEN
+# SUFFIXES
+b.0#FORM.PART.-4.-4
+b.0#FORM.PART.-3.-3
+b.0#FORM.PART.-2.-2
+b.0#FORM.PART.-1.-1
+b.0#FORM.PART.0.0
+b.0#FORM.PART.1.1
+b.0#FORM.PART.2.2
+b.0#FORM.PART.3.3
+# RAW INPUT
+raw.-5
+raw.-4
+raw.-3
+raw.-2
+raw.-1
+raw.0
+raw.2
+raw.3
+raw.4
+raw.5
+raw.6
diff --git a/UD_fr-GSD/tagger/train.bd b/UD_fr-GSD/tagger/train.bd
index 49dc16a0d1f84d14e8b0f9d5a5da76d238c9b22e..f8765fa78024db9bfd3bb325bea8f3d7fa8626f4 100644
--- a/UD_fr-GSD/tagger/train.bd
+++ b/UD_fr-GSD/tagger/train.bd
@@ -1,6 +1,8 @@
 #Name  ref/hyp dict    Policy   Must print?#
 ############################################
-FORM   ref     form    FromZero 1
+ID     hyp     none    FromZero 1
+FORM   hyp     form    FromZero 1
 POS    hyp     pos     FromZero 1
 SGN    hyp     sgn     FromZero 1
 EOS    ref     int     FromZero 1
+TEXT   ref     none    Final    0
diff --git a/scripts/train.sh b/scripts/train.sh
index c38f6f5c5cb501db7dd087df3e5a4e8ddefdacd7..b614ce4c00aa0b83ae4efc36ec05bc7c7a02569a 100755
--- a/scripts/train.sh
+++ b/scripts/train.sh
@@ -1,7 +1,7 @@
 #! /bin/bash
 
-TRAIN=../../data/train.mcf
-DEV=../../data/dev.mcf
+TRAIN=../../data/train_tiny.mcf
+DEV=../../data/dev_tiny.mcf
 
 if [ "$2" == "-h" ]; then
   macaon_train "-h"
diff --git a/tools/conllu2mcf.py b/tools/conllu2mcf.py
index 3612feb88e99471ccab04f968a0b6da315db26e4..2ca8c7a8db71dc3a63dcc1040773b717c47b4cc1 100755
--- a/tools/conllu2mcf.py
+++ b/tools/conllu2mcf.py
@@ -32,16 +32,35 @@ def main() :
   output = []
   previousId = -1
 
+  currentSentence = ""
   for line in open(sys.argv[1], encoding="utf8") :
     clean = line.strip()
     if len(clean) < 2 :
       continue
+    if line.split('=')[0] == "# text " : # keep the raw sentence for the TEXT column
+      currentSentence = line[8:].strip()
+      continue
     if line[0] == '#' :
       continue
 
     columns = clean.split('\t')
 
     if len(columns[int(conllMCDr["ID"])].split('-')) > 1 :
+      # Multi-word token range (ID such as "3-4") : copy the columns shared with the CoNLL-U MCD, "_" elsewhere.
+      lineInMCF = []
+      rangeStart = int(columns[int(conllMCDr["ID"])].split('-')[0])
+      for index in mcfMCD :
+        colName = mcfMCD[index]
+        while len(lineInMCF) < int(index)+1 :
+          lineInMCF.append("")
+        value = "_"
+        if colName == "EOS" and rangeStart < previousId : # ID restarted : flag a sentence boundary
+          value = "1"
+          previousId = rangeStart
+        if colName in conllMCDr :
+          value = columns[int(conllMCDr[colName])]
+        lineInMCF[int(index)] = value
+      output.append(lineInMCF)
       continue
 
     id = int(columns[int(conllMCDr["ID"])])
@@ -50,10 +69,12 @@ def main() :
     if gov == 0 :
       relGov = 0
     eos = "_"
+    textValue = "_"
     if id < previousId :
       eos = "1"
 
     previousId = id
+    textValue = currentSentence
 
     lineInMCF = []
     for index in mcfMCD :
@@ -63,6 +84,8 @@ def main() :
         value = eos
       elif colName == "GOV" :
         value = relGov
+      elif colName == "TEXT" : 
+        value = textValue
       else :
         indexInColumns = int(conllMCDr[mcfMCD[index]])
         value = columns[indexInColumns]
@@ -72,11 +95,24 @@ def main() :
       lineInMCF[int(index)] = value
     output.append(lineInMCF)
 
+  hasText = False
+  textIndex = 0
   EOSIndex = int(mcfMCDr["EOS"])
+
+  if "TEXT" in mcfMCDr :
+    hasText = True
+    textIndex = int(mcfMCDr["TEXT"])
+
   for i in range(len(output)-1) :
     output[i][EOSIndex] = output[i+1][EOSIndex]
+
   output[-1][EOSIndex] = "1"
 
+  if hasText :
+    for i in range(len(output)) :
+      if output[i][EOSIndex] != "1" :
+        output[i][textIndex] = "_"
+
   outputFile = open(sys.argv[3], "w", encoding="utf8")
   for outputLine in output :
     for i in range(len(outputLine)) :
diff --git a/tools/mcf2conllu.py b/tools/mcf2conllu.py
index ae369143a8dcea77fead862e472b794e96c7bc04..d678e2cb219278552150a274e3141084c7922a0a 100755
--- a/tools/mcf2conllu.py
+++ b/tools/mcf2conllu.py
@@ -36,9 +36,6 @@ def main() :
     if len(line) < 2 or line[0] == '#' :
       continue
     splited = striped.split('\t')
-    if len(splited) != len(mcfMCD) :
-      print("ERROR : line \'%s\' wrong format.\n"%line)
-      exit(1)
 
     toPrint = ""
 
@@ -46,29 +43,42 @@ def main() :
       col = conllMCD[str(ind)]
       if col == "EMPTY" :
         toPrint += "_\t"
-      elif col == "ID" :
+      elif col == "ID" and "ID" not in mcfMCDr :
         toPrint += str(curID) + "\t"
       elif col == "GOV" :
-        relInd = int(splited[int(mcfMCDr["GOV"])])
-        gov = 0
-        if relInd != 0 :
-          gov = relInd + curID
-        toPrint += str(gov) + '\t'
+        if "ID" in mcfMCDr : # GOV is relative to the token's own ID, so refresh curID before using it
+          curID = int(splited[int(mcfMCDr["ID"])].split('-')[0])
+        if int(mcfMCDr["GOV"]) >= len(splited) :
+          if (curID == 1) and not ("EOS" in mcfMCDr and int(mcfMCDr["EOS"]) < len(splited)) :
+            toPrint += str(0)+'\t'
+          else :
+            toPrint += str(1)+'\t'
+        elif splited[int(mcfMCDr["GOV"])] == "_" : # no governor on this line (e.g. multi-word range)
+          toPrint += '_\t'
+        else :
+          relInd = int(splited[int(mcfMCDr["GOV"])])
+          gov = relInd + curID if relInd != 0 else 0
+          toPrint += str(gov) + '\t'
       else :
         if col not in mcfMCDr :
           print("ERROR : %s not in mcf.mcd."%col)
           exit(1)
-        toPrint += splited[int(mcfMCDr[col])] + '\t'
+        if int(mcfMCDr[col]) >= len(splited) :
+          toPrint += '_\t'
+        else :
+          toPrint += splited[int(mcfMCDr[col])] + '\t'
 
     print(toPrint[:-1])
-    curID += 1
+    
+    if "ID" not in mcfMCDr :
+      curID += 1
 
-    if "EOS" in mcfMCDr :
+    if "EOS" in mcfMCDr and int(mcfMCDr["EOS"]) < len(splited) :
       if splited[int(mcfMCDr["EOS"])] == "1" :
         print("")
         curID = 1
 
-
 if __name__ == "__main__" :
   main()
+  print("")