From 7f4221855d58db770c5467dac5ee2b07421f925d Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Mon, 25 Nov 2019 01:14:56 +0100
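Subject: [PATCH] EOS is now predicted by a new classifier called segmenter

Add a Segmenter classifier (segmenter.cla, action set data/segmenter.as,
generated by data/Makefile) to the parser, tagparser* and tokeparser*
machines, and add a standalone UD_any/segmenter machine.

getActionSets.py no longer writes an EOS action to parser.as. It now also
writes parser_legacy.as, which keeps an "EOS s.0" action; parser_basic is
switched to that file and to the strategy_parser_legacy oracle.

In UD_any/parser, the signature classifier and the error-correction files
(errorCorrection.tm, error_parser.as/.cla/.fm) are removed.
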
---
 UD_any/data/Makefile                          |   6 +-
 UD_any/data/getActionSets.py                  |  20 ++-
 UD_any/parser/errorCorrection.tm              |  19 ---
 UD_any/parser/error_parser.as                 |   5 -
 UD_any/parser/error_parser.cla                |   6 -
 UD_any/parser/error_parser.fm                 | 125 ------------------
 UD_any/parser/machine.tm                      |   8 +-
 UD_any/parser/normal.tm                       |   8 +-
 UD_any/parser/parser.dicts                    |   8 ++
 UD_any/parser/segmenter.cla                   |   7 +
 UD_any/parser/signature.cla                   |   4 -
 UD_any/parser_basic/parser.cla                |   2 +-
 UD_any/parser_basic/strategy.cla              |   2 +-
 UD_any/segmenter/machine.tm                   |  11 ++
 UD_any/segmenter/segmenter.cla                |   7 +
 UD_any/segmenter/segmenter.dicts              |   9 ++
 UD_any/segmenter/strategy.cla                 |   4 +
 UD_any/segmenter/test.bd                      |   6 +
 UD_any/segmenter/train.bd                     |   6 +
 UD_any/tagparser/machine.tm                   |   4 +
 UD_any/tagparser/normal.tm                    |   4 +
 UD_any/tagparser/segmenter.cla                |   7 +
 UD_any/tagparser/tagparser.dicts              |   8 ++
 UD_any/tagparser_sequential/machine.tm        |   4 +
 UD_any/tagparser_sequential/normal.tm         |   4 +
 UD_any/tagparser_sequential/segmenter.cla     |   7 +
 UD_any/tagparser_sequential/tagparser.dicts   |   8 ++
 UD_any/tagparser_sequential_strong/machine.tm |   6 +-
 UD_any/tagparser_sequential_strong/normal.tm  |   6 +-
 .../tagparser_sequential_strong/segmenter.cla |   7 +
 .../tagparser.dicts                           |   8 ++
 UD_any/tokeparser/machine.tm                  |   4 +
 UD_any/tokeparser/segmenter.cla               |   7 +
 UD_any/tokeparser/tokeparser.dicts            |   8 ++
 UD_any/tokeparser_incremental/machine.tm      |   4 +
 UD_any/tokeparser_incremental/segmenter.cla   |   7 +
 .../tokeparser_incremental/tokeparser.dicts   |   8 ++
 37 files changed, 201 insertions(+), 173 deletions(-)
 delete mode 100644 UD_any/parser/errorCorrection.tm
 delete mode 100644 UD_any/parser/error_parser.as
 delete mode 100644 UD_any/parser/error_parser.cla
 delete mode 100644 UD_any/parser/error_parser.fm
 create mode 100644 UD_any/parser/segmenter.cla
 delete mode 100644 UD_any/parser/signature.cla
 create mode 100644 UD_any/segmenter/machine.tm
 create mode 100644 UD_any/segmenter/segmenter.cla
 create mode 100644 UD_any/segmenter/segmenter.dicts
 create mode 100644 UD_any/segmenter/strategy.cla
 create mode 100644 UD_any/segmenter/test.bd
 create mode 100644 UD_any/segmenter/train.bd
 create mode 100644 UD_any/tagparser/segmenter.cla
 create mode 100644 UD_any/tagparser_sequential/segmenter.cla
 create mode 100644 UD_any/tagparser_sequential_strong/segmenter.cla
 create mode 100644 UD_any/tokeparser/segmenter.cla
 create mode 100644 UD_any/tokeparser_incremental/segmenter.cla

diff --git a/UD_any/data/Makefile b/UD_any/data/Makefile
index f424c10..86c5e4a 100644
--- a/UD_any/data/Makefile
+++ b/UD_any/data/Makefile
@@ -15,7 +15,7 @@ FP_FILENAME=fP
 RULES_FILENAME=lemmatizer_rules.as
 EXCEPTIONS_FPLM_FILENAME=maca_trans_lemmatizer_exceptions.fplm
 
-all: tokenizer.as texts all_no_test.conllu columns $(FPLM_FILENAME) $(FP_FILENAME) $(RULES_FILENAME)
+all: tokenizer.as segmenter.as texts all_no_test.conllu columns $(FPLM_FILENAME) $(FP_FILENAME) $(RULES_FILENAME)
 	rm col_*\.txt
 	rm all_no_test.conllu
 
@@ -28,6 +28,10 @@ tokenizer.as: all_no_test.conllu $(MCD)
 	$(TOOLS)/conllu2splits.py $< $(MCD) >> $@ 2> ambiguities.txt
 	echo "ENDWORD" >> $@
 	echo "ADDCHARTOWORD" >> $@
+
+# Fixed two-action set read by the Segmenter classifier (Action Set : data/segmenter.as).
+segmenter.as:
+	printf '%s\n' "EOS b.0" "REWRITE b.0 EOS _" > $@
  
 columns: all_no_test.conllu $(MCD)
 	for number in 1 2 3 4 5 6 7 8 9 10 ; do \
diff --git a/UD_any/data/getActionSets.py b/UD_any/data/getActionSets.py
index 1e1b397..9898191 100755
--- a/UD_any/data/getActionSets.py
+++ b/UD_any/data/getActionSets.py
@@ -81,6 +81,25 @@ if __name__ == "__main__" :
       output.close()
 
     elif nameCol == "LABEL" :
+      # Legacy action set, referenced by parser_basic/parser.cla: unlike
+      # parser.as, it keeps an EOS action ("EOS s.0").
+      output = open("parser_legacy.as", 'w', encoding='utf-8')
+      print("REDUCE", file=output)
+      labels = set()
+      with open(colFile, "r", encoding='utf-8') as labelColumn :
+        for line in labelColumn :
+          striped = line.strip()
+          if len(striped) == 0 or striped == "root" or striped == "_" :
+            continue
+          # Deduplicate on the full label (subtype included): every distinct
+          # label gets exactly one LEFT/RIGHT pair, whatever the input order.
+          labels.add(striped)
+      for label in sorted(labels) :
+        print("LEFT " + label, file=output)
+        print("RIGHT " + label, file=output)
+      print("EOS s.0", file=output)
+      print("Default : SHIFT", file=output)
+      output.close()
       output = open("parser.as", 'w', encoding='utf-8')
       print("REDUCE", file=output)
       labels = set()
@@ -97,7 +116,6 @@ if __name__ == "__main__" :
       for label in labelsList :
         print("LEFT " + label, file=output)
         print("RIGHT " + label, file=output)
-      print("EOS", file=output)
       print("Default : SHIFT", file=output)
       output.close()
 
diff --git a/UD_any/parser/errorCorrection.tm b/UD_any/parser/errorCorrection.tm
deleted file mode 100644
index c251e16..0000000
--- a/UD_any/parser/errorCorrection.tm
+++ /dev/null
@@ -1,19 +0,0 @@
-Name : Parser Machine with predicted backtracking
-Dicts : parser.dicts
-%CLASSIFIERS
-strategy strategy.cla
-signature signature.cla
-parser parser.cla
-error_parser error_parser.cla
-%STATES
-strategy strategy
-signature signature
-parser parser
-error_parser error_parser
-%TRANSITIONS
-strategy signature MOVE signature
-strategy parser MOVE parser
-parser error_parser *
-error_parser parser BACK
-error_parser strategy *
-signature strategy *
diff --git a/UD_any/parser/error_parser.as b/UD_any/parser/error_parser.as
deleted file mode 100644
index e9182f3..0000000
--- a/UD_any/parser/error_parser.as
+++ /dev/null
@@ -1,5 +0,0 @@
-EPSILON
-BACK 1
-BACK 2
-BACK 3
-BACK 4
diff --git a/UD_any/parser/error_parser.cla b/UD_any/parser/error_parser.cla
deleted file mode 100644
index a866556..0000000
--- a/UD_any/parser/error_parser.cla
+++ /dev/null
@@ -1,6 +0,0 @@
-Name : Error_Parser
-Type : Prediction
-Oracle : none
-Feature Model : error_parser.fm
-Action Set : error_parser.as
-Topology : M(200,RELU,0.3)
diff --git a/UD_any/parser/error_parser.fm b/UD_any/parser/error_parser.fm
deleted file mode 100644
index dbaeb62..0000000
--- a/UD_any/parser/error_parser.fm
+++ /dev/null
@@ -1,125 +0,0 @@
-# Features classiques
-# FORM
-s.0#LEMMA.fasttext
-s.1#LEMMA.fasttext
-s.2#LEMMA.fasttext
-s.0.ldep#LEMMA.fasttext
-s.1.ldep#LEMMA.fasttext
-s.0.rdep#LEMMA.fasttext
-s.1.rdep#LEMMA.fasttext
-s.0.ldep.ldep#LEMMA.fasttext
-s.1.ldep.ldep#LEMMA.fasttext
-s.0.rdep.rdep#LEMMA.fasttext
-s.1.rdep.rdep#LEMMA.fasttext
-s.0.l2dep#LEMMA.fasttext
-s.1.l2dep#LEMMA.fasttext
-s.0.r2dep#LEMMA.fasttext
-s.1.r2dep#LEMMA.fasttext
-b.0#LEMMA.fasttext
-b.1#LEMMA.fasttext
-b.2#LEMMA.fasttext
-b.-1#LEMMA.fasttext
-b.-2#LEMMA.fasttext
-b.0.ldep#LEMMA.fasttext
-# POS
-s.0#POS
-s.1#POS
-s.2#POS
-s.0.ldep#POS
-s.1.ldep#POS
-s.0.rdep#POS
-s.1.rdep#POS
-s.0.ldep.ldep#POS
-s.1.ldep.ldep#POS
-s.0.rdep.rdep#POS
-s.1.rdep.rdep#POS
-s.0.l2dep#POS
-s.1.l2dep#POS
-s.0.r2dep#POS
-s.1.r2dep#POS
-b.0#POS
-b.-1#POS
-b.-2#POS
-b.0.ldep#POS
-# MORPHO
-s.0#MORPHO
-s.1#MORPHO
-s.2#MORPHO
-s.0.ldep#MORPHO
-s.1.ldep#MORPHO
-s.0.rdep#MORPHO
-s.1.rdep#MORPHO
-s.0.ldep.ldep#MORPHO
-s.1.ldep.ldep#MORPHO
-s.0.rdep.rdep#MORPHO
-s.1.rdep.rdep#MORPHO
-s.0.l2dep#MORPHO
-s.1.l2dep#MORPHO
-s.0.r2dep#MORPHO
-s.1.r2dep#MORPHO
-b.0#MORPHO
-b.-1#MORPHO
-b.-2#MORPHO
-b.0.ldep#MORPHO
-# LABELS
-s.0.ldep#LABEL
-s.1.ldep#LABEL
-s.0.rdep#LABEL
-s.1.rdep#LABEL
-s.0.ldep.ldep#LABEL
-s.1.ldep.ldep#LABEL
-s.0.rdep.rdep#LABEL
-s.1.rdep.rdep#LABEL
-s.0.l2dep#LABEL
-s.1.l2dep#LABEL
-s.0.r2dep#LABEL
-s.1.r2dep#LABEL
-b.0.ldep#LABEL
-# DISTANCE
-s.0#DIST.s.1
-b.0#DIST.s.0
-# VALENCY
-s.0#nbr
-s.1#nbr
-s.0#nbl
-s.1#nbl
-# SIGNATURES
-b.1#SGN
-b.2#SGN
-# UPPERCASE
-b.0#FORM.U
-b.1#FORM.U
-# EOS
-b.-1#EOS
-# HISTORY
-tc.0
-tc.1
-tc.2
-tc.3
-tc.4
-# ERROR CORRECTION
-#b.1.ldep#LEMMA.fasttext
-#b.1.ldep#POS
-#b.1.ldep#MORPHO
-#b.1.ldep#LABEL
-#b.1#LABEL
-#b.1.gov#POS
-#b.1.gov#MORPHO
-# More feats
-b.1#POS
-b.1#MORPHO
-b.2#POS
-b.2#MORPHO
-# More feats
-b.-2#LABEL
-b.-1#LABEL
-b.0#LABEL
-s.0#LABEL
-s.1#LABEL
-# More feats
-b.-1.gov#POS
-b.-1.gov#MORPHO
-s.0.gov#POS
-s.0.gov#MORPHO
-s.1.gov#POS
-s.1.gov#MORPHO
diff --git a/UD_any/parser/machine.tm b/UD_any/parser/machine.tm
index 66276c3..648aa41 100644
--- a/UD_any/parser/machine.tm
+++ b/UD_any/parser/machine.tm
@@ -2,14 +2,14 @@ Name : Parser Machine
 Dicts : parser.dicts
 %CLASSIFIERS
 strategy strategy.cla
-signature signature.cla
 parser parser.cla
+segmenter segmenter.cla
 %STATES
 strategy strategy
-signature signature
 parser parser
+segmenter segmenter
 %TRANSITIONS
-strategy signature MOVE signature
 strategy parser MOVE parser
+strategy segmenter MOVE segmenter
 parser strategy *
-signature strategy *
+segmenter strategy *
diff --git a/UD_any/parser/normal.tm b/UD_any/parser/normal.tm
index 66276c3..648aa41 100644
--- a/UD_any/parser/normal.tm
+++ b/UD_any/parser/normal.tm
@@ -2,14 +2,14 @@ Name : Parser Machine
 Dicts : parser.dicts
 %CLASSIFIERS
 strategy strategy.cla
-signature signature.cla
 parser parser.cla
+segmenter segmenter.cla
 %STATES
 strategy strategy
-signature signature
 parser parser
+segmenter segmenter
 %TRANSITIONS
-strategy signature MOVE signature
 strategy parser MOVE parser
+strategy segmenter MOVE segmenter
 parser strategy *
-signature strategy *
+segmenter strategy *
diff --git a/UD_any/parser/parser.dicts b/UD_any/parser/parser.dicts
index 1c5346f..b39dbe4 100644
--- a/UD_any/parser/parser.dicts
+++ b/UD_any/parser/parser.dicts
@@ -12,6 +12,14 @@ Parser_letters 30        Embeddings _ 200000
 Parser_labels  18        Embeddings _ 50
 Parser_morpho  22        Embeddings _ 50000
 ########################################################
+Segmenter_bool    02        Embeddings _ 5
+Segmenter_int     05        Embeddings _ 200
+Segmenter_letters 30        Embeddings _ 200000
+Segmenter_pos     15        Embeddings _ 21
+Segmenter_form    30        Embeddings _ 50000
+Segmenter_form.f  30        Embeddings _ 200000
+Segmenter_actions 05        Embeddings _ 21
+########################################################
 Error_Parser_actions 18        Embeddings _
 Error_Parser_bool    16        Embeddings _
 Error_Parser_int     16        Embeddings _
diff --git a/UD_any/parser/segmenter.cla b/UD_any/parser/segmenter.cla
new file mode 100644
index 0000000..6a817d1
--- /dev/null
+++ b/UD_any/parser/segmenter.cla
@@ -0,0 +1,7 @@
+Name : Segmenter
+Type : Prediction
+Oracle : segmenter
+Feature Model : data/feature_models/tagger.fm
+Action Set : data/segmenter.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/parser/signature.cla b/UD_any/parser/signature.cla
deleted file mode 100644
index e979899..0000000
--- a/UD_any/parser/signature.cla
+++ /dev/null
@@ -1,4 +0,0 @@
-Name : Signature
-Type : Information
-Oracle : signature
-Oracle Filename : data/fP
diff --git a/UD_any/parser_basic/parser.cla b/UD_any/parser_basic/parser.cla
index b3b31dd..504a9d3 100644
--- a/UD_any/parser_basic/parser.cla
+++ b/UD_any/parser_basic/parser.cla
@@ -2,6 +2,6 @@ Name : Parser
 Type : Prediction
 Oracle : parser
 Feature Model : parser.fm
-Action Set : data/parser.as
+Action Set : data/parser_legacy.as
 Topology : (500,RELU,0.3)
 Dynamic : yes
diff --git a/UD_any/parser_basic/strategy.cla b/UD_any/parser_basic/strategy.cla
index 29dc251..b9ace88 100644
--- a/UD_any/parser_basic/strategy.cla
+++ b/UD_any/parser_basic/strategy.cla
@@ -1,4 +1,4 @@
 Name : Strategy
 Type : Information
-Oracle : strategy_parser
+Oracle : strategy_parser_legacy
 Oracle Filename : none
diff --git a/UD_any/segmenter/machine.tm b/UD_any/segmenter/machine.tm
new file mode 100644
index 0000000..7bf5e19
--- /dev/null
+++ b/UD_any/segmenter/machine.tm
@@ -0,0 +1,11 @@
+Name : Sentence Segmenter Machine
+Dicts : segmenter.dicts
+%CLASSIFIERS
+strategy strategy.cla
+segmenter segmenter.cla
+%STATES
+strategy strategy
+segmenter segmenter
+%TRANSITIONS
+strategy segmenter *
+segmenter strategy *
diff --git a/UD_any/segmenter/segmenter.cla b/UD_any/segmenter/segmenter.cla
new file mode 100644
index 0000000..6a817d1
--- /dev/null
+++ b/UD_any/segmenter/segmenter.cla
@@ -0,0 +1,7 @@
+Name : Segmenter
+Type : Prediction
+Oracle : segmenter
+Feature Model : data/feature_models/tagger.fm
+Action Set : data/segmenter.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/segmenter/segmenter.dicts b/UD_any/segmenter/segmenter.dicts
new file mode 100644
index 0000000..86a0377
--- /dev/null
+++ b/UD_any/segmenter/segmenter.dicts
@@ -0,0 +1,9 @@
+#Name          Dimension Mode       file                                #
+#########################################################################
+Segmenter_bool    02        Embeddings _ 5
+Segmenter_int     05        Embeddings _ 200
+Segmenter_letters 30        Embeddings _ 200000
+Segmenter_pos     15        Embeddings _ 21
+Segmenter_form    30        Embeddings _ 50000
+Segmenter_form.f  30        Embeddings _ 200000
+Segmenter_actions 05        Embeddings _ 21
diff --git a/UD_any/segmenter/strategy.cla b/UD_any/segmenter/strategy.cla
new file mode 100644
index 0000000..6a2c5ae
--- /dev/null
+++ b/UD_any/segmenter/strategy.cla
@@ -0,0 +1,4 @@
+Name : Strategy
+Type : Information
+Oracle : strategy_segmenter
+Oracle Filename : none
diff --git a/UD_any/segmenter/test.bd b/UD_any/segmenter/test.bd
new file mode 100644
index 0000000..03a0d70
--- /dev/null
+++ b/UD_any/segmenter/test.bd
@@ -0,0 +1,6 @@
+#Index Name  ref/hyp dict    Policy   Must print?#
+##################################################
+0      ID    hyp     none    Final    1
+1      FORM  ref     form    Final    1
+3      POS   ref     pos     Final    1
+0      EOS   hyp     int     Final    0
diff --git a/UD_any/segmenter/train.bd b/UD_any/segmenter/train.bd
new file mode 100644
index 0000000..0fea5ba
--- /dev/null
+++ b/UD_any/segmenter/train.bd
@@ -0,0 +1,6 @@
+#Index Name  ref/hyp dict    Policy   Must print?#
+##################################################
+0      ID    hyp     none    FromZero 1
+1      FORM  ref     form    FromZero 1
+3      POS   ref     pos     FromZero 1
+0      EOS   hyp     int     FromZero 0
diff --git a/UD_any/tagparser/machine.tm b/UD_any/tagparser/machine.tm
index 77ebff3..e0858d0 100644
--- a/UD_any/tagparser/machine.tm
+++ b/UD_any/tagparser/machine.tm
@@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla
 lemmatizer_rules lemmatizer_rules.cla
 lemmatizer_case lemmatizer_case.cla
 parser parser.cla
+segmenter segmenter.cla
 %STATES
 strategy strategy
 tagger tagger
@@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup
 lemmatizer_rules lemmatizer_rules
 lemmatizer_case lemmatizer_case
 parser parser
+segmenter segmenter
 %TRANSITIONS
 strategy tagger MOVE tagger
 strategy morpho MOVE morpho
@@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup
 strategy lemmatizer_rules MOVE lemmatizer_rules
 strategy lemmatizer_case MOVE lemmatizer_case
 strategy parser MOVE parser
+strategy segmenter MOVE segmenter
 tagger strategy *
 morpho strategy *
 lemmatizer_lookup strategy *
 lemmatizer_case strategy *
 lemmatizer_rules strategy *
 parser strategy *
+segmenter strategy *
diff --git a/UD_any/tagparser/normal.tm b/UD_any/tagparser/normal.tm
index 77ebff3..e0858d0 100644
--- a/UD_any/tagparser/normal.tm
+++ b/UD_any/tagparser/normal.tm
@@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla
 lemmatizer_rules lemmatizer_rules.cla
 lemmatizer_case lemmatizer_case.cla
 parser parser.cla
+segmenter segmenter.cla
 %STATES
 strategy strategy
 tagger tagger
@@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup
 lemmatizer_rules lemmatizer_rules
 lemmatizer_case lemmatizer_case
 parser parser
+segmenter segmenter
 %TRANSITIONS
 strategy tagger MOVE tagger
 strategy morpho MOVE morpho
@@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup
 strategy lemmatizer_rules MOVE lemmatizer_rules
 strategy lemmatizer_case MOVE lemmatizer_case
 strategy parser MOVE parser
+strategy segmenter MOVE segmenter
 tagger strategy *
 morpho strategy *
 lemmatizer_lookup strategy *
 lemmatizer_case strategy *
 lemmatizer_rules strategy *
 parser strategy *
+segmenter strategy *
diff --git a/UD_any/tagparser/segmenter.cla b/UD_any/tagparser/segmenter.cla
new file mode 100644
index 0000000..6a817d1
--- /dev/null
+++ b/UD_any/tagparser/segmenter.cla
@@ -0,0 +1,7 @@
+Name : Segmenter
+Type : Prediction
+Oracle : segmenter
+Feature Model : data/feature_models/tagger.fm
+Action Set : data/segmenter.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tagparser/tagparser.dicts b/UD_any/tagparser/tagparser.dicts
index c328e31..9672e00 100644
--- a/UD_any/tagparser/tagparser.dicts
+++ b/UD_any/tagparser/tagparser.dicts
@@ -49,6 +49,14 @@ Parser_lemma   30        Embeddings _ 50000
 Parser_letters 30        Embeddings _ 20000
 Parser_labels  18        Embeddings _ 50
 Parser_morpho  22        Embeddings _ 50000
+# SEGMENTER
+Segmenter_bool    02        Embeddings _ 5
+Segmenter_int     05        Embeddings _ 200
+Segmenter_letters 30        Embeddings _ 200000
+Segmenter_pos     15        Embeddings _ 21
+Segmenter_form    30        Embeddings _ 50000
+Segmenter_form.f  30        Embeddings _ 200000
+Segmenter_actions 05        Embeddings _ 21
 # ERROR_TAGGER
 Error_Tagger_actions 18        Embeddings _
 Error_Tagger_bool    16        Embeddings _
diff --git a/UD_any/tagparser_sequential/machine.tm b/UD_any/tagparser_sequential/machine.tm
index f1c0b0e..2817689 100644
--- a/UD_any/tagparser_sequential/machine.tm
+++ b/UD_any/tagparser_sequential/machine.tm
@@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla
 lemmatizer_rules lemmatizer_rules.cla
 lemmatizer_case lemmatizer_case.cla
 parser parser.cla
+segmenter segmenter.cla
 %STATES
 strategy strategy
 tagger tagger
@@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup
 lemmatizer_rules lemmatizer_rules
 lemmatizer_case lemmatizer_case
 parser parser
+segmenter segmenter
 %TRANSITIONS
 strategy tagger MOVE tagger
 strategy morpho MOVE morpho
@@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup
 strategy lemmatizer_rules MOVE lemmatizer_rules
 strategy lemmatizer_case MOVE lemmatizer_case
 strategy parser MOVE parser
+strategy segmenter MOVE segmenter
 tagger strategy *
 morpho strategy *
 lemmatizer_lookup strategy *
 lemmatizer_case strategy *
 lemmatizer_rules strategy *
 parser strategy *
+segmenter strategy *
diff --git a/UD_any/tagparser_sequential/normal.tm b/UD_any/tagparser_sequential/normal.tm
index f1c0b0e..2817689 100644
--- a/UD_any/tagparser_sequential/normal.tm
+++ b/UD_any/tagparser_sequential/normal.tm
@@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla
 lemmatizer_rules lemmatizer_rules.cla
 lemmatizer_case lemmatizer_case.cla
 parser parser.cla
+segmenter segmenter.cla
 %STATES
 strategy strategy
 tagger tagger
@@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup
 lemmatizer_rules lemmatizer_rules
 lemmatizer_case lemmatizer_case
 parser parser
+segmenter segmenter
 %TRANSITIONS
 strategy tagger MOVE tagger
 strategy morpho MOVE morpho
@@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup
 strategy lemmatizer_rules MOVE lemmatizer_rules
 strategy lemmatizer_case MOVE lemmatizer_case
 strategy parser MOVE parser
+strategy segmenter MOVE segmenter
 tagger strategy *
 morpho strategy *
 lemmatizer_lookup strategy *
 lemmatizer_case strategy *
 lemmatizer_rules strategy *
 parser strategy *
+segmenter strategy *
diff --git a/UD_any/tagparser_sequential/segmenter.cla b/UD_any/tagparser_sequential/segmenter.cla
new file mode 100644
index 0000000..6a817d1
--- /dev/null
+++ b/UD_any/tagparser_sequential/segmenter.cla
@@ -0,0 +1,7 @@
+Name : Segmenter
+Type : Prediction
+Oracle : segmenter
+Feature Model : data/feature_models/tagger.fm
+Action Set : data/segmenter.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tagparser_sequential/tagparser.dicts b/UD_any/tagparser_sequential/tagparser.dicts
index c328e31..9672e00 100644
--- a/UD_any/tagparser_sequential/tagparser.dicts
+++ b/UD_any/tagparser_sequential/tagparser.dicts
@@ -49,6 +49,14 @@ Parser_lemma   30        Embeddings _ 50000
 Parser_letters 30        Embeddings _ 20000
 Parser_labels  18        Embeddings _ 50
 Parser_morpho  22        Embeddings _ 50000
+# SEGMENTER
+Segmenter_bool    02        Embeddings _ 5
+Segmenter_int     05        Embeddings _ 200
+Segmenter_letters 30        Embeddings _ 200000
+Segmenter_pos     15        Embeddings _ 21
+Segmenter_form    30        Embeddings _ 50000
+Segmenter_form.f  30        Embeddings _ 200000
+Segmenter_actions 05        Embeddings _ 21
 # ERROR_TAGGER
 Error_Tagger_actions 18        Embeddings _
 Error_Tagger_bool    16        Embeddings _
diff --git a/UD_any/tagparser_sequential_strong/machine.tm b/UD_any/tagparser_sequential_strong/machine.tm
index f1c0b0e..e0858d0 100644
--- a/UD_any/tagparser_sequential_strong/machine.tm
+++ b/UD_any/tagparser_sequential_strong/machine.tm
@@ -1,4 +1,4 @@
-Name : Tagger, Morpho, Lemmatizer and Parser sequential Machine
+Name : Tagger, Morpho, Lemmatizer and Parser Machine
 Dicts : tagparser.dicts
 %CLASSIFIERS
 strategy strategy.cla
@@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla
 lemmatizer_rules lemmatizer_rules.cla
 lemmatizer_case lemmatizer_case.cla
 parser parser.cla
+segmenter segmenter.cla
 %STATES
 strategy strategy
 tagger tagger
@@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup
 lemmatizer_rules lemmatizer_rules
 lemmatizer_case lemmatizer_case
 parser parser
+segmenter segmenter
 %TRANSITIONS
 strategy tagger MOVE tagger
 strategy morpho MOVE morpho
@@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup
 strategy lemmatizer_rules MOVE lemmatizer_rules
 strategy lemmatizer_case MOVE lemmatizer_case
 strategy parser MOVE parser
+strategy segmenter MOVE segmenter
 tagger strategy *
 morpho strategy *
 lemmatizer_lookup strategy *
 lemmatizer_case strategy *
 lemmatizer_rules strategy *
 parser strategy *
+segmenter strategy *
diff --git a/UD_any/tagparser_sequential_strong/normal.tm b/UD_any/tagparser_sequential_strong/normal.tm
index f1c0b0e..e0858d0 100644
--- a/UD_any/tagparser_sequential_strong/normal.tm
+++ b/UD_any/tagparser_sequential_strong/normal.tm
@@ -1,4 +1,4 @@
-Name : Tagger, Morpho, Lemmatizer and Parser sequential Machine
+Name : Tagger, Morpho, Lemmatizer and Parser Machine
 Dicts : tagparser.dicts
 %CLASSIFIERS
 strategy strategy.cla
@@ -8,6 +8,7 @@ lemmatizer_lookup lemmatizer_lookup.cla
 lemmatizer_rules lemmatizer_rules.cla
 lemmatizer_case lemmatizer_case.cla
 parser parser.cla
+segmenter segmenter.cla
 %STATES
 strategy strategy
 tagger tagger
@@ -16,6 +17,7 @@ lemmatizer_lookup lemmatizer_lookup
 lemmatizer_rules lemmatizer_rules
 lemmatizer_case lemmatizer_case
 parser parser
+segmenter segmenter
 %TRANSITIONS
 strategy tagger MOVE tagger
 strategy morpho MOVE morpho
@@ -23,9 +25,11 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup
 strategy lemmatizer_rules MOVE lemmatizer_rules
 strategy lemmatizer_case MOVE lemmatizer_case
 strategy parser MOVE parser
+strategy segmenter MOVE segmenter
 tagger strategy *
 morpho strategy *
 lemmatizer_lookup strategy *
 lemmatizer_case strategy *
 lemmatizer_rules strategy *
 parser strategy *
+segmenter strategy *
diff --git a/UD_any/tagparser_sequential_strong/segmenter.cla b/UD_any/tagparser_sequential_strong/segmenter.cla
new file mode 100644
index 0000000..6a817d1
--- /dev/null
+++ b/UD_any/tagparser_sequential_strong/segmenter.cla
@@ -0,0 +1,7 @@
+Name : Segmenter
+Type : Prediction
+Oracle : segmenter
+Feature Model : data/feature_models/tagger.fm
+Action Set : data/segmenter.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tagparser_sequential_strong/tagparser.dicts b/UD_any/tagparser_sequential_strong/tagparser.dicts
index c328e31..9672e00 100644
--- a/UD_any/tagparser_sequential_strong/tagparser.dicts
+++ b/UD_any/tagparser_sequential_strong/tagparser.dicts
@@ -49,6 +49,14 @@ Parser_lemma   30        Embeddings _ 50000
 Parser_letters 30        Embeddings _ 20000
 Parser_labels  18        Embeddings _ 50
 Parser_morpho  22        Embeddings _ 50000
+# SEGMENTER
+Segmenter_bool    02        Embeddings _ 5
+Segmenter_int     05        Embeddings _ 200
+Segmenter_letters 30        Embeddings _ 200000
+Segmenter_pos     15        Embeddings _ 21
+Segmenter_form    30        Embeddings _ 50000
+Segmenter_form.f  30        Embeddings _ 200000
+Segmenter_actions 05        Embeddings _ 21
 # ERROR_TAGGER
 Error_Tagger_actions 18        Embeddings _
 Error_Tagger_bool    16        Embeddings _
diff --git a/UD_any/tokeparser/machine.tm b/UD_any/tokeparser/machine.tm
index 0e1b043..f591178 100644
--- a/UD_any/tokeparser/machine.tm
+++ b/UD_any/tokeparser/machine.tm
@@ -9,6 +9,7 @@ lemmatizer_lookup lemmatizer_lookup.cla
 lemmatizer_rules lemmatizer_rules.cla
 lemmatizer_case lemmatizer_case.cla
 parser parser.cla
+segmenter segmenter.cla
 %STATES
 strategy strategy
 tokenizer tokenizer
@@ -18,6 +19,7 @@ lemmatizer_lookup lemmatizer_lookup
 lemmatizer_rules lemmatizer_rules
 lemmatizer_case lemmatizer_case
 parser parser
+segmenter segmenter
 %TRANSITIONS
 strategy tokenizer MOVE tokenizer
 strategy tagger MOVE tagger
@@ -26,6 +28,7 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup
 strategy lemmatizer_rules MOVE lemmatizer_rules
 strategy lemmatizer_case MOVE lemmatizer_case
 strategy parser MOVE parser
+strategy segmenter MOVE segmenter
 tokenizer strategy *
 tagger strategy *
 morpho strategy *
@@ -33,3 +36,4 @@ lemmatizer_lookup strategy *
 lemmatizer_case strategy *
 lemmatizer_rules strategy *
 parser strategy *
+segmenter strategy *
diff --git a/UD_any/tokeparser/segmenter.cla b/UD_any/tokeparser/segmenter.cla
new file mode 100644
index 0000000..6a817d1
--- /dev/null
+++ b/UD_any/tokeparser/segmenter.cla
@@ -0,0 +1,7 @@
+Name : Segmenter
+Type : Prediction
+Oracle : segmenter
+Feature Model : data/feature_models/tagger.fm
+Action Set : data/segmenter.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser/tokeparser.dicts b/UD_any/tokeparser/tokeparser.dicts
index 4534960..c9235b9 100644
--- a/UD_any/tokeparser/tokeparser.dicts
+++ b/UD_any/tokeparser/tokeparser.dicts
@@ -56,6 +56,14 @@ Parser_lemma   30        Embeddings _ 50000
 Parser_letters 30        Embeddings _ 20000
 Parser_labels  18        Embeddings _ 50
 Parser_morpho  22        Embeddings _ 50000
+# SEGMENTER
+Segmenter_bool    02        Embeddings _ 5
+Segmenter_int     05        Embeddings _ 200
+Segmenter_letters 30        Embeddings _ 200000
+Segmenter_pos     15        Embeddings _ 21
+Segmenter_form    30        Embeddings _ 50000
+Segmenter_form.f  30        Embeddings _ 200000
+Segmenter_actions 05        Embeddings _ 21
 # ERROR_TAGGER
 Error_Tagger_actions 18        Embeddings _
 Error_Tagger_bool    16        Embeddings _
diff --git a/UD_any/tokeparser_incremental/machine.tm b/UD_any/tokeparser_incremental/machine.tm
index 0e1b043..f591178 100644
--- a/UD_any/tokeparser_incremental/machine.tm
+++ b/UD_any/tokeparser_incremental/machine.tm
@@ -9,6 +9,7 @@ lemmatizer_lookup lemmatizer_lookup.cla
 lemmatizer_rules lemmatizer_rules.cla
 lemmatizer_case lemmatizer_case.cla
 parser parser.cla
+segmenter segmenter.cla
 %STATES
 strategy strategy
 tokenizer tokenizer
@@ -18,6 +19,7 @@ lemmatizer_lookup lemmatizer_lookup
 lemmatizer_rules lemmatizer_rules
 lemmatizer_case lemmatizer_case
 parser parser
+segmenter segmenter
 %TRANSITIONS
 strategy tokenizer MOVE tokenizer
 strategy tagger MOVE tagger
@@ -26,6 +28,7 @@ strategy lemmatizer_lookup MOVE lemmatizer_lookup
 strategy lemmatizer_rules MOVE lemmatizer_rules
 strategy lemmatizer_case MOVE lemmatizer_case
 strategy parser MOVE parser
+strategy segmenter MOVE segmenter
 tokenizer strategy *
 tagger strategy *
 morpho strategy *
@@ -33,3 +36,4 @@ lemmatizer_lookup strategy *
 lemmatizer_case strategy *
 lemmatizer_rules strategy *
 parser strategy *
+segmenter strategy *
diff --git a/UD_any/tokeparser_incremental/segmenter.cla b/UD_any/tokeparser_incremental/segmenter.cla
new file mode 100644
index 0000000..6a817d1
--- /dev/null
+++ b/UD_any/tokeparser_incremental/segmenter.cla
@@ -0,0 +1,7 @@
+Name : Segmenter
+Type : Prediction
+Oracle : segmenter
+Feature Model : data/feature_models/tagger.fm
+Action Set : data/segmenter.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser_incremental/tokeparser.dicts b/UD_any/tokeparser_incremental/tokeparser.dicts
index 7b7fd2b..fd78502 100644
--- a/UD_any/tokeparser_incremental/tokeparser.dicts
+++ b/UD_any/tokeparser_incremental/tokeparser.dicts
@@ -59,6 +59,14 @@ Parser_lemma   30        Embeddings _ 50000
 Parser_letters 30        Embeddings _ 20000
 Parser_labels  18        Embeddings _ 50
 Parser_morpho  22        Embeddings _ 50000
+# SEGMENTER
+Segmenter_bool    02        Embeddings _ 5
+Segmenter_int     05        Embeddings _ 200
+Segmenter_letters 30        Embeddings _ 200000
+Segmenter_pos     15        Embeddings _ 21
+Segmenter_form    30        Embeddings _ 50000
+Segmenter_form.f  30        Embeddings _ 200000
+Segmenter_actions 05        Embeddings _ 21
 # ERROR_TAGGER
 Error_Tagger_actions 18        Embeddings _
 Error_Tagger_bool    16        Embeddings _
-- 
GitLab