From c52f71ed8adb25cc29375b695c762ec36330483f Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Wed, 27 Nov 2019 14:04:21 +0100
Subject: [PATCH] Added tokeparser_sequential

---
 .../tokeparser_sequential/lemmatizer_case.cla |   7 ++
 .../lemmatizer_lookup.cla                     |   4 +
 .../lemmatizer_rules.cla                      |   7 ++
 UD_any/tokeparser_sequential/machine.tm       |  39 +++++++
 UD_any/tokeparser_sequential/morpho.cla       |   7 ++
 UD_any/tokeparser_sequential/normal.tm        |  31 ++++++
 UD_any/tokeparser_sequential/parser.cla       |   7 ++
 UD_any/tokeparser_sequential/segmenter.cla    |   7 ++
 UD_any/tokeparser_sequential/strategy.cla     |   4 +
 UD_any/tokeparser_sequential/tagger.cla       |   7 ++
 UD_any/tokeparser_sequential/test.bd          |  11 ++
 UD_any/tokeparser_sequential/tokenizer.cla    |   7 ++
 UD_any/tokeparser_sequential/tokeparser.dicts | 102 ++++++++++++++++++
 UD_any/tokeparser_sequential/train.bd         |  11 ++
 14 files changed, 251 insertions(+)
 create mode 100644 UD_any/tokeparser_sequential/lemmatizer_case.cla
 create mode 100644 UD_any/tokeparser_sequential/lemmatizer_lookup.cla
 create mode 100644 UD_any/tokeparser_sequential/lemmatizer_rules.cla
 create mode 100644 UD_any/tokeparser_sequential/machine.tm
 create mode 100644 UD_any/tokeparser_sequential/morpho.cla
 create mode 100644 UD_any/tokeparser_sequential/normal.tm
 create mode 100644 UD_any/tokeparser_sequential/parser.cla
 create mode 100644 UD_any/tokeparser_sequential/segmenter.cla
 create mode 100644 UD_any/tokeparser_sequential/strategy.cla
 create mode 100644 UD_any/tokeparser_sequential/tagger.cla
 create mode 100644 UD_any/tokeparser_sequential/test.bd
 create mode 100644 UD_any/tokeparser_sequential/tokenizer.cla
 create mode 100644 UD_any/tokeparser_sequential/tokeparser.dicts
 create mode 100644 UD_any/tokeparser_sequential/train.bd

diff --git a/UD_any/tokeparser_sequential/lemmatizer_case.cla b/UD_any/tokeparser_sequential/lemmatizer_case.cla
new file mode 100644
index 0000000..0994d40
--- /dev/null
+++ b/UD_any/tokeparser_sequential/lemmatizer_case.cla
@@ -0,0 +1,7 @@
+Name : Lemmatizer_Case
+Type : Prediction
+Oracle : lemma_case
+Feature Model : data/feature_models/lemmatizer_rules.fm
+Action Set : data/lemmatizer_case.as
+Topology : (100,RELU,0.1)
+Dynamic : yes
diff --git a/UD_any/tokeparser_sequential/lemmatizer_lookup.cla b/UD_any/tokeparser_sequential/lemmatizer_lookup.cla
new file mode 100644
index 0000000..41f63b2
--- /dev/null
+++ b/UD_any/tokeparser_sequential/lemmatizer_lookup.cla
@@ -0,0 +1,4 @@
+Name : Lemmatizer_Lookup
+Type : Information
+Oracle : lemma_lookup
+Oracle Filename : data/maca_trans_lemmatizer_exceptions.fplm
diff --git a/UD_any/tokeparser_sequential/lemmatizer_rules.cla b/UD_any/tokeparser_sequential/lemmatizer_rules.cla
new file mode 100644
index 0000000..9b59b62
--- /dev/null
+++ b/UD_any/tokeparser_sequential/lemmatizer_rules.cla
@@ -0,0 +1,7 @@
+Name : Lemmatizer_Rules
+Type : Prediction
+Oracle : lemma_rules
+Feature Model : data/feature_models/lemmatizer_rules.fm
+Action Set : data/lemmatizer_rules.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser_sequential/machine.tm b/UD_any/tokeparser_sequential/machine.tm
new file mode 100644
index 0000000..f591178
--- /dev/null
+++ b/UD_any/tokeparser_sequential/machine.tm
@@ -0,0 +1,39 @@
+Name : Tokenizer, Tagger, Morpho, Lemmatizer and Parser Machine
+Dicts : tokeparser.dicts
+%CLASSIFIERS
+strategy strategy.cla
+tokenizer tokenizer.cla
+tagger tagger.cla
+morpho morpho.cla
+lemmatizer_lookup lemmatizer_lookup.cla
+lemmatizer_rules lemmatizer_rules.cla
+lemmatizer_case lemmatizer_case.cla
+parser parser.cla
+segmenter segmenter.cla
+%STATES
+strategy strategy
+tokenizer tokenizer
+tagger tagger
+morpho morpho
+lemmatizer_lookup lemmatizer_lookup
+lemmatizer_rules lemmatizer_rules
+lemmatizer_case lemmatizer_case
+parser parser
+segmenter segmenter
+%TRANSITIONS
+strategy tokenizer MOVE tokenizer
+strategy tagger MOVE tagger
+strategy morpho MOVE morpho
+strategy lemmatizer_lookup MOVE lemmatizer_lookup
+strategy lemmatizer_rules MOVE lemmatizer_rules
+strategy lemmatizer_case MOVE lemmatizer_case
+strategy parser MOVE parser
+strategy segmenter MOVE segmenter
+tokenizer strategy *
+tagger strategy *
+morpho strategy *
+lemmatizer_lookup strategy *
+lemmatizer_case strategy *
+lemmatizer_rules strategy *
+parser strategy *
+segmenter strategy *
diff --git a/UD_any/tokeparser_sequential/morpho.cla b/UD_any/tokeparser_sequential/morpho.cla
new file mode 100644
index 0000000..024d1e9
--- /dev/null
+++ b/UD_any/tokeparser_sequential/morpho.cla
@@ -0,0 +1,7 @@
+Name : Morpho
+Type : Prediction
+Oracle : morpho
+Feature Model : data/feature_models/morpho_nofuture.fm
+Action Set : data/morpho_parts.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser_sequential/normal.tm b/UD_any/tokeparser_sequential/normal.tm
new file mode 100644
index 0000000..77ebff3
--- /dev/null
+++ b/UD_any/tokeparser_sequential/normal.tm
@@ -0,0 +1,31 @@
+Name : Tagger, Morpho, Lemmatizer and Parser Machine
+Dicts : tagparser.dicts
+%CLASSIFIERS
+strategy strategy.cla
+tagger tagger.cla
+morpho morpho.cla
+lemmatizer_lookup lemmatizer_lookup.cla
+lemmatizer_rules lemmatizer_rules.cla
+lemmatizer_case lemmatizer_case.cla
+parser parser.cla
+%STATES
+strategy strategy
+tagger tagger
+morpho morpho
+lemmatizer_lookup lemmatizer_lookup
+lemmatizer_rules lemmatizer_rules
+lemmatizer_case lemmatizer_case
+parser parser
+%TRANSITIONS
+strategy tagger MOVE tagger
+strategy morpho MOVE morpho
+strategy lemmatizer_lookup MOVE lemmatizer_lookup
+strategy lemmatizer_rules MOVE lemmatizer_rules
+strategy lemmatizer_case MOVE lemmatizer_case
+strategy parser MOVE parser
+tagger strategy *
+morpho strategy *
+lemmatizer_lookup strategy *
+lemmatizer_case strategy *
+lemmatizer_rules strategy *
+parser strategy *
diff --git a/UD_any/tokeparser_sequential/parser.cla b/UD_any/tokeparser_sequential/parser.cla
new file mode 100644
index 0000000..77714be
--- /dev/null
+++ b/UD_any/tokeparser_sequential/parser.cla
@@ -0,0 +1,7 @@
+Name : Parser
+Type : Prediction
+Oracle : parser
+Feature Model : data/feature_models/parser_nofuture.fm
+Action Set : data/parser.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser_sequential/segmenter.cla b/UD_any/tokeparser_sequential/segmenter.cla
new file mode 100644
index 0000000..6a817d1
--- /dev/null
+++ b/UD_any/tokeparser_sequential/segmenter.cla
@@ -0,0 +1,7 @@
+Name : Segmenter
+Type : Prediction
+Oracle : segmenter
+Feature Model : data/feature_models/tagger.fm
+Action Set : data/segmenter.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser_sequential/strategy.cla b/UD_any/tokeparser_sequential/strategy.cla
new file mode 100644
index 0000000..d21d9a7
--- /dev/null
+++ b/UD_any/tokeparser_sequential/strategy.cla
@@ -0,0 +1,4 @@
+Name : Strategy
+Type : Information
+Oracle : strategy_tokenizer,tagger,morpho,lemmatizer,parser_sequential
+Oracle Filename : none
diff --git a/UD_any/tokeparser_sequential/tagger.cla b/UD_any/tokeparser_sequential/tagger.cla
new file mode 100644
index 0000000..c98cb92
--- /dev/null
+++ b/UD_any/tokeparser_sequential/tagger.cla
@@ -0,0 +1,7 @@
+Name : Tagger
+Type : Prediction
+Oracle : tagger
+Feature Model : data/feature_models/tagger_nofuture.fm
+Action Set : data/tagger.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser_sequential/test.bd b/UD_any/tokeparser_sequential/test.bd
new file mode 100644
index 0000000..b2ef996
--- /dev/null
+++ b/UD_any/tokeparser_sequential/test.bd
@@ -0,0 +1,11 @@
+#Index Name   ref/hyp dict    Policy   Must print?#
+###################################################
+0      ID     hyp     none    Final    1
+1      FORM   hyp     form    Final    1
+3      POS    hyp     pos     Final    1
+4      XPOS   hyp     pos     Final    1
+5      MORPHO hyp     morpho  Final    1
+2      LEMMA  hyp     form    Final    1
+6      GOV    hyp     int     Final    1
+7      LABEL  hyp     labels  Final    1
+0      EOS    hyp     eos     Final    0
diff --git a/UD_any/tokeparser_sequential/tokenizer.cla b/UD_any/tokeparser_sequential/tokenizer.cla
new file mode 100644
index 0000000..cdd5a2e
--- /dev/null
+++ b/UD_any/tokeparser_sequential/tokenizer.cla
@@ -0,0 +1,7 @@
+Name : Tokenizer
+Type : Prediction
+Oracle : tokenizer
+Feature Model : data/feature_models/tokenizer.fm
+Action Set : data/tokenizer.as
+Topology : (500,RELU,0.3)
+Dynamic : no
diff --git a/UD_any/tokeparser_sequential/tokeparser.dicts b/UD_any/tokeparser_sequential/tokeparser.dicts
new file mode 100644
index 0000000..c9235b9
--- /dev/null
+++ b/UD_any/tokeparser_sequential/tokeparser.dicts
@@ -0,0 +1,102 @@
+#Name   Dimension Mode     #
+############################
+# TOKENIZER
+Tokenizer_bool    02        Embeddings _ 5
+Tokenizer_int     05        Embeddings _ 200
+Tokenizer_letters 30        Embeddings _ 200000
+Tokenizer_form    30        Embeddings _ 50000
+Tokenizer_form.f  30        Embeddings _ 200000
+Tokenizer_actions 05        Embeddings _ 200
+# TAGGER
+Tagger_actions 18        Embeddings _ 20
+Tagger_bool    16        Embeddings _ 5
+Tagger_int     16        Embeddings _ 200
+Tagger_eos     16        Embeddings _ 5
+Tagger_gov     16        Embeddings _ 100
+Tagger_pos     18        Embeddings _ 21
+Tagger_form    30        Embeddings _ 50000
+Tagger_form.f  30        Embeddings _ 200000
+Tagger_lemma   30        Embeddings _ 50000
+Tagger_letters 30        Embeddings _ 200000
+Tagger_labels  18        Embeddings _ 50
+Tagger_morpho  22        Embeddings _ 50000
+# MORPHO
+Morpho_actions 18        Embeddings _ 1000
+Morpho_bool    16        Embeddings _ 5
+Morpho_int     16        Embeddings _ 200
+Morpho_eos     16        Embeddings _ 5
+Morpho_gov     16        Embeddings _ 100
+Morpho_pos     18        Embeddings _ 21
+Morpho_form    30        Embeddings _ 50000
+Morpho_form.f  30        Embeddings _ 20000
+Morpho_lemma   30        Embeddings _ 50000
+Morpho_letters 30        Embeddings _ 20000
+Morpho_labels  18        Embeddings _ 50
+Morpho_morpho  22        Embeddings _ 50000
+# LEMMATIZER_RULES
+Lemmatizer_Rules_form    30        Embeddings _ 50000
+Lemmatizer_Rules_letters 10        Embeddings _ 200000
+Lemmatizer_Rules_pos     30        Embeddings _ 21
+Lemmatizer_Rules_morpho  30        Embeddings _ 50000
+# LEMMATIZER_CASE
+Lemmatizer_Case_form    30        Embeddings _ 50000
+Lemmatizer_Case_letters 10        Embeddings _ 200000
+Lemmatizer_Case_pos     30        Embeddings _ 21
+Lemmatizer_Case_morpho  30        Embeddings _ 50000
+# PARSER
+Parser_actions 18        Embeddings _ 200
+Parser_bool    16        Embeddings _ 5
+Parser_int     16        Embeddings _ 200
+Parser_eos     16        Embeddings _ 5
+Parser_gov     16        Embeddings _ 100
+Parser_pos     18        Embeddings _ 21
+Parser_form    30        Embeddings _ 50000
+Parser_form.f  30        Embeddings _ 20000
+Parser_lemma   30        Embeddings _ 50000
+Parser_letters 30        Embeddings _ 20000
+Parser_labels  18        Embeddings _ 50
+Parser_morpho  22        Embeddings _ 50000
+# SEGMENTER
+Segmenter_bool    02        Embeddings _ 5
+Segmenter_int     05        Embeddings _ 200
+Segmenter_letters 30        Embeddings _ 200000
+Segmenter_pos     15        Embeddings _ 21
+Segmenter_form    30        Embeddings _ 50000
+Segmenter_form.f  30        Embeddings _ 200000
+Segmenter_actions 05        Embeddings _ 21
+# ERROR_TAGGER
+Error_Tagger_actions 18        Embeddings _
+Error_Tagger_bool    16        Embeddings _
+Error_Tagger_int     16        Embeddings _
+Error_Tagger_eos     16        Embeddings _
+Error_Tagger_gov     16        Embeddings _
+Error_Tagger_pos     18        Embeddings _
+Error_Tagger_form    30        Embeddings _
+Error_Tagger_lemma   30        Embeddings _
+Error_Tagger_letters 30        Embeddings _
+Error_Tagger_labels  18        Embeddings _
+Error_Tagger_morpho  22        Embeddings _
+# ERROR_MORPHO
+Error_Morpho_actions 18        Embeddings _
+Error_Morpho_bool    16        Embeddings _
+Error_Morpho_int     16        Embeddings _
+Error_Morpho_eos     16        Embeddings _
+Error_Morpho_gov     16        Embeddings _
+Error_Morpho_pos     18        Embeddings _
+Error_Morpho_form    30        Embeddings _
+Error_Morpho_lemma   30        Embeddings _
+Error_Morpho_letters 30        Embeddings _
+Error_Morpho_labels  18        Embeddings _
+Error_Morpho_morpho  22        Embeddings _
+# ERROR_PARSER
+Error_Parser_actions 18        Embeddings _
+Error_Parser_bool    16        Embeddings _
+Error_Parser_int     16        Embeddings _
+Error_Parser_eos     16        Embeddings _
+Error_Parser_gov     16        Embeddings _
+Error_Parser_pos     18        Embeddings _
+Error_Parser_form    30        Embeddings _
+Error_Parser_lemma   30        Embeddings _
+Error_Parser_letters 30        Embeddings _
+Error_Parser_labels  18        Embeddings _
+Error_Parser_morpho  22        Embeddings _
diff --git a/UD_any/tokeparser_sequential/train.bd b/UD_any/tokeparser_sequential/train.bd
new file mode 100644
index 0000000..963c311
--- /dev/null
+++ b/UD_any/tokeparser_sequential/train.bd
@@ -0,0 +1,11 @@
+#Index Name   ref/hyp dict    Policy   Must print?#
+###################################################
+0      ID     hyp     none    FromZero 1
+1      FORM   hyp     form    FromZero 1
+3      POS    hyp     pos     FromZero 1
+4      XPOS   hyp     pos     FromZero 1
+5      MORPHO hyp     morpho  FromZero 1
+2      LEMMA  hyp     form    FromZero 1
+6      GOV    hyp     int     FromZero 1
+7      LABEL  hyp     labels  FromZero 1
+0      EOS    hyp     eos     FromZero 0
-- 
GitLab