From 10107218cb0733f3a04d6a5c7589dce8a84e90aa Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Thu, 14 Nov 2019 21:01:39 +0100
Subject: [PATCH] Created first version of tokeparser_incremental

---
 .../data/feature_models/morpho_incremental.fm | 28 +++++++++
 .../data/feature_models/tagger_incremental.fm | 25 ++++++++
 .../feature_models/tokenizer_incremental.fm   | 42 +++++++++++++
 .../lemmatizer_case.cla                       |  7 +++
 .../lemmatizer_lookup.cla                     |  4 ++
 .../lemmatizer_rules.cla                      |  7 +++
 UD_any/tokeparser_incremental/machine.tm      | 35 +++++++++++
 UD_any/tokeparser_incremental/morpho.cla      |  7 +++
 UD_any/tokeparser_incremental/normal.tm       | 31 ++++++++++
 UD_any/tokeparser_incremental/parser.cla      |  7 +++
 UD_any/tokeparser_incremental/strategy.cla    |  4 ++
 UD_any/tokeparser_incremental/tagger.cla      |  7 +++
 UD_any/tokeparser_incremental/test.bd         | 11 ++++
 UD_any/tokeparser_incremental/tokenizer.cla   |  7 +++
 .../tokeparser_incremental/tokeparser.dicts   | 62 +++++++++++++++++++
 UD_any/tokeparser_incremental/train.bd        | 11 ++++
 16 files changed, 295 insertions(+)
 create mode 100644 UD_any/data/feature_models/morpho_incremental.fm
 create mode 100644 UD_any/data/feature_models/tagger_incremental.fm
 create mode 100644 UD_any/data/feature_models/tokenizer_incremental.fm
 create mode 100644 UD_any/tokeparser_incremental/lemmatizer_case.cla
 create mode 100644 UD_any/tokeparser_incremental/lemmatizer_lookup.cla
 create mode 100644 UD_any/tokeparser_incremental/lemmatizer_rules.cla
 create mode 100644 UD_any/tokeparser_incremental/machine.tm
 create mode 100644 UD_any/tokeparser_incremental/morpho.cla
 create mode 100644 UD_any/tokeparser_incremental/normal.tm
 create mode 100644 UD_any/tokeparser_incremental/parser.cla
 create mode 100644 UD_any/tokeparser_incremental/strategy.cla
 create mode 100644 UD_any/tokeparser_incremental/tagger.cla
 create mode 100644 UD_any/tokeparser_incremental/test.bd
 create mode 100644 UD_any/tokeparser_incremental/tokenizer.cla
 create mode 100644 UD_any/tokeparser_incremental/tokeparser.dicts
 create mode 100644 UD_any/tokeparser_incremental/train.bd

diff --git a/UD_any/data/feature_models/morpho_incremental.fm b/UD_any/data/feature_models/morpho_incremental.fm
new file mode 100644
index 0000000..2086605
--- /dev/null
+++ b/UD_any/data/feature_models/morpho_incremental.fm
@@ -0,0 +1,28 @@
+# FORM
+b.-2#FORM.fasttext
+b.-1#FORM.fasttext
+b.0#FORM.fasttext
+# POS
+b.-3#POS
+b.-2#POS
+b.-1#POS
+b.0#POS
+# MORPHO
+b.-2#MORPHO
+b.-1#MORPHO
+b.0#MORPHO
+# UPPERCASE
+b.0#FORM.U
+# LENGTH
+b.0#FORM.LEN
+# SUFFIXES
+b.0#FORM.PART.-4.-4
+b.0#FORM.PART.-3.-3
+b.0#FORM.PART.-2.-2
+b.0#FORM.PART.-1.-1
+# PREFIXES
+b.0#FORM.PART.0.0
+b.0#FORM.PART.1.1
+b.0#FORM.PART.2.2
+b.0#FORM.PART.3.3
+
diff --git a/UD_any/data/feature_models/tagger_incremental.fm b/UD_any/data/feature_models/tagger_incremental.fm
new file mode 100644
index 0000000..1bd749b
--- /dev/null
+++ b/UD_any/data/feature_models/tagger_incremental.fm
@@ -0,0 +1,25 @@
+# FORM
+b.-2#FORM.fasttext
+b.-1#FORM.fasttext
+b.0#FORM.fasttext
+# POS
+b.-3#POS
+b.-2#POS
+b.-1#POS
+# UPPERCASE
+b.0#FORM.U
+# LENGTH
+b.0#FORM.LEN
+# SUFFIXES
+b.0#FORM.PART.-4.-4
+b.0#FORM.PART.-3.-3
+b.0#FORM.PART.-2.-2
+b.0#FORM.PART.-1.-1
+# PREFIXES
+b.0#FORM.PART.0.0
+b.0#FORM.PART.1.1
+b.0#FORM.PART.2.2
+b.0#FORM.PART.3.3
+# MORPHO
+b.-2#MORPHO
+b.-1#MORPHO
diff --git a/UD_any/data/feature_models/tokenizer_incremental.fm b/UD_any/data/feature_models/tokenizer_incremental.fm
new file mode 100644
index 0000000..d10a62e
--- /dev/null
+++ b/UD_any/data/feature_models/tokenizer_incremental.fm
@@ -0,0 +1,42 @@
+# FORM
+b.-2#FORM.fasttext
+b.-1#FORM.fasttext
+b.0#FORM.fasttext
+# UPPERCASE
+b.-1#FORM.U
+# LENGTH
+b.-1#FORM.LEN
+b.0#FORM.LEN
+# SUFFIXES
+b.0#FORM.PART.-4.-4
+b.0#FORM.PART.-3.-3
+b.0#FORM.PART.-2.-2
+b.0#FORM.PART.-1.-1
+# PREFIXES
+b.0#FORM.PART.0.0
+b.0#FORM.PART.1.1
+b.0#FORM.PART.2.2
+b.0#FORM.PART.3.3
+# RAW INPUT
+raw.-5
+raw.-4
+raw.-3
+raw.-2
+raw.-1
+raw.0
+raw.2
+raw.3
+raw.4
+raw.5
+raw.6
+# POS
+b.-2#POS
+b.-1#POS
+b.0#POS
+# MORPHO
+b.-2#MORPHO
+b.-1#MORPHO
+b.0#MORPHO
+# EOS
+b.-1#EOS
+b.-2#EOS
diff --git a/UD_any/tokeparser_incremental/lemmatizer_case.cla b/UD_any/tokeparser_incremental/lemmatizer_case.cla
new file mode 100644
index 0000000..0994d40
--- /dev/null
+++ b/UD_any/tokeparser_incremental/lemmatizer_case.cla
@@ -0,0 +1,7 @@
+Name : Lemmatizer_Case
+Type : Prediction
+Oracle : lemma_case
+Feature Model : data/feature_models/lemmatizer_rules.fm
+Action Set : data/lemmatizer_case.as
+Topology : (100,RELU,0.1)
+Dynamic : yes
diff --git a/UD_any/tokeparser_incremental/lemmatizer_lookup.cla b/UD_any/tokeparser_incremental/lemmatizer_lookup.cla
new file mode 100644
index 0000000..41f63b2
--- /dev/null
+++ b/UD_any/tokeparser_incremental/lemmatizer_lookup.cla
@@ -0,0 +1,4 @@
+Name : Lemmatizer_Lookup
+Type : Information
+Oracle : lemma_lookup
+Oracle Filename : data/maca_trans_lemmatizer_exceptions.fplm
diff --git a/UD_any/tokeparser_incremental/lemmatizer_rules.cla b/UD_any/tokeparser_incremental/lemmatizer_rules.cla
new file mode 100644
index 0000000..9b59b62
--- /dev/null
+++ b/UD_any/tokeparser_incremental/lemmatizer_rules.cla
@@ -0,0 +1,7 @@
+Name : Lemmatizer_Rules
+Type : Prediction
+Oracle : lemma_rules
+Feature Model : data/feature_models/lemmatizer_rules.fm
+Action Set : data/lemmatizer_rules.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser_incremental/machine.tm b/UD_any/tokeparser_incremental/machine.tm
new file mode 100644
index 0000000..0e1b043
--- /dev/null
+++ b/UD_any/tokeparser_incremental/machine.tm
@@ -0,0 +1,35 @@
+Name : Tokenizer, Tagger, Morpho, Lemmatizer and Parser Machine
+Dicts : tokeparser.dicts
+%CLASSIFIERS
+strategy strategy.cla
+tokenizer tokenizer.cla
+tagger tagger.cla
+morpho morpho.cla
+lemmatizer_lookup lemmatizer_lookup.cla
+lemmatizer_rules lemmatizer_rules.cla
+lemmatizer_case lemmatizer_case.cla
+parser parser.cla
+%STATES
+strategy strategy
+tokenizer tokenizer
+tagger tagger
+morpho morpho
+lemmatizer_lookup lemmatizer_lookup
+lemmatizer_rules lemmatizer_rules
+lemmatizer_case lemmatizer_case
+parser parser
+%TRANSITIONS
+strategy tokenizer MOVE tokenizer
+strategy tagger MOVE tagger
+strategy morpho MOVE morpho
+strategy lemmatizer_lookup MOVE lemmatizer_lookup
+strategy lemmatizer_rules MOVE lemmatizer_rules
+strategy lemmatizer_case MOVE lemmatizer_case
+strategy parser MOVE parser
+tokenizer strategy *
+tagger strategy *
+morpho strategy *
+lemmatizer_lookup strategy *
+lemmatizer_case strategy *
+lemmatizer_rules strategy *
+parser strategy *
diff --git a/UD_any/tokeparser_incremental/morpho.cla b/UD_any/tokeparser_incremental/morpho.cla
new file mode 100644
index 0000000..63340a0
--- /dev/null
+++ b/UD_any/tokeparser_incremental/morpho.cla
@@ -0,0 +1,7 @@
+Name : Morpho
+Type : Prediction
+Oracle : morpho
+Feature Model : data/feature_models/morpho_incremental.fm
+Action Set : data/morpho_parts.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser_incremental/normal.tm b/UD_any/tokeparser_incremental/normal.tm
new file mode 100644
index 0000000..77ebff3
--- /dev/null
+++ b/UD_any/tokeparser_incremental/normal.tm
@@ -0,0 +1,31 @@
+Name : Tagger, Morpho, Lemmatizer and Parser Machine
+Dicts : tokeparser.dicts
+%CLASSIFIERS
+strategy strategy.cla
+tagger tagger.cla
+morpho morpho.cla
+lemmatizer_lookup lemmatizer_lookup.cla
+lemmatizer_rules lemmatizer_rules.cla
+lemmatizer_case lemmatizer_case.cla
+parser parser.cla
+%STATES
+strategy strategy
+tagger tagger
+morpho morpho
+lemmatizer_lookup lemmatizer_lookup
+lemmatizer_rules lemmatizer_rules
+lemmatizer_case lemmatizer_case
+parser parser
+%TRANSITIONS
+strategy tagger MOVE tagger
+strategy morpho MOVE morpho
+strategy lemmatizer_lookup MOVE lemmatizer_lookup
+strategy lemmatizer_rules MOVE lemmatizer_rules
+strategy lemmatizer_case MOVE lemmatizer_case
+strategy parser MOVE parser
+tagger strategy *
+morpho strategy *
+lemmatizer_lookup strategy *
+lemmatizer_case strategy *
+lemmatizer_rules strategy *
+parser strategy *
diff --git a/UD_any/tokeparser_incremental/parser.cla b/UD_any/tokeparser_incremental/parser.cla
new file mode 100644
index 0000000..77714be
--- /dev/null
+++ b/UD_any/tokeparser_incremental/parser.cla
@@ -0,0 +1,7 @@
+Name : Parser
+Type : Prediction
+Oracle : parser
+Feature Model : data/feature_models/parser_nofuture.fm
+Action Set : data/parser.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser_incremental/strategy.cla b/UD_any/tokeparser_incremental/strategy.cla
new file mode 100644
index 0000000..fcf66b5
--- /dev/null
+++ b/UD_any/tokeparser_incremental/strategy.cla
@@ -0,0 +1,4 @@
+Name : Strategy
+Type : Information
+Oracle : strategy_tokenizer,tagger,morpho,lemmatizer,parser
+Oracle Filename : none
diff --git a/UD_any/tokeparser_incremental/tagger.cla b/UD_any/tokeparser_incremental/tagger.cla
new file mode 100644
index 0000000..9aa35ed
--- /dev/null
+++ b/UD_any/tokeparser_incremental/tagger.cla
@@ -0,0 +1,7 @@
+Name : Tagger
+Type : Prediction
+Oracle : tagger
+Feature Model : data/feature_models/tagger_incremental.fm
+Action Set : data/tagger.as
+Topology : (500,RELU,0.3)
+Dynamic : yes
diff --git a/UD_any/tokeparser_incremental/test.bd b/UD_any/tokeparser_incremental/test.bd
new file mode 100644
index 0000000..b2ef996
--- /dev/null
+++ b/UD_any/tokeparser_incremental/test.bd
@@ -0,0 +1,11 @@
+#Index Name   ref/hyp dict    Policy   Must print?#
+###################################################
+0      ID     hyp     none    Final    1
+1      FORM   hyp     form    Final    1
+3      POS    hyp     pos     Final    1
+4      XPOS   hyp     pos     Final    1
+5      MORPHO hyp     morpho  Final    1
+2      LEMMA  hyp     form    Final    1
+6      GOV    hyp     int     Final    1
+7      LABEL  hyp     labels  Final    1
+0      EOS    hyp     eos     Final    0
diff --git a/UD_any/tokeparser_incremental/tokenizer.cla b/UD_any/tokeparser_incremental/tokenizer.cla
new file mode 100644
index 0000000..127a62e
--- /dev/null
+++ b/UD_any/tokeparser_incremental/tokenizer.cla
@@ -0,0 +1,7 @@
+Name : Tokenizer
+Type : Prediction
+Oracle : tokenizer
+Feature Model : data/feature_models/tokenizer_incremental.fm
+Action Set : data/tokenizer.as
+Topology : (500,RELU,0.3)
+Dynamic : no
diff --git a/UD_any/tokeparser_incremental/tokeparser.dicts b/UD_any/tokeparser_incremental/tokeparser.dicts
new file mode 100644
index 0000000..01ff2b9
--- /dev/null
+++ b/UD_any/tokeparser_incremental/tokeparser.dicts
@@ -0,0 +1,62 @@
+#Name   Dimension Mode     #
+############################
+# TOKENIZER
+Tokenizer_bool    02        Embeddings
+Tokenizer_int     05        Embeddings
+Tokenizer_letters 30        Embeddings
+Tokenizer_form    30        Embeddings
+Tokenizer_form.f  30        Embeddings
+Tokenizer_actions 05        Embeddings
+Tokenizer_entropy 05        Embeddings
+Tokenizer_pos     18        Embeddings
+Tokenizer_morpho  22        Embeddings
+Tokenizer_eos     16        Embeddings
+# TAGGER
+Tagger_actions 18        Embeddings _
+Tagger_bool    16        Embeddings _
+Tagger_int     16        Embeddings _
+Tagger_eos     16        Embeddings _
+Tagger_gov     16        Embeddings _
+Tagger_pos     18        Embeddings _
+Tagger_form    30        Embeddings _
+Tagger_form.f  30        Embeddings _
+Tagger_lemma   30        Embeddings _
+Tagger_letters 30        Embeddings _
+Tagger_labels  18        Embeddings _
+Tagger_morpho  22        Embeddings _
+# MORPHO
+Morpho_actions 18        Embeddings _
+Morpho_bool    16        Embeddings _
+Morpho_int     16        Embeddings _
+Morpho_eos     16        Embeddings _
+Morpho_gov     16        Embeddings _
+Morpho_pos     18        Embeddings _
+Morpho_form    30        Embeddings _
+Morpho_form.f  30        Embeddings _
+Morpho_lemma   30        Embeddings _
+Morpho_letters 30        Embeddings _
+Morpho_labels  18        Embeddings _
+Morpho_morpho  22        Embeddings _
+# LEMMATIZER RULES
+Lemmatizer_Rules_form    30        Embeddings
+Lemmatizer_Rules_letters 10        Embeddings
+Lemmatizer_Rules_pos     30        Embeddings
+Lemmatizer_Rules_morpho  30        Embeddings
+# LEMMATIZER CASE
+Lemmatizer_Case_form    30        Embeddings
+Lemmatizer_Case_letters 10        Embeddings
+Lemmatizer_Case_pos     30        Embeddings
+Lemmatizer_Case_morpho  30        Embeddings
+# PARSER
+Parser_actions 18        Embeddings _
+Parser_bool    16        Embeddings _
+Parser_int     16        Embeddings _
+Parser_eos     16        Embeddings _
+Parser_gov     16        Embeddings _
+Parser_pos     18        Embeddings _
+Parser_form    30        Embeddings _
+Parser_form.f  30        Embeddings _
+Parser_lemma   30        Embeddings _
+Parser_letters 30        Embeddings _
+Parser_labels  18        Embeddings _
+Parser_morpho  22        Embeddings _
diff --git a/UD_any/tokeparser_incremental/train.bd b/UD_any/tokeparser_incremental/train.bd
new file mode 100644
index 0000000..963c311
--- /dev/null
+++ b/UD_any/tokeparser_incremental/train.bd
@@ -0,0 +1,11 @@
+#Index Name   ref/hyp dict    Policy   Must print?#
+###################################################
+0      ID     hyp     none    FromZero 1
+1      FORM   hyp     form    FromZero 1
+3      POS    hyp     pos     FromZero 1
+4      XPOS   hyp     pos     FromZero 1
+5      MORPHO hyp     morpho  FromZero 1
+2      LEMMA  hyp     form    FromZero 1
+6      GOV    hyp     int     FromZero 1
+7      LABEL  hyp     labels  FromZero 1
+0      EOS    hyp     eos     FromZero 0
-- 
GitLab