From ecf7429902a386d1121819a3a7c8119754c83cc6 Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Fri, 6 Dec 2019 16:18:48 +0100 Subject: [PATCH] Added new features models and machines --- .../b0/lemmatizer_rules_incremental.fm | 16 +++ .../feature_models/b0/morpho_incremental.fm | 41 ++++++ .../data/feature_models/b0/parser_nofuture.fm | 98 +++++++++++++ .../feature_models/b0/tagger_incremental.fm | 24 ++++ .../b0/tokenizer_incremental.fm | 43 ++++++ .../b1/lemmatizer_rules_incremental.fm | 21 +++ .../feature_models/b1/morpho_incremental.fm | 45 ++++++ .../data/feature_models/b1/parser_nofuture.fm | 106 ++++++++++++++ .../feature_models/b1/tagger_incremental.fm | 43 ++++++ .../b1/tokenizer_incremental.fm | 51 +++++++ .../b2/lemmatizer_rules_incremental.fm | 25 ++++ .../feature_models/b2/morpho_incremental.fm | 49 +++++++ .../data/feature_models/b2/parser_nofuture.fm | 111 +++++++++++++++ .../feature_models/b2/tagger_incremental.fm | 47 +++++++ .../b2/tokenizer_incremental.fm | 55 ++++++++ .../b3/lemmatizer_rules_incremental.fm | 29 ++++ .../feature_models/b3/morpho_incremental.fm | 53 +++++++ .../data/feature_models/b3/parser_nofuture.fm | 116 ++++++++++++++++ .../feature_models/b3/tagger_incremental.fm | 51 +++++++ .../b3/tokenizer_incremental.fm | 59 ++++++++ .../b4/lemmatizer_rules_incremental.fm | 30 ++++ .../feature_models/b4/morpho_incremental.fm | 57 ++++++++ .../data/feature_models/b4/parser_nofuture.fm | 121 ++++++++++++++++ .../feature_models/b4/tagger_incremental.fm | 55 ++++++++ .../b4/tokenizer_incremental.fm | 63 +++++++++ .../b5/lemmatizer_rules_incremental.fm | 33 +++++ .../feature_models/b5/morpho_incremental.fm | 61 ++++++++ .../data/feature_models/b5/parser_nofuture.fm | 126 +++++++++++++++++ .../feature_models/b5/tagger_incremental.fm | 59 ++++++++ .../b5/tokenizer_incremental.fm | 67 +++++++++ .../b6/lemmatizer_rules_incremental.fm | 36 +++++ .../feature_models/b6/morpho_incremental.fm | 65 +++++++++ .../data/feature_models/b6/parser_nofuture.fm | 131 ++++++++++++++++++ .../feature_models/b6/tagger_incremental.fm | 63 +++++++++ .../b6/tokenizer_incremental.fm | 71 ++++++++++ .../lemmatizer_case.cla | 7 + .../lemmatizer_lookup.cla | 4 + .../lemmatizer_rules.cla | 7 + UD_any/tokeparser_incremental_b0/machine.tm | 39 ++++++ UD_any/tokeparser_incremental_b0/morpho.cla | 7 + UD_any/tokeparser_incremental_b0/normal.tm | 31 +++++ UD_any/tokeparser_incremental_b0/parser.cla | 7 + .../tokeparser_incremental_b0/segmenter.cla | 7 + UD_any/tokeparser_incremental_b0/strategy.cla | 4 + UD_any/tokeparser_incremental_b0/tagger.cla | 7 + UD_any/tokeparser_incremental_b0/test.bd | 11 ++ .../tokeparser_incremental_b0/tokenizer.cla | 7 + .../tokeparser.dicts | 93 +++++++++++++ UD_any/tokeparser_incremental_b0/train.bd | 11 ++ .../lemmatizer_case.cla | 7 + .../lemmatizer_lookup.cla | 4 + .../lemmatizer_rules.cla | 7 + UD_any/tokeparser_incremental_b1/machine.tm | 39 ++++++ UD_any/tokeparser_incremental_b1/morpho.cla | 7 + UD_any/tokeparser_incremental_b1/normal.tm | 31 +++++ UD_any/tokeparser_incremental_b1/parser.cla | 7 + .../tokeparser_incremental_b1/segmenter.cla | 7 + UD_any/tokeparser_incremental_b1/strategy.cla | 4 + UD_any/tokeparser_incremental_b1/tagger.cla | 7 + UD_any/tokeparser_incremental_b1/test.bd | 11 ++ .../tokeparser_incremental_b1/tokenizer.cla | 7 + .../tokeparser.dicts | 93 +++++++++++++ UD_any/tokeparser_incremental_b1/train.bd | 11 ++ .../lemmatizer_case.cla | 7 + .../lemmatizer_lookup.cla | 4 + .../lemmatizer_rules.cla | 7 + UD_any/tokeparser_incremental_b2/machine.tm | 39 ++++++ UD_any/tokeparser_incremental_b2/morpho.cla | 7 + UD_any/tokeparser_incremental_b2/normal.tm | 31 +++++ UD_any/tokeparser_incremental_b2/parser.cla | 7 + .../tokeparser_incremental_b2/segmenter.cla | 7 + UD_any/tokeparser_incremental_b2/strategy.cla | 4 + UD_any/tokeparser_incremental_b2/tagger.cla | 7 + UD_any/tokeparser_incremental_b2/test.bd | 11 ++ .../tokeparser_incremental_b2/tokenizer.cla | 7 + .../tokeparser.dicts | 93 +++++++++++++ UD_any/tokeparser_incremental_b2/train.bd | 11 ++ .../lemmatizer_case.cla | 7 + .../lemmatizer_lookup.cla | 4 + .../lemmatizer_rules.cla | 7 + UD_any/tokeparser_incremental_b3/machine.tm | 39 ++++++ UD_any/tokeparser_incremental_b3/morpho.cla | 7 + UD_any/tokeparser_incremental_b3/normal.tm | 31 +++++ UD_any/tokeparser_incremental_b3/parser.cla | 7 + .../tokeparser_incremental_b3/segmenter.cla | 7 + UD_any/tokeparser_incremental_b3/strategy.cla | 4 + UD_any/tokeparser_incremental_b3/tagger.cla | 7 + UD_any/tokeparser_incremental_b3/test.bd | 11 ++ .../tokeparser_incremental_b3/tokenizer.cla | 7 + .../tokeparser.dicts | 93 +++++++++++++ UD_any/tokeparser_incremental_b3/train.bd | 11 ++ .../lemmatizer_case.cla | 7 + .../lemmatizer_lookup.cla | 4 + .../lemmatizer_rules.cla | 7 + UD_any/tokeparser_incremental_b4/machine.tm | 39 ++++++ UD_any/tokeparser_incremental_b4/morpho.cla | 7 + UD_any/tokeparser_incremental_b4/normal.tm | 31 +++++ UD_any/tokeparser_incremental_b4/parser.cla | 7 + .../tokeparser_incremental_b4/segmenter.cla | 7 + UD_any/tokeparser_incremental_b4/strategy.cla | 4 + UD_any/tokeparser_incremental_b4/tagger.cla | 7 + UD_any/tokeparser_incremental_b4/test.bd | 11 ++ .../tokeparser_incremental_b4/tokenizer.cla | 7 + .../tokeparser.dicts | 93 +++++++++++++ UD_any/tokeparser_incremental_b4/train.bd | 11 ++ .../lemmatizer_case.cla | 7 + .../lemmatizer_lookup.cla | 4 + .../lemmatizer_rules.cla | 7 + UD_any/tokeparser_incremental_b5/machine.tm | 39 ++++++ UD_any/tokeparser_incremental_b5/morpho.cla | 7 + UD_any/tokeparser_incremental_b5/normal.tm | 31 +++++ UD_any/tokeparser_incremental_b5/parser.cla | 7 + .../tokeparser_incremental_b5/segmenter.cla | 7 + UD_any/tokeparser_incremental_b5/strategy.cla | 4 + UD_any/tokeparser_incremental_b5/tagger.cla | 7 + UD_any/tokeparser_incremental_b5/test.bd | 11 ++ .../tokeparser_incremental_b5/tokenizer.cla | 7 + .../tokeparser.dicts | 93 +++++++++++++ UD_any/tokeparser_incremental_b5/train.bd | 11 ++ .../lemmatizer_case.cla | 7 + .../lemmatizer_lookup.cla | 4 + .../lemmatizer_rules.cla | 7 + UD_any/tokeparser_incremental_b6/machine.tm | 39 ++++++ UD_any/tokeparser_incremental_b6/morpho.cla | 7 + UD_any/tokeparser_incremental_b6/normal.tm | 31 +++++ UD_any/tokeparser_incremental_b6/parser.cla | 7 + .../tokeparser_incremental_b6/segmenter.cla | 7 + UD_any/tokeparser_incremental_b6/strategy.cla | 4 + UD_any/tokeparser_incremental_b6/tagger.cla | 7 + UD_any/tokeparser_incremental_b6/test.bd | 11 ++ .../tokeparser_incremental_b6/tokenizer.cla | 7 + .../tokeparser.dicts | 93 +++++++++++++ UD_any/tokeparser_incremental_b6/train.bd | 11 ++ 133 files changed, 3815 insertions(+) create mode 100644 UD_any/data/feature_models/b0/lemmatizer_rules_incremental.fm create mode 100644 UD_any/data/feature_models/b0/morpho_incremental.fm create mode 100644 UD_any/data/feature_models/b0/parser_nofuture.fm create mode 100644 UD_any/data/feature_models/b0/tagger_incremental.fm create mode 100644 UD_any/data/feature_models/b0/tokenizer_incremental.fm create mode 100644 UD_any/data/feature_models/b1/lemmatizer_rules_incremental.fm create mode 100644 UD_any/data/feature_models/b1/morpho_incremental.fm create mode 100644 UD_any/data/feature_models/b1/parser_nofuture.fm create mode 100644 UD_any/data/feature_models/b1/tagger_incremental.fm create mode 100644 UD_any/data/feature_models/b1/tokenizer_incremental.fm create mode 100644 UD_any/data/feature_models/b2/lemmatizer_rules_incremental.fm create mode 100644 UD_any/data/feature_models/b2/morpho_incremental.fm create mode 100644 UD_any/data/feature_models/b2/parser_nofuture.fm create mode 100644 UD_any/data/feature_models/b2/tagger_incremental.fm create mode 100644 UD_any/data/feature_models/b2/tokenizer_incremental.fm create mode 100644 UD_any/data/feature_models/b3/lemmatizer_rules_incremental.fm create mode 100644 UD_any/data/feature_models/b3/morpho_incremental.fm create mode 100644 UD_any/data/feature_models/b3/parser_nofuture.fm create mode 100644 UD_any/data/feature_models/b3/tagger_incremental.fm create mode 100644 UD_any/data/feature_models/b3/tokenizer_incremental.fm create mode 100644 UD_any/data/feature_models/b4/lemmatizer_rules_incremental.fm create mode 100644 UD_any/data/feature_models/b4/morpho_incremental.fm create mode 100644 UD_any/data/feature_models/b4/parser_nofuture.fm create mode 100644 UD_any/data/feature_models/b4/tagger_incremental.fm create mode 100644 UD_any/data/feature_models/b4/tokenizer_incremental.fm create mode 100644 UD_any/data/feature_models/b5/lemmatizer_rules_incremental.fm create mode 100644 UD_any/data/feature_models/b5/morpho_incremental.fm create mode 100644 UD_any/data/feature_models/b5/parser_nofuture.fm create mode 100644 UD_any/data/feature_models/b5/tagger_incremental.fm create mode 100644 UD_any/data/feature_models/b5/tokenizer_incremental.fm create mode 100644 UD_any/data/feature_models/b6/lemmatizer_rules_incremental.fm create mode 100644 UD_any/data/feature_models/b6/morpho_incremental.fm create mode 100644 UD_any/data/feature_models/b6/parser_nofuture.fm create mode 100644 UD_any/data/feature_models/b6/tagger_incremental.fm create mode 100644 UD_any/data/feature_models/b6/tokenizer_incremental.fm create mode 100644 UD_any/tokeparser_incremental_b0/lemmatizer_case.cla create mode 100644 UD_any/tokeparser_incremental_b0/lemmatizer_lookup.cla create mode 100644 UD_any/tokeparser_incremental_b0/lemmatizer_rules.cla create mode 100644 UD_any/tokeparser_incremental_b0/machine.tm create mode 100644 UD_any/tokeparser_incremental_b0/morpho.cla create mode 100644 UD_any/tokeparser_incremental_b0/normal.tm create mode 100644 UD_any/tokeparser_incremental_b0/parser.cla create mode 100644 UD_any/tokeparser_incremental_b0/segmenter.cla create mode 100644 UD_any/tokeparser_incremental_b0/strategy.cla create mode 100644 UD_any/tokeparser_incremental_b0/tagger.cla create mode 100644 UD_any/tokeparser_incremental_b0/test.bd create mode 100644 UD_any/tokeparser_incremental_b0/tokenizer.cla create mode 100644 UD_any/tokeparser_incremental_b0/tokeparser.dicts create mode 100644 UD_any/tokeparser_incremental_b0/train.bd create mode 100644 UD_any/tokeparser_incremental_b1/lemmatizer_case.cla create mode 100644 UD_any/tokeparser_incremental_b1/lemmatizer_lookup.cla create mode 100644 UD_any/tokeparser_incremental_b1/lemmatizer_rules.cla create mode 100644 UD_any/tokeparser_incremental_b1/machine.tm create mode 100644 UD_any/tokeparser_incremental_b1/morpho.cla create mode 100644 UD_any/tokeparser_incremental_b1/normal.tm create mode 100644 UD_any/tokeparser_incremental_b1/parser.cla create mode 100644 UD_any/tokeparser_incremental_b1/segmenter.cla create mode 100644 UD_any/tokeparser_incremental_b1/strategy.cla create mode 100644 UD_any/tokeparser_incremental_b1/tagger.cla create mode 100644 UD_any/tokeparser_incremental_b1/test.bd create mode 100644 UD_any/tokeparser_incremental_b1/tokenizer.cla create mode 100644 UD_any/tokeparser_incremental_b1/tokeparser.dicts create mode 100644 UD_any/tokeparser_incremental_b1/train.bd create mode 100644 UD_any/tokeparser_incremental_b2/lemmatizer_case.cla create mode 100644 UD_any/tokeparser_incremental_b2/lemmatizer_lookup.cla create mode 100644 UD_any/tokeparser_incremental_b2/lemmatizer_rules.cla create mode 100644 UD_any/tokeparser_incremental_b2/machine.tm create mode 100644 UD_any/tokeparser_incremental_b2/morpho.cla create mode 100644 UD_any/tokeparser_incremental_b2/normal.tm create mode 100644 UD_any/tokeparser_incremental_b2/parser.cla create mode 100644 UD_any/tokeparser_incremental_b2/segmenter.cla create mode 100644 UD_any/tokeparser_incremental_b2/strategy.cla create mode 100644 UD_any/tokeparser_incremental_b2/tagger.cla create mode 100644 UD_any/tokeparser_incremental_b2/test.bd create mode 100644 UD_any/tokeparser_incremental_b2/tokenizer.cla create mode 100644 UD_any/tokeparser_incremental_b2/tokeparser.dicts create mode 100644 UD_any/tokeparser_incremental_b2/train.bd create mode 100644 UD_any/tokeparser_incremental_b3/lemmatizer_case.cla create mode 100644 UD_any/tokeparser_incremental_b3/lemmatizer_lookup.cla create mode 100644 UD_any/tokeparser_incremental_b3/lemmatizer_rules.cla create mode 100644 UD_any/tokeparser_incremental_b3/machine.tm create mode 100644 UD_any/tokeparser_incremental_b3/morpho.cla create mode 100644 UD_any/tokeparser_incremental_b3/normal.tm create mode 100644 UD_any/tokeparser_incremental_b3/parser.cla create mode 100644 UD_any/tokeparser_incremental_b3/segmenter.cla create mode 100644 UD_any/tokeparser_incremental_b3/strategy.cla create mode 100644 UD_any/tokeparser_incremental_b3/tagger.cla create mode 100644 UD_any/tokeparser_incremental_b3/test.bd create mode 100644 UD_any/tokeparser_incremental_b3/tokenizer.cla create mode 100644 UD_any/tokeparser_incremental_b3/tokeparser.dicts create mode 100644 UD_any/tokeparser_incremental_b3/train.bd create mode 100644 UD_any/tokeparser_incremental_b4/lemmatizer_case.cla create mode 100644 UD_any/tokeparser_incremental_b4/lemmatizer_lookup.cla create mode 100644 UD_any/tokeparser_incremental_b4/lemmatizer_rules.cla create mode 100644 UD_any/tokeparser_incremental_b4/machine.tm create mode 100644 UD_any/tokeparser_incremental_b4/morpho.cla create mode 100644 UD_any/tokeparser_incremental_b4/normal.tm create mode 100644 UD_any/tokeparser_incremental_b4/parser.cla create mode 100644 UD_any/tokeparser_incremental_b4/segmenter.cla create mode 100644 UD_any/tokeparser_incremental_b4/strategy.cla create mode 100644 UD_any/tokeparser_incremental_b4/tagger.cla create mode 100644 UD_any/tokeparser_incremental_b4/test.bd create mode 100644 UD_any/tokeparser_incremental_b4/tokenizer.cla create mode 100644 UD_any/tokeparser_incremental_b4/tokeparser.dicts create mode 100644 UD_any/tokeparser_incremental_b4/train.bd create mode 100644 UD_any/tokeparser_incremental_b5/lemmatizer_case.cla create mode 100644 UD_any/tokeparser_incremental_b5/lemmatizer_lookup.cla create mode 100644 UD_any/tokeparser_incremental_b5/lemmatizer_rules.cla create mode 100644 UD_any/tokeparser_incremental_b5/machine.tm create mode 100644 UD_any/tokeparser_incremental_b5/morpho.cla create mode 100644 UD_any/tokeparser_incremental_b5/normal.tm create mode 100644 UD_any/tokeparser_incremental_b5/parser.cla create mode 100644 UD_any/tokeparser_incremental_b5/segmenter.cla create mode 100644 UD_any/tokeparser_incremental_b5/strategy.cla create mode 100644 UD_any/tokeparser_incremental_b5/tagger.cla create mode 100644 UD_any/tokeparser_incremental_b5/test.bd create mode 100644 UD_any/tokeparser_incremental_b5/tokenizer.cla create mode 100644 UD_any/tokeparser_incremental_b5/tokeparser.dicts create mode 100644 UD_any/tokeparser_incremental_b5/train.bd create mode 100644 UD_any/tokeparser_incremental_b6/lemmatizer_case.cla create mode 100644 UD_any/tokeparser_incremental_b6/lemmatizer_lookup.cla create mode 100644 UD_any/tokeparser_incremental_b6/lemmatizer_rules.cla create mode 100644 UD_any/tokeparser_incremental_b6/machine.tm create mode 100644 UD_any/tokeparser_incremental_b6/morpho.cla create mode 100644 UD_any/tokeparser_incremental_b6/normal.tm create mode 100644 UD_any/tokeparser_incremental_b6/parser.cla create mode 100644 UD_any/tokeparser_incremental_b6/segmenter.cla create mode 100644 UD_any/tokeparser_incremental_b6/strategy.cla create mode 100644 UD_any/tokeparser_incremental_b6/tagger.cla create mode 100644 UD_any/tokeparser_incremental_b6/test.bd create mode 100644 UD_any/tokeparser_incremental_b6/tokenizer.cla create mode 100644 UD_any/tokeparser_incremental_b6/tokeparser.dicts create mode 100644 UD_any/tokeparser_incremental_b6/train.bd diff --git a/UD_any/data/feature_models/b0/lemmatizer_rules_incremental.fm b/UD_any/data/feature_models/b0/lemmatizer_rules_incremental.fm new file mode 100644 index 0000000..033d2dd --- /dev/null +++ b/UD_any/data/feature_models/b0/lemmatizer_rules_incremental.fm @@ -0,0 +1,16 @@ +# FORM +b.0#FORM +# POS +b.0#POS +# MORPHO +b.0#MORPHO +# Suffixes +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# Prefixes +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 diff --git a/UD_any/data/feature_models/b0/morpho_incremental.fm b/UD_any/data/feature_models/b0/morpho_incremental.fm new file mode 100644 index 0000000..beca1d8 --- /dev/null +++ b/UD_any/data/feature_models/b0/morpho_incremental.fm @@ -0,0 +1,41 @@ +# FORM +b.0#FORM.fasttext +# POS +b.0#POS +# MORPHO +b.0#MORPHO +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# EOS diff --git a/UD_any/data/feature_models/b0/parser_nofuture.fm b/UD_any/data/feature_models/b0/parser_nofuture.fm new file mode 100644 index 0000000..024e113 --- /dev/null +++ b/UD_any/data/feature_models/b0/parser_nofuture.fm @@ -0,0 +1,98 @@ +# LEMMA +s.0#LEMMA.fasttext +s.1#LEMMA.fasttext +s.2#LEMMA.fasttext +s.0.ldep#LEMMA.fasttext +s.1.ldep#LEMMA.fasttext +s.0.rdep#LEMMA.fasttext +s.1.rdep#LEMMA.fasttext +s.0.ldep.ldep#LEMMA.fasttext +s.1.ldep.ldep#LEMMA.fasttext +s.0.rdep.rdep#LEMMA.fasttext +s.1.rdep.rdep#LEMMA.fasttext +s.0.l2dep#LEMMA.fasttext +s.1.l2dep#LEMMA.fasttext +s.0.r2dep#LEMMA.fasttext +s.1.r2dep#LEMMA.fasttext +b.0#LEMMA.fasttext +b.0.ldep#LEMMA.fasttext +# POS +s.0#POS +s.1#POS +s.2#POS +s.0.ldep#POS +s.1.ldep#POS +s.0.rdep#POS +s.1.rdep#POS +s.0.ldep.ldep#POS +s.1.ldep.ldep#POS +s.0.rdep.rdep#POS +s.1.rdep.rdep#POS +s.0.l2dep#POS +s.1.l2dep#POS +s.0.r2dep#POS +s.1.r2dep#POS +b.0#POS +b.0.ldep#POS +s.0.gov#POS +s.1.gov#POS +# MORPHO +s.0#MORPHO +s.1#MORPHO +s.2#MORPHO +s.0.ldep#MORPHO +s.1.ldep#MORPHO +s.0.rdep#MORPHO +s.1.rdep#MORPHO +s.0.ldep.ldep#MORPHO +s.1.ldep.ldep#MORPHO +s.0.rdep.rdep#MORPHO +s.1.rdep.rdep#MORPHO +s.0.l2dep#MORPHO +s.1.l2dep#MORPHO +s.0.r2dep#MORPHO +s.1.r2dep#MORPHO +b.0#MORPHO +b.0.ldep#MORPHO +s.1.gov#MORPHO +s.0.gov#MORPHO +# LABELS +s.0.ldep#LABEL +s.1.ldep#LABEL +s.0.rdep#LABEL +s.1.rdep#LABEL +s.0.ldep.ldep#LABEL +s.1.ldep.ldep#LABEL +s.0.rdep.rdep#LABEL +s.1.rdep.rdep#LABEL +s.0.l2dep#LABEL +s.1.l2dep#LABEL +s.0.r2dep#LABEL +s.1.r2dep#LABEL +b.0.ldep#LABEL +b.0#LABEL +s.0#LABEL +s.1#LABEL +# DISTANCE +s.0#DIST.s.1 +b.0#DIST.s.0 +# VALENCY +s.0#nbr +s.1#nbr +s.0#nbl +s.1#nbl +# UPPERCASE +b.0#FORM.U +# HISTORY +tc.0 +tc.1 +tc.2 +tc.3 +tc.4 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 diff --git a/UD_any/data/feature_models/b0/tagger_incremental.fm b/UD_any/data/feature_models/b0/tagger_incremental.fm new file mode 100644 index 0000000..e1f2841 --- /dev/null +++ b/UD_any/data/feature_models/b0/tagger_incremental.fm @@ -0,0 +1,24 @@ +# FORM +b.0#FORM.fasttext +# POS +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 diff --git a/UD_any/data/feature_models/b0/tokenizer_incremental.fm b/UD_any/data/feature_models/b0/tokenizer_incremental.fm new file mode 100644 index 0000000..0d674e8 --- /dev/null +++ b/UD_any/data/feature_models/b0/tokenizer_incremental.fm @@ -0,0 +1,43 @@ +# FORM +b.0#FORM.fasttext +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.-5 +raw.-4 +raw.-3 +raw.-2 +raw.-1 +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# POS +b.0#POS +# MORPHO +b.0#MORPHO diff --git a/UD_any/data/feature_models/b1/lemmatizer_rules_incremental.fm b/UD_any/data/feature_models/b1/lemmatizer_rules_incremental.fm new file mode 100644 index 0000000..ab8cd8c --- /dev/null +++ b/UD_any/data/feature_models/b1/lemmatizer_rules_incremental.fm @@ -0,0 +1,21 @@ +# FORM +b.-1#FORM +b.0#FORM +# POS +b.-1#POS +b.0#POS +# MORPHO +b.-1#MORPHO +b.0#MORPHO +# Suffixes +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# Prefixes +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# EOS +b.-1#EOS diff --git a/UD_any/data/feature_models/b1/morpho_incremental.fm b/UD_any/data/feature_models/b1/morpho_incremental.fm new file mode 100644 index 0000000..79e2668 --- /dev/null +++ b/UD_any/data/feature_models/b1/morpho_incremental.fm @@ -0,0 +1,45 @@ +# FORM +b.-1#FORM.fasttext +b.0#FORM.fasttext +# POS +b.-1#POS +b.0#POS +# MORPHO +b.-1#MORPHO +b.0#MORPHO +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# EOS +b.-1#EOS diff --git a/UD_any/data/feature_models/b1/parser_nofuture.fm b/UD_any/data/feature_models/b1/parser_nofuture.fm new file mode 100644 index 0000000..f8fc04f --- /dev/null +++ b/UD_any/data/feature_models/b1/parser_nofuture.fm @@ -0,0 +1,106 @@ +# LEMMA +s.0#LEMMA.fasttext +s.1#LEMMA.fasttext +s.2#LEMMA.fasttext +s.0.ldep#LEMMA.fasttext +s.1.ldep#LEMMA.fasttext +s.0.rdep#LEMMA.fasttext +s.1.rdep#LEMMA.fasttext +s.0.ldep.ldep#LEMMA.fasttext +s.1.ldep.ldep#LEMMA.fasttext +s.0.rdep.rdep#LEMMA.fasttext +s.1.rdep.rdep#LEMMA.fasttext +s.0.l2dep#LEMMA.fasttext +s.1.l2dep#LEMMA.fasttext +s.0.r2dep#LEMMA.fasttext +s.1.r2dep#LEMMA.fasttext +b.-1#LEMMA.fasttext +b.0#LEMMA.fasttext +b.0.ldep#LEMMA.fasttext +# POS +s.0#POS +s.1#POS +s.2#POS +s.0.ldep#POS +s.1.ldep#POS +s.0.rdep#POS +s.1.rdep#POS +s.0.ldep.ldep#POS +s.1.ldep.ldep#POS +s.0.rdep.rdep#POS +s.1.rdep.rdep#POS +s.0.l2dep#POS +s.1.l2dep#POS +s.0.r2dep#POS +s.1.r2dep#POS +b.-1#POS +b.0#POS +b.0.ldep#POS +b.-1.gov#POS +s.0.gov#POS +s.1.gov#POS +# MORPHO +s.0#MORPHO +s.1#MORPHO +s.2#MORPHO +s.0.ldep#MORPHO +s.1.ldep#MORPHO +s.0.rdep#MORPHO +s.1.rdep#MORPHO +s.0.ldep.ldep#MORPHO +s.1.ldep.ldep#MORPHO +s.0.rdep.rdep#MORPHO +s.1.rdep.rdep#MORPHO +s.0.l2dep#MORPHO +s.1.l2dep#MORPHO +s.0.r2dep#MORPHO +s.1.r2dep#MORPHO +b.-1#MORPHO +b.0#MORPHO +b.0.ldep#MORPHO +s.1.gov#MORPHO +b.-1.gov#MORPHO +s.0.gov#MORPHO +# LABELS +s.0.ldep#LABEL +s.1.ldep#LABEL +s.0.rdep#LABEL +s.1.rdep#LABEL +s.0.ldep.ldep#LABEL +s.1.ldep.ldep#LABEL +s.0.rdep.rdep#LABEL +s.1.rdep.rdep#LABEL +s.0.l2dep#LABEL +s.1.l2dep#LABEL +s.0.r2dep#LABEL +s.1.r2dep#LABEL +b.0.ldep#LABEL +b.-1#LABEL +b.0#LABEL +s.0#LABEL +s.1#LABEL +# DISTANCE +s.0#DIST.s.1 +b.0#DIST.s.0 +# VALENCY +s.0#nbr +s.1#nbr +s.0#nbl +s.1#nbl +# UPPERCASE +b.0#FORM.U +# EOS +b.-1#EOS +# HISTORY +tc.0 +tc.1 +tc.2 +tc.3 +tc.4 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 diff --git a/UD_any/data/feature_models/b1/tagger_incremental.fm b/UD_any/data/feature_models/b1/tagger_incremental.fm new file mode 100644 index 0000000..a1be9e9 --- /dev/null +++ b/UD_any/data/feature_models/b1/tagger_incremental.fm @@ -0,0 +1,43 @@ +# FORM +b.-1#FORM.fasttext +b.0#FORM.fasttext +# POS +b.-1#POS +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# MORPHO +b.-1#MORPHO +# EOS +b.-1#EOS diff --git a/UD_any/data/feature_models/b1/tokenizer_incremental.fm b/UD_any/data/feature_models/b1/tokenizer_incremental.fm new file mode 100644 index 0000000..d94534d --- /dev/null +++ b/UD_any/data/feature_models/b1/tokenizer_incremental.fm @@ -0,0 +1,51 @@ +# FORM +b.-1#FORM.fasttext +b.0#FORM.fasttext +# UPPERCASE +b.-1#FORM.U +# LENGTH +b.-1#FORM.LEN +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.-5 +raw.-4 +raw.-3 +raw.-2 +raw.-1 +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# POS +b.-1#POS +b.0#POS +# MORPHO +b.-1#MORPHO +b.0#MORPHO +# EOS +b.-1#EOS diff --git a/UD_any/data/feature_models/b2/lemmatizer_rules_incremental.fm b/UD_any/data/feature_models/b2/lemmatizer_rules_incremental.fm new file mode 100644 index 0000000..ef29bb6 --- /dev/null +++ b/UD_any/data/feature_models/b2/lemmatizer_rules_incremental.fm @@ -0,0 +1,25 @@ +# FORM +b.-2#FORM +b.-1#FORM +b.0#FORM +# POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# Suffixes +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# Prefixes +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# EOS +b.-1#EOS +b.-2#EOS diff --git a/UD_any/data/feature_models/b2/morpho_incremental.fm b/UD_any/data/feature_models/b2/morpho_incremental.fm new file mode 100644 index 0000000..b9075f5 --- /dev/null +++ b/UD_any/data/feature_models/b2/morpho_incremental.fm @@ -0,0 +1,49 @@ +# FORM +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# EOS +b.-1#EOS +b.-2#EOS diff --git a/UD_any/data/feature_models/b2/parser_nofuture.fm b/UD_any/data/feature_models/b2/parser_nofuture.fm new file mode 100644 index 0000000..ad23ea9 --- /dev/null +++ b/UD_any/data/feature_models/b2/parser_nofuture.fm @@ -0,0 +1,111 @@ +# LEMMA +s.0#LEMMA.fasttext +s.1#LEMMA.fasttext +s.2#LEMMA.fasttext +s.0.ldep#LEMMA.fasttext +s.1.ldep#LEMMA.fasttext +s.0.rdep#LEMMA.fasttext +s.1.rdep#LEMMA.fasttext +s.0.ldep.ldep#LEMMA.fasttext +s.1.ldep.ldep#LEMMA.fasttext +s.0.rdep.rdep#LEMMA.fasttext +s.1.rdep.rdep#LEMMA.fasttext +s.0.l2dep#LEMMA.fasttext +s.1.l2dep#LEMMA.fasttext +s.0.r2dep#LEMMA.fasttext +s.1.r2dep#LEMMA.fasttext +b.-2#LEMMA.fasttext +b.-1#LEMMA.fasttext +b.0#LEMMA.fasttext +b.0.ldep#LEMMA.fasttext +# POS +s.0#POS +s.1#POS +s.2#POS +s.0.ldep#POS +s.1.ldep#POS +s.0.rdep#POS +s.1.rdep#POS +s.0.ldep.ldep#POS +s.1.ldep.ldep#POS +s.0.rdep.rdep#POS +s.1.rdep.rdep#POS +s.0.l2dep#POS +s.1.l2dep#POS +s.0.r2dep#POS +s.1.r2dep#POS +b.-2#POS +b.-1#POS +b.0#POS +b.0.ldep#POS +b.-1.gov#POS +s.0.gov#POS +s.1.gov#POS +# MORPHO +s.0#MORPHO +s.1#MORPHO +s.2#MORPHO +s.0.ldep#MORPHO +s.1.ldep#MORPHO +s.0.rdep#MORPHO +s.1.rdep#MORPHO +s.0.ldep.ldep#MORPHO +s.1.ldep.ldep#MORPHO +s.0.rdep.rdep#MORPHO +s.1.rdep.rdep#MORPHO +s.0.l2dep#MORPHO +s.1.l2dep#MORPHO +s.0.r2dep#MORPHO +s.1.r2dep#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +b.0.ldep#MORPHO +s.1.gov#MORPHO +b.-1.gov#MORPHO +s.0.gov#MORPHO +# LABELS +s.0.ldep#LABEL +s.1.ldep#LABEL +s.0.rdep#LABEL +s.1.rdep#LABEL +s.0.ldep.ldep#LABEL +s.1.ldep.ldep#LABEL +s.0.rdep.rdep#LABEL +s.1.rdep.rdep#LABEL +s.0.l2dep#LABEL +s.1.l2dep#LABEL +s.0.r2dep#LABEL +s.1.r2dep#LABEL +b.0.ldep#LABEL +b.-2#LABEL +b.-1#LABEL +b.0#LABEL +s.0#LABEL +s.1#LABEL +# DISTANCE +s.0#DIST.s.1 +b.0#DIST.s.0 +# VALENCY +s.0#nbr +s.1#nbr +s.0#nbl +s.1#nbl +# UPPERCASE +b.0#FORM.U +# EOS +b.-1#EOS +b.-2#EOS +# HISTORY +tc.0 +tc.1 +tc.2 +tc.3 +tc.4 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 diff --git a/UD_any/data/feature_models/b2/tagger_incremental.fm b/UD_any/data/feature_models/b2/tagger_incremental.fm new file mode 100644 index 0000000..b8b748f --- /dev/null +++ b/UD_any/data/feature_models/b2/tagger_incremental.fm @@ -0,0 +1,47 @@ +# FORM +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# POS +b.-2#POS +b.-1#POS +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# MORPHO +b.-2#MORPHO +b.-1#MORPHO +# EOS +b.-1#EOS +b.-2#EOS diff --git a/UD_any/data/feature_models/b2/tokenizer_incremental.fm b/UD_any/data/feature_models/b2/tokenizer_incremental.fm new file mode 100644 index 0000000..06d3e51 --- /dev/null +++ b/UD_any/data/feature_models/b2/tokenizer_incremental.fm @@ -0,0 +1,55 @@ +# FORM +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# UPPERCASE +b.-1#FORM.U +# LENGTH +b.-1#FORM.LEN +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.-5 +raw.-4 +raw.-3 +raw.-2 +raw.-1 +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# EOS +b.-1#EOS +b.-2#EOS diff --git a/UD_any/data/feature_models/b3/lemmatizer_rules_incremental.fm b/UD_any/data/feature_models/b3/lemmatizer_rules_incremental.fm new file mode 100644 index 0000000..4ac2697 --- /dev/null +++ b/UD_any/data/feature_models/b3/lemmatizer_rules_incremental.fm @@ -0,0 +1,29 @@ +# FORM +b.-3#FORM +b.-2#FORM +b.-1#FORM +b.0#FORM +# POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# Suffixes +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# Prefixes +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS diff --git a/UD_any/data/feature_models/b3/morpho_incremental.fm b/UD_any/data/feature_models/b3/morpho_incremental.fm new file mode 100644 index 0000000..025f206 --- /dev/null +++ b/UD_any/data/feature_models/b3/morpho_incremental.fm @@ -0,0 +1,53 @@ +# FORM +b.-3#FORM.fasttext +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS diff --git a/UD_any/data/feature_models/b3/parser_nofuture.fm b/UD_any/data/feature_models/b3/parser_nofuture.fm new file mode 100644 index 0000000..a999d61 --- /dev/null +++ b/UD_any/data/feature_models/b3/parser_nofuture.fm @@ -0,0 +1,116 @@ +# LEMMA +s.0#LEMMA.fasttext +s.1#LEMMA.fasttext +s.2#LEMMA.fasttext +s.0.ldep#LEMMA.fasttext +s.1.ldep#LEMMA.fasttext +s.0.rdep#LEMMA.fasttext +s.1.rdep#LEMMA.fasttext +s.0.ldep.ldep#LEMMA.fasttext +s.1.ldep.ldep#LEMMA.fasttext +s.0.rdep.rdep#LEMMA.fasttext +s.1.rdep.rdep#LEMMA.fasttext +s.0.l2dep#LEMMA.fasttext +s.1.l2dep#LEMMA.fasttext +s.0.r2dep#LEMMA.fasttext +s.1.r2dep#LEMMA.fasttext +b.-3#LEMMA.fasttext +b.-2#LEMMA.fasttext +b.-1#LEMMA.fasttext +b.0#LEMMA.fasttext +b.0.ldep#LEMMA.fasttext +# POS +s.0#POS +s.1#POS +s.2#POS +s.0.ldep#POS +s.1.ldep#POS +s.0.rdep#POS +s.1.rdep#POS +s.0.ldep.ldep#POS +s.1.ldep.ldep#POS +s.0.rdep.rdep#POS +s.1.rdep.rdep#POS +s.0.l2dep#POS +s.1.l2dep#POS +s.0.r2dep#POS +s.1.r2dep#POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +b.0.ldep#POS +b.-1.gov#POS +s.0.gov#POS +s.1.gov#POS +# MORPHO +s.0#MORPHO +s.1#MORPHO +s.2#MORPHO +s.0.ldep#MORPHO +s.1.ldep#MORPHO +s.0.rdep#MORPHO +s.1.rdep#MORPHO +s.0.ldep.ldep#MORPHO +s.1.ldep.ldep#MORPHO +s.0.rdep.rdep#MORPHO +s.1.rdep.rdep#MORPHO +s.0.l2dep#MORPHO +s.1.l2dep#MORPHO +s.0.r2dep#MORPHO +s.1.r2dep#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +b.0.ldep#MORPHO +s.1.gov#MORPHO +b.-1.gov#MORPHO +s.0.gov#MORPHO +# LABELS +s.0.ldep#LABEL +s.1.ldep#LABEL +s.0.rdep#LABEL +s.1.rdep#LABEL +s.0.ldep.ldep#LABEL +s.1.ldep.ldep#LABEL +s.0.rdep.rdep#LABEL +s.1.rdep.rdep#LABEL +s.0.l2dep#LABEL +s.1.l2dep#LABEL +s.0.r2dep#LABEL +s.1.r2dep#LABEL +b.0.ldep#LABEL +b.-3#LABEL +b.-2#LABEL +b.-1#LABEL +b.0#LABEL +s.0#LABEL +s.1#LABEL +# DISTANCE +s.0#DIST.s.1 +b.0#DIST.s.0 +# VALENCY +s.0#nbr +s.1#nbr +s.0#nbl +s.1#nbl +# UPPERCASE +b.0#FORM.U +# EOS +b.-3#EOS +b.-2#EOS +b.-1#EOS +# HISTORY +tc.0 +tc.1 +tc.2 +tc.3 +tc.4 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 diff --git a/UD_any/data/feature_models/b3/tagger_incremental.fm b/UD_any/data/feature_models/b3/tagger_incremental.fm new file mode 100644 index 0000000..a39eb9b --- /dev/null +++ b/UD_any/data/feature_models/b3/tagger_incremental.fm @@ -0,0 +1,51 @@ +# FORM +b.-3#FORM.fasttext +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# POS +b.-3#POS +b.-2#POS +b.-1#POS +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS diff --git a/UD_any/data/feature_models/b3/tokenizer_incremental.fm b/UD_any/data/feature_models/b3/tokenizer_incremental.fm new file mode 100644 index 0000000..b066468 --- /dev/null +++ b/UD_any/data/feature_models/b3/tokenizer_incremental.fm @@ -0,0 +1,59 @@ +# FORM +b.-3#FORM.fasttext +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# UPPERCASE +b.-1#FORM.U +# LENGTH +b.-1#FORM.LEN +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.-5 +raw.-4 +raw.-3 +raw.-2 +raw.-1 +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS diff --git a/UD_any/data/feature_models/b4/lemmatizer_rules_incremental.fm b/UD_any/data/feature_models/b4/lemmatizer_rules_incremental.fm new file mode 100644 index 0000000..fd820ad --- /dev/null +++ b/UD_any/data/feature_models/b4/lemmatizer_rules_incremental.fm @@ -0,0 +1,30 @@ +# FORM +b.-1#FORM +b.0#FORM +# POS +b.-4#POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-4#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# Suffixes +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# Prefixes +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS +b.-4#EOS diff --git a/UD_any/data/feature_models/b4/morpho_incremental.fm b/UD_any/data/feature_models/b4/morpho_incremental.fm new file mode 100644 index 0000000..951491c --- /dev/null +++ b/UD_any/data/feature_models/b4/morpho_incremental.fm @@ -0,0 +1,57 @@ +# FORM +b.-4#FORM.fasttext +b.-3#FORM.fasttext +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# POS +b.-4#POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-4#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS +b.-4#EOS diff --git a/UD_any/data/feature_models/b4/parser_nofuture.fm b/UD_any/data/feature_models/b4/parser_nofuture.fm new file mode 100644 index 0000000..7558417 --- /dev/null +++ b/UD_any/data/feature_models/b4/parser_nofuture.fm @@ -0,0 +1,121 @@ +# LEMMA +s.0#LEMMA.fasttext +s.1#LEMMA.fasttext +s.2#LEMMA.fasttext +s.0.ldep#LEMMA.fasttext +s.1.ldep#LEMMA.fasttext +s.0.rdep#LEMMA.fasttext +s.1.rdep#LEMMA.fasttext +s.0.ldep.ldep#LEMMA.fasttext +s.1.ldep.ldep#LEMMA.fasttext +s.0.rdep.rdep#LEMMA.fasttext +s.1.rdep.rdep#LEMMA.fasttext +s.0.l2dep#LEMMA.fasttext +s.1.l2dep#LEMMA.fasttext +s.0.r2dep#LEMMA.fasttext +s.1.r2dep#LEMMA.fasttext +b.-4#LEMMA.fasttext +b.-3#LEMMA.fasttext +b.-2#LEMMA.fasttext +b.-1#LEMMA.fasttext +b.0#LEMMA.fasttext +b.0.ldep#LEMMA.fasttext +# POS +s.0#POS +s.1#POS +s.2#POS +s.0.ldep#POS +s.1.ldep#POS +s.0.rdep#POS +s.1.rdep#POS +s.0.ldep.ldep#POS +s.1.ldep.ldep#POS +s.0.rdep.rdep#POS +s.1.rdep.rdep#POS +s.0.l2dep#POS +s.1.l2dep#POS +s.0.r2dep#POS +s.1.r2dep#POS +b.-4#POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +b.0.ldep#POS +b.-1.gov#POS +s.0.gov#POS +s.1.gov#POS +# MORPHO +s.0#MORPHO +s.1#MORPHO +s.2#MORPHO +s.0.ldep#MORPHO +s.1.ldep#MORPHO +s.0.rdep#MORPHO +s.1.rdep#MORPHO +s.0.ldep.ldep#MORPHO +s.1.ldep.ldep#MORPHO +s.0.rdep.rdep#MORPHO +s.1.rdep.rdep#MORPHO +s.0.l2dep#MORPHO +s.1.l2dep#MORPHO +s.0.r2dep#MORPHO +s.1.r2dep#MORPHO +b.-4#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +b.0.ldep#MORPHO +s.1.gov#MORPHO +b.-1.gov#MORPHO +s.0.gov#MORPHO +# LABELS +s.0.ldep#LABEL +s.1.ldep#LABEL +s.0.rdep#LABEL +s.1.rdep#LABEL +s.0.ldep.ldep#LABEL +s.1.ldep.ldep#LABEL +s.0.rdep.rdep#LABEL +s.1.rdep.rdep#LABEL +s.0.l2dep#LABEL +s.1.l2dep#LABEL +s.0.r2dep#LABEL +s.1.r2dep#LABEL +b.0.ldep#LABEL +b.-4#LABEL +b.-3#LABEL +b.-2#LABEL +b.-1#LABEL +b.0#LABEL +s.0#LABEL +s.1#LABEL +# DISTANCE +s.0#DIST.s.1 +b.0#DIST.s.0 +# VALENCY +s.0#nbr +s.1#nbr +s.0#nbl +s.1#nbl +# UPPERCASE +b.0#FORM.U +# EOS +b.-4#EOS +b.-3#EOS +b.-2#EOS +b.-1#EOS +# HISTORY +tc.0 +tc.1 +tc.2 +tc.3 +tc.4 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 diff --git a/UD_any/data/feature_models/b4/tagger_incremental.fm b/UD_any/data/feature_models/b4/tagger_incremental.fm new file mode 100644 index 0000000..f47593f --- /dev/null +++ b/UD_any/data/feature_models/b4/tagger_incremental.fm @@ -0,0 +1,55 @@ +# FORM +b.-4#FORM.fasttext +b.-3#FORM.fasttext +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# POS +b.-4#POS +b.-3#POS +b.-2#POS +b.-1#POS +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# MORPHO +b.-4#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS +b.-4#EOS diff --git a/UD_any/data/feature_models/b4/tokenizer_incremental.fm b/UD_any/data/feature_models/b4/tokenizer_incremental.fm new file mode 100644 index 0000000..544af4f --- /dev/null +++ b/UD_any/data/feature_models/b4/tokenizer_incremental.fm @@ -0,0 +1,63 @@ +# FORM +b.-4#FORM.fasttext +b.-3#FORM.fasttext +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# UPPERCASE +b.-1#FORM.U +# LENGTH +b.-1#FORM.LEN +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.-5 +raw.-4 +raw.-3 +raw.-2 +raw.-1 +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# POS +b.-4#POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-4#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS +b.-4#EOS diff --git a/UD_any/data/feature_models/b5/lemmatizer_rules_incremental.fm b/UD_any/data/feature_models/b5/lemmatizer_rules_incremental.fm new file mode 100644 index 0000000..0eabe07 --- /dev/null +++ b/UD_any/data/feature_models/b5/lemmatizer_rules_incremental.fm @@ -0,0 +1,33 @@ +# FORM +b.-1#FORM +b.0#FORM +# POS +b.-5#POS +b.-4#POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-5#MORPHO +b.-4#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# Suffixes +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# Prefixes +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS +b.-4#EOS +b.-5#EOS diff --git a/UD_any/data/feature_models/b5/morpho_incremental.fm b/UD_any/data/feature_models/b5/morpho_incremental.fm new file mode 100644 index 0000000..c0d9513 --- /dev/null +++ b/UD_any/data/feature_models/b5/morpho_incremental.fm @@ -0,0 +1,61 @@ +# FORM +b.-5#FORM.fasttext +b.-4#FORM.fasttext +b.-3#FORM.fasttext +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# POS +b.-5#POS +b.-4#POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-5#MORPHO +b.-4#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS +b.-4#EOS +b.-5#EOS diff --git a/UD_any/data/feature_models/b5/parser_nofuture.fm b/UD_any/data/feature_models/b5/parser_nofuture.fm new file mode 100644 index 0000000..1b9ba09 --- /dev/null +++ b/UD_any/data/feature_models/b5/parser_nofuture.fm @@ -0,0 +1,126 @@ +# LEMMA +s.0#LEMMA.fasttext +s.1#LEMMA.fasttext +s.2#LEMMA.fasttext +s.0.ldep#LEMMA.fasttext +s.1.ldep#LEMMA.fasttext +s.0.rdep#LEMMA.fasttext +s.1.rdep#LEMMA.fasttext +s.0.ldep.ldep#LEMMA.fasttext +s.1.ldep.ldep#LEMMA.fasttext +s.0.rdep.rdep#LEMMA.fasttext +s.1.rdep.rdep#LEMMA.fasttext +s.0.l2dep#LEMMA.fasttext +s.1.l2dep#LEMMA.fasttext +s.0.r2dep#LEMMA.fasttext +s.1.r2dep#LEMMA.fasttext +b.-5#LEMMA.fasttext +b.-4#LEMMA.fasttext +b.-3#LEMMA.fasttext +b.-2#LEMMA.fasttext +b.-1#LEMMA.fasttext +b.0#LEMMA.fasttext +b.0.ldep#LEMMA.fasttext +# POS +s.0#POS +s.1#POS +s.2#POS +s.0.ldep#POS +s.1.ldep#POS +s.0.rdep#POS +s.1.rdep#POS +s.0.ldep.ldep#POS +s.1.ldep.ldep#POS +s.0.rdep.rdep#POS +s.1.rdep.rdep#POS +s.0.l2dep#POS +s.1.l2dep#POS +s.0.r2dep#POS +s.1.r2dep#POS +b.-5#POS +b.-4#POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +b.0.ldep#POS +b.-1.gov#POS +s.0.gov#POS +s.1.gov#POS +# MORPHO +s.0#MORPHO +s.1#MORPHO +s.2#MORPHO +s.0.ldep#MORPHO +s.1.ldep#MORPHO +s.0.rdep#MORPHO +s.1.rdep#MORPHO +s.0.ldep.ldep#MORPHO +s.1.ldep.ldep#MORPHO +s.0.rdep.rdep#MORPHO +s.1.rdep.rdep#MORPHO +s.0.l2dep#MORPHO +s.1.l2dep#MORPHO +s.0.r2dep#MORPHO +s.1.r2dep#MORPHO +b.-5#MORPHO +b.-4#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +b.0.ldep#MORPHO +s.1.gov#MORPHO +b.-1.gov#MORPHO +s.0.gov#MORPHO +# LABELS +s.0.ldep#LABEL +s.1.ldep#LABEL +s.0.rdep#LABEL +s.1.rdep#LABEL +s.0.ldep.ldep#LABEL +s.1.ldep.ldep#LABEL +s.0.rdep.rdep#LABEL +s.1.rdep.rdep#LABEL +s.0.l2dep#LABEL +s.1.l2dep#LABEL +s.0.r2dep#LABEL +s.1.r2dep#LABEL +b.0.ldep#LABEL +b.-5#LABEL +b.-4#LABEL +b.-3#LABEL +b.-2#LABEL +b.-1#LABEL +b.0#LABEL +s.0#LABEL +s.1#LABEL +# DISTANCE +s.0#DIST.s.1 +b.0#DIST.s.0 +# VALENCY +s.0#nbr +s.1#nbr +s.0#nbl +s.1#nbl +# UPPERCASE +b.0#FORM.U +# EOS +b.-5#EOS +b.-4#EOS +b.-3#EOS +b.-2#EOS +b.-1#EOS +# HISTORY +tc.0 +tc.1 +tc.2 +tc.3 +tc.4 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 diff --git a/UD_any/data/feature_models/b5/tagger_incremental.fm b/UD_any/data/feature_models/b5/tagger_incremental.fm new file mode 100644 index 0000000..47c3983 --- /dev/null +++ b/UD_any/data/feature_models/b5/tagger_incremental.fm @@ -0,0 +1,59 @@ +# FORM +b.-5#FORM.fasttext +b.-4#FORM.fasttext +b.-3#FORM.fasttext +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# POS +b.-5#POS +b.-4#POS +b.-3#POS +b.-2#POS +b.-1#POS +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# MORPHO +b.-5#MORPHO +b.-4#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS +b.-4#EOS +b.-5#EOS diff --git a/UD_any/data/feature_models/b5/tokenizer_incremental.fm b/UD_any/data/feature_models/b5/tokenizer_incremental.fm new file mode 100644 index 0000000..48e4816 --- /dev/null +++ b/UD_any/data/feature_models/b5/tokenizer_incremental.fm @@ -0,0 +1,67 @@ +# FORM +b.-5#FORM.fasttext +b.-4#FORM.fasttext +b.-3#FORM.fasttext +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# UPPERCASE +b.-1#FORM.U +# LENGTH +b.-1#FORM.LEN +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.-5 +raw.-4 +raw.-3 +raw.-2 +raw.-1 +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# POS +b.-5#POS +b.-4#POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-5#MORPHO +b.-4#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS +b.-4#EOS +b.-5#EOS diff --git a/UD_any/data/feature_models/b6/lemmatizer_rules_incremental.fm b/UD_any/data/feature_models/b6/lemmatizer_rules_incremental.fm new file mode 100644 index 0000000..e13a591 --- /dev/null +++ b/UD_any/data/feature_models/b6/lemmatizer_rules_incremental.fm @@ -0,0 +1,36 @@ +# FORM +b.-1#FORM +b.0#FORM +# POS +b.-6#POS +b.-5#POS +b.-4#POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-6#MORPHO +b.-5#MORPHO +b.-4#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# Suffixes +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# Prefixes +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS +b.-4#EOS +b.-5#EOS +b.-6#EOS diff --git a/UD_any/data/feature_models/b6/morpho_incremental.fm b/UD_any/data/feature_models/b6/morpho_incremental.fm new file mode 100644 index 0000000..89e4914 --- /dev/null +++ b/UD_any/data/feature_models/b6/morpho_incremental.fm @@ -0,0 +1,65 @@ +# FORM +b.-6#FORM.fasttext +b.-5#FORM.fasttext +b.-4#FORM.fasttext +b.-3#FORM.fasttext +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# POS +b.-6#POS +b.-5#POS +b.-4#POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-6#MORPHO +b.-5#MORPHO +b.-4#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS +b.-4#EOS +b.-5#EOS +b.-6#EOS diff --git a/UD_any/data/feature_models/b6/parser_nofuture.fm b/UD_any/data/feature_models/b6/parser_nofuture.fm new file mode 100644 index 0000000..8cab2b7 --- /dev/null +++ b/UD_any/data/feature_models/b6/parser_nofuture.fm @@ -0,0 +1,131 @@ +# LEMMA +s.0#LEMMA.fasttext +s.1#LEMMA.fasttext +s.2#LEMMA.fasttext +s.0.ldep#LEMMA.fasttext +s.1.ldep#LEMMA.fasttext +s.0.rdep#LEMMA.fasttext +s.1.rdep#LEMMA.fasttext +s.0.ldep.ldep#LEMMA.fasttext +s.1.ldep.ldep#LEMMA.fasttext +s.0.rdep.rdep#LEMMA.fasttext +s.1.rdep.rdep#LEMMA.fasttext +s.0.l2dep#LEMMA.fasttext +s.1.l2dep#LEMMA.fasttext +s.0.r2dep#LEMMA.fasttext +s.1.r2dep#LEMMA.fasttext +b.-6#LEMMA.fasttext +b.-5#LEMMA.fasttext +b.-4#LEMMA.fasttext +b.-3#LEMMA.fasttext +b.-2#LEMMA.fasttext +b.-1#LEMMA.fasttext +b.0#LEMMA.fasttext +b.0.ldep#LEMMA.fasttext +# POS +s.0#POS +s.1#POS +s.2#POS +s.0.ldep#POS +s.1.ldep#POS +s.0.rdep#POS +s.1.rdep#POS +s.0.ldep.ldep#POS +s.1.ldep.ldep#POS +s.0.rdep.rdep#POS +s.1.rdep.rdep#POS +s.0.l2dep#POS +s.1.l2dep#POS +s.0.r2dep#POS +s.1.r2dep#POS +b.-6#POS +b.-5#POS +b.-4#POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +b.0.ldep#POS +b.-1.gov#POS +s.0.gov#POS +s.1.gov#POS +# MORPHO +s.0#MORPHO +s.1#MORPHO +s.2#MORPHO +s.0.ldep#MORPHO +s.1.ldep#MORPHO +s.0.rdep#MORPHO +s.1.rdep#MORPHO +s.0.ldep.ldep#MORPHO +s.1.ldep.ldep#MORPHO +s.0.rdep.rdep#MORPHO +s.1.rdep.rdep#MORPHO +s.0.l2dep#MORPHO +s.1.l2dep#MORPHO +s.0.r2dep#MORPHO +s.1.r2dep#MORPHO +b.-6#MORPHO +b.-5#MORPHO +b.-4#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +b.0.ldep#MORPHO +s.1.gov#MORPHO +b.-1.gov#MORPHO +s.0.gov#MORPHO +# LABELS +s.0.ldep#LABEL +s.1.ldep#LABEL +s.0.rdep#LABEL +s.1.rdep#LABEL +s.0.ldep.ldep#LABEL +s.1.ldep.ldep#LABEL +s.0.rdep.rdep#LABEL +s.1.rdep.rdep#LABEL +s.0.l2dep#LABEL +s.1.l2dep#LABEL +s.0.r2dep#LABEL +s.1.r2dep#LABEL +b.0.ldep#LABEL +b.-6#LABEL +b.-5#LABEL +b.-4#LABEL +b.-3#LABEL +b.-2#LABEL +b.-1#LABEL +b.0#LABEL +s.0#LABEL +s.1#LABEL +# DISTANCE +s.0#DIST.s.1 +b.0#DIST.s.0 +# VALENCY +s.0#nbr +s.1#nbr +s.0#nbl +s.1#nbl +# UPPERCASE +b.0#FORM.U +# EOS +b.-6#EOS +b.-5#EOS +b.-4#EOS +b.-3#EOS +b.-2#EOS +b.-1#EOS +# HISTORY +tc.0 +tc.1 +tc.2 +tc.3 +tc.4 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 diff --git a/UD_any/data/feature_models/b6/tagger_incremental.fm b/UD_any/data/feature_models/b6/tagger_incremental.fm new file mode 100644 index 0000000..43774cd --- /dev/null +++ b/UD_any/data/feature_models/b6/tagger_incremental.fm @@ -0,0 +1,63 @@ +# FORM +b.-6#FORM.fasttext +b.-5#FORM.fasttext +b.-4#FORM.fasttext +b.-3#FORM.fasttext +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# POS +b.-6#POS +b.-5#POS +b.-4#POS +b.-3#POS +b.-2#POS +b.-1#POS +# UPPERCASE +b.0#FORM.U +# LENGTH +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# MORPHO +b.-6#MORPHO +b.-5#MORPHO +b.-4#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS +b.-4#EOS +b.-5#EOS +b.-6#EOS diff --git a/UD_any/data/feature_models/b6/tokenizer_incremental.fm b/UD_any/data/feature_models/b6/tokenizer_incremental.fm new file mode 100644 index 0000000..960b867 --- /dev/null +++ b/UD_any/data/feature_models/b6/tokenizer_incremental.fm @@ -0,0 +1,71 @@ +# FORM +b.-6#FORM.fasttext +b.-5#FORM.fasttext +b.-4#FORM.fasttext +b.-3#FORM.fasttext +b.-2#FORM.fasttext +b.-1#FORM.fasttext +b.0#FORM.fasttext +# UPPERCASE +b.-1#FORM.U +# LENGTH +b.-1#FORM.LEN +b.0#FORM.LEN +# SUFFIXES +b.0#FORM.PART.-4.-4 +b.0#FORM.PART.-3.-3 +b.0#FORM.PART.-2.-2 +b.0#FORM.PART.-1.-1 +# PREFIXES +b.0#FORM.PART.0.0 +b.0#FORM.PART.1.1 +b.0#FORM.PART.2.2 +b.0#FORM.PART.3.3 +# RAW INPUT +raw.-5 +raw.-4 +raw.-3 +raw.-2 +raw.-1 +raw.0 +raw.2 +raw.3 +raw.4 +raw.5 +raw.6 +# Features methode incrementale +s.0#FORM.fasttext +s.0.ldep#FORM.fasttext +s.0#POS +s.0.ldep#POS +s.0#MORPHO +s.0.ldep#MORPHO +s.0.ldep.ldep#MORPHO +s.0.ldep#LABEL +s.0#DIST.s.1 +s.0#nbl +s.0#LABEL +b.0#LABEL +# POS +b.-6#POS +b.-5#POS +b.-4#POS +b.-3#POS +b.-2#POS +b.-1#POS +b.0#POS +# MORPHO +b.-6#MORPHO +b.-5#MORPHO +b.-4#MORPHO +b.-3#MORPHO +b.-2#MORPHO +b.-1#MORPHO +b.0#MORPHO +# EOS +b.-1#EOS +b.-2#EOS +b.-3#EOS +b.-4#EOS +b.-5#EOS +b.-6#EOS diff --git a/UD_any/tokeparser_incremental_b0/lemmatizer_case.cla b/UD_any/tokeparser_incremental_b0/lemmatizer_case.cla new file mode 100644 index 0000000..fc9f9f5 --- /dev/null +++ b/UD_any/tokeparser_incremental_b0/lemmatizer_case.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Case +Type : Prediction +Oracle : lemma_case +Feature Model : data/feature_models/b0/lemmatizer_rules_incremental.fm +Action Set : data/lemmatizer_case.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b0/lemmatizer_lookup.cla b/UD_any/tokeparser_incremental_b0/lemmatizer_lookup.cla new file mode 100644 index 0000000..41f63b2 --- /dev/null +++ b/UD_any/tokeparser_incremental_b0/lemmatizer_lookup.cla @@ -0,0 +1,4 @@ +Name : Lemmatizer_Lookup +Type : Information +Oracle : lemma_lookup +Oracle Filename : data/maca_trans_lemmatizer_exceptions.fplm diff --git a/UD_any/tokeparser_incremental_b0/lemmatizer_rules.cla b/UD_any/tokeparser_incremental_b0/lemmatizer_rules.cla new file mode 100644 index 0000000..411db9f --- /dev/null +++ b/UD_any/tokeparser_incremental_b0/lemmatizer_rules.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Rules +Type : Prediction +Oracle : lemma_rules +Feature Model : data/feature_models/b0/lemmatizer_rules_incremental.fm +Action Set : data/lemmatizer_rules.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b0/machine.tm b/UD_any/tokeparser_incremental_b0/machine.tm new file mode 100644 index 0000000..f591178 --- /dev/null +++ b/UD_any/tokeparser_incremental_b0/machine.tm @@ -0,0 +1,39 @@ +Name : Tokenizer, Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tokeparser.dicts +%CLASSIFIERS +strategy strategy.cla +tokenizer tokenizer.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case lemmatizer_case.cla +parser parser.cla +segmenter segmenter.cla +%STATES +strategy strategy +tokenizer tokenizer +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +segmenter segmenter +%TRANSITIONS +strategy tokenizer MOVE tokenizer +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +strategy segmenter MOVE segmenter +tokenizer strategy * +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * +segmenter strategy * diff --git a/UD_any/tokeparser_incremental_b0/morpho.cla b/UD_any/tokeparser_incremental_b0/morpho.cla new file mode 100644 index 0000000..a0bbaf6 --- /dev/null +++ b/UD_any/tokeparser_incremental_b0/morpho.cla @@ -0,0 +1,7 @@ +Name : Morpho +Type : Prediction +Oracle : morpho +Feature Model : data/feature_models/b0/morpho_incremental.fm +Action Set : data/morpho_parts.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b0/normal.tm b/UD_any/tokeparser_incremental_b0/normal.tm new file mode 100644 index 0000000..77ebff3 --- /dev/null +++ b/UD_any/tokeparser_incremental_b0/normal.tm @@ -0,0 +1,31 @@ +Name : Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tagparser.dicts +%CLASSIFIERS +strategy strategy.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case lemmatizer_case.cla +parser parser.cla +%STATES +strategy strategy +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +%TRANSITIONS +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * diff --git a/UD_any/tokeparser_incremental_b0/parser.cla b/UD_any/tokeparser_incremental_b0/parser.cla new file mode 100644 index 0000000..f8f6f80 --- /dev/null +++ b/UD_any/tokeparser_incremental_b0/parser.cla @@ -0,0 +1,7 @@ +Name : Parser +Type : Prediction +Oracle : parser +Feature Model : data/feature_models/b0/parser_nofuture.fm +Action Set : data/parser.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b0/segmenter.cla b/UD_any/tokeparser_incremental_b0/segmenter.cla new file mode 100644 index 0000000..e93321f --- /dev/null +++ b/UD_any/tokeparser_incremental_b0/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/b0/parser_nofuture.fm +Action Set : data/segmenter.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b0/strategy.cla b/UD_any/tokeparser_incremental_b0/strategy.cla new file mode 100644 index 0000000..fcf66b5 --- /dev/null +++ b/UD_any/tokeparser_incremental_b0/strategy.cla @@ -0,0 +1,4 @@ +Name : Strategy +Type : Information +Oracle : strategy_tokenizer,tagger,morpho,lemmatizer,parser +Oracle Filename : none diff --git a/UD_any/tokeparser_incremental_b0/tagger.cla b/UD_any/tokeparser_incremental_b0/tagger.cla new file mode 100644 index 0000000..2926406 --- /dev/null +++ b/UD_any/tokeparser_incremental_b0/tagger.cla @@ -0,0 +1,7 @@ +Name : Tagger +Type : Prediction +Oracle : tagger +Feature Model : data/feature_models/b0/tagger_incremental.fm +Action Set : data/tagger.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b0/test.bd b/UD_any/tokeparser_incremental_b0/test.bd new file mode 100644 index 0000000..b2ef996 --- /dev/null +++ b/UD_any/tokeparser_incremental_b0/test.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none Final 1 +1 FORM hyp form Final 1 +3 POS hyp pos Final 1 +4 XPOS hyp pos Final 1 +5 MORPHO hyp morpho Final 1 +2 LEMMA hyp form Final 1 +6 GOV hyp int Final 1 +7 LABEL hyp labels Final 1 +0 EOS hyp eos Final 0 diff --git a/UD_any/tokeparser_incremental_b0/tokenizer.cla b/UD_any/tokeparser_incremental_b0/tokenizer.cla new file mode 100644 index 0000000..a574ffa --- /dev/null +++ b/UD_any/tokeparser_incremental_b0/tokenizer.cla @@ -0,0 +1,7 @@ +Name : Tokenizer +Type : Prediction +Oracle : tokenizer +Feature Model : data/feature_models/b0/tokenizer_incremental.fm +Action Set : data/tokenizer.as +Topology : (800,RELU,0.3) +Dynamic : no diff --git a/UD_any/tokeparser_incremental_b0/tokeparser.dicts b/UD_any/tokeparser_incremental_b0/tokeparser.dicts new file mode 100644 index 0000000..7378953 --- /dev/null +++ b/UD_any/tokeparser_incremental_b0/tokeparser.dicts @@ -0,0 +1,93 @@ +#Name Dimension Mode # +############################ +# TOKENIZER +Tokenizer_actions 18 Embeddings _ 200 +Tokenizer_bool 16 Embeddings _ 20 +Tokenizer_int 16 Embeddings _ 2000 +Tokenizer_eos 16 Embeddings _ 20 +Tokenizer_gov 16 Embeddings _ 100 +Tokenizer_pos 18 Embeddings _ 30 +Tokenizer_form 80 Embeddings _ 20 +Tokenizer_form.f 80 Embeddings _ 200000 +Tokenizer_lemma 80 Embeddings _ 20 +Tokenizer_letters 80 Embeddings _ 5000 +Tokenizer_labels 18 Embeddings _ 200 +Tokenizer_morpho 22 Embeddings _ 8000 +# TAGGER +Tagger_actions 18 Embeddings _ 200 +Tagger_bool 16 Embeddings _ 20 +Tagger_int 16 Embeddings _ 2000 +Tagger_eos 16 Embeddings _ 20 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 30 +Tagger_form 80 Embeddings _ 20 +Tagger_form.f 80 Embeddings _ 200000 +Tagger_lemma 80 Embeddings _ 20 +Tagger_letters 80 Embeddings _ 5000 +Tagger_labels 18 Embeddings _ 200 +Tagger_morpho 22 Embeddings _ 8000 +# MORPHO +Morpho_actions 18 Embeddings _ 200 +Morpho_bool 16 Embeddings _ 20 +Morpho_int 16 Embeddings _ 2000 +Morpho_eos 16 Embeddings _ 20 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 30 +Morpho_form 80 Embeddings _ 20 +Morpho_form.f 80 Embeddings _ 200000 +Morpho_lemma 80 Embeddings _ 20 +Morpho_letters 80 Embeddings _ 5000 +Morpho_labels 18 Embeddings _ 200 +Morpho_morpho 22 Embeddings _ 80000 +# LEMMATIZER_RULE +Lemmatizer_Rules_actions 18 Embeddings _ 2000 +Lemmatizer_Rules_bool 16 Embeddings _ 20 +Lemmatizer_Rules_int 16 Embeddings _ 2000 +Lemmatizer_Rules_eos 16 Embeddings _ 20 +Lemmatizer_Rules_gov 16 Embeddings _ 20 +Lemmatizer_Rules_pos 18 Embeddings _ 30 +Lemmatizer_Rules_form 80 Embeddings _ 300000 +Lemmatizer_Rules_form.f 80 Embeddings _ 20 +Lemmatizer_Rules_lemma 80 Embeddings _ 20 +Lemmatizer_Rules_letters 80 Embeddings _ 5000 +Lemmatizer_Rules_labels 18 Embeddings _ 20 +Lemmatizer_Rules_morpho 22 Embeddings _ 8000 +# LEMMATIZER_CASE +Lemmatizer_Case_actions 18 Embeddings _ 2000 +Lemmatizer_Case_bool 16 Embeddings _ 20 +Lemmatizer_Case_int 16 Embeddings _ 2000 +Lemmatizer_Case_eos 16 Embeddings _ 20 +Lemmatizer_Case_gov 16 Embeddings _ 20 +Lemmatizer_Case_pos 18 Embeddings _ 30 +Lemmatizer_Case_form 80 Embeddings _ 300000 +Lemmatizer_Case_form.f 80 Embeddings _ 20 +Lemmatizer_Case_lemma 80 Embeddings _ 20 +Lemmatizer_Case_letters 80 Embeddings _ 5000 +Lemmatizer_Case_labels 18 Embeddings _ 20 +Lemmatizer_Case_morpho 22 Embeddings _ 8000 +# PARSER +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 20 +Parser_int 16 Embeddings _ 2000 +Parser_eos 16 Embeddings _ 20 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 30 +Parser_form 80 Embeddings _ 20 +Parser_form.f 80 Embeddings _ 200000 +Parser_lemma 80 Embeddings _ 20 +Parser_letters 80 Embeddings _ 5000 +Parser_labels 18 Embeddings _ 200 +Parser_morpho 22 Embeddings _ 8000 +# SEGMENTER +Segmenter_actions 18 Embeddings _ 200 +Segmenter_bool 16 Embeddings _ 20 +Segmenter_int 16 Embeddings _ 2000 +Segmenter_eos 16 Embeddings _ 20 +Segmenter_gov 16 Embeddings _ 100 +Segmenter_pos 18 Embeddings _ 30 +Segmenter_form 80 Embeddings _ 20 +Segmenter_form.f 80 Embeddings _ 200000 +Segmenter_lemma 80 Embeddings _ 20 +Segmenter_letters 80 Embeddings _ 5000 +Segmenter_labels 18 Embeddings _ 200 +Segmenter_morpho 22 Embeddings _ 8000 diff --git a/UD_any/tokeparser_incremental_b0/train.bd b/UD_any/tokeparser_incremental_b0/train.bd new file mode 100644 index 0000000..963c311 --- /dev/null +++ b/UD_any/tokeparser_incremental_b0/train.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none FromZero 1 +1 FORM hyp form FromZero 1 +3 POS hyp pos FromZero 1 +4 XPOS hyp pos FromZero 1 +5 MORPHO hyp morpho FromZero 1 +2 LEMMA hyp form FromZero 1 +6 GOV hyp int FromZero 1 +7 LABEL hyp labels FromZero 1 +0 EOS hyp eos FromZero 0 diff --git a/UD_any/tokeparser_incremental_b1/lemmatizer_case.cla b/UD_any/tokeparser_incremental_b1/lemmatizer_case.cla new file mode 100644 index 0000000..c42ed4f --- /dev/null +++ b/UD_any/tokeparser_incremental_b1/lemmatizer_case.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Case +Type : Prediction +Oracle : lemma_case +Feature Model : data/feature_models/b1/lemmatizer_rules_incremental.fm +Action Set : data/lemmatizer_case.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b1/lemmatizer_lookup.cla b/UD_any/tokeparser_incremental_b1/lemmatizer_lookup.cla new file mode 100644 index 0000000..41f63b2 --- /dev/null +++ b/UD_any/tokeparser_incremental_b1/lemmatizer_lookup.cla @@ -0,0 +1,4 @@ +Name : Lemmatizer_Lookup +Type : Information +Oracle : lemma_lookup +Oracle Filename : data/maca_trans_lemmatizer_exceptions.fplm diff --git a/UD_any/tokeparser_incremental_b1/lemmatizer_rules.cla b/UD_any/tokeparser_incremental_b1/lemmatizer_rules.cla new file mode 100644 index 0000000..13dfa3a --- /dev/null +++ b/UD_any/tokeparser_incremental_b1/lemmatizer_rules.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Rules +Type : Prediction +Oracle : lemma_rules +Feature Model : data/feature_models/b1/lemmatizer_rules_incremental.fm +Action Set : data/lemmatizer_rules.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b1/machine.tm b/UD_any/tokeparser_incremental_b1/machine.tm new file mode 100644 index 0000000..f591178 --- /dev/null +++ b/UD_any/tokeparser_incremental_b1/machine.tm @@ -0,0 +1,39 @@ +Name : Tokenizer, Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tokeparser.dicts +%CLASSIFIERS +strategy strategy.cla +tokenizer tokenizer.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case lemmatizer_case.cla +parser parser.cla +segmenter segmenter.cla +%STATES +strategy strategy +tokenizer tokenizer +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +segmenter segmenter +%TRANSITIONS +strategy tokenizer MOVE tokenizer +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +strategy segmenter MOVE segmenter +tokenizer strategy * +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * +segmenter strategy * diff --git a/UD_any/tokeparser_incremental_b1/morpho.cla b/UD_any/tokeparser_incremental_b1/morpho.cla new file mode 100644 index 0000000..5703ac3 --- /dev/null +++ b/UD_any/tokeparser_incremental_b1/morpho.cla @@ -0,0 +1,7 @@ +Name : Morpho +Type : Prediction +Oracle : morpho +Feature Model : data/feature_models/b1/morpho_incremental.fm +Action Set : data/morpho_parts.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b1/normal.tm b/UD_any/tokeparser_incremental_b1/normal.tm new file mode 100644 index 0000000..77ebff3 --- /dev/null +++ b/UD_any/tokeparser_incremental_b1/normal.tm @@ -0,0 +1,31 @@ +Name : Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tagparser.dicts +%CLASSIFIERS +strategy strategy.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case lemmatizer_case.cla +parser parser.cla +%STATES +strategy strategy +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +%TRANSITIONS +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * diff --git a/UD_any/tokeparser_incremental_b1/parser.cla b/UD_any/tokeparser_incremental_b1/parser.cla new file mode 100644 index 0000000..591d496 --- /dev/null +++ b/UD_any/tokeparser_incremental_b1/parser.cla @@ -0,0 +1,7 @@ +Name : Parser +Type : Prediction +Oracle : parser +Feature Model : data/feature_models/b1/parser_nofuture.fm +Action Set : data/parser.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b1/segmenter.cla b/UD_any/tokeparser_incremental_b1/segmenter.cla new file mode 100644 index 0000000..963a0f0 --- /dev/null +++ b/UD_any/tokeparser_incremental_b1/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/b1/parser_nofuture.fm +Action Set : data/segmenter.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b1/strategy.cla b/UD_any/tokeparser_incremental_b1/strategy.cla new file mode 100644 index 0000000..fcf66b5 --- /dev/null +++ b/UD_any/tokeparser_incremental_b1/strategy.cla @@ -0,0 +1,4 @@ +Name : Strategy +Type : Information +Oracle : strategy_tokenizer,tagger,morpho,lemmatizer,parser +Oracle Filename : none diff --git a/UD_any/tokeparser_incremental_b1/tagger.cla b/UD_any/tokeparser_incremental_b1/tagger.cla new file mode 100644 index 0000000..519b9fd --- /dev/null +++ b/UD_any/tokeparser_incremental_b1/tagger.cla @@ -0,0 +1,7 @@ +Name : Tagger +Type : Prediction +Oracle : tagger +Feature Model : data/feature_models/b1/tagger_incremental.fm +Action Set : data/tagger.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b1/test.bd b/UD_any/tokeparser_incremental_b1/test.bd new file mode 100644 index 0000000..b2ef996 --- /dev/null +++ b/UD_any/tokeparser_incremental_b1/test.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none Final 1 +1 FORM hyp form Final 1 +3 POS hyp pos Final 1 +4 XPOS hyp pos Final 1 +5 MORPHO hyp morpho Final 1 +2 LEMMA hyp form Final 1 +6 GOV hyp int Final 1 +7 LABEL hyp labels Final 1 +0 EOS hyp eos Final 0 diff --git a/UD_any/tokeparser_incremental_b1/tokenizer.cla b/UD_any/tokeparser_incremental_b1/tokenizer.cla new file mode 100644 index 0000000..65ba67d --- /dev/null +++ b/UD_any/tokeparser_incremental_b1/tokenizer.cla @@ -0,0 +1,7 @@ +Name : Tokenizer +Type : Prediction +Oracle : tokenizer +Feature Model : data/feature_models/b1/tokenizer_incremental.fm +Action Set : data/tokenizer.as +Topology : (800,RELU,0.3) +Dynamic : no diff --git a/UD_any/tokeparser_incremental_b1/tokeparser.dicts b/UD_any/tokeparser_incremental_b1/tokeparser.dicts new file mode 100644 index 0000000..7378953 --- /dev/null +++ b/UD_any/tokeparser_incremental_b1/tokeparser.dicts @@ -0,0 +1,93 @@ +#Name Dimension Mode # +############################ +# TOKENIZER +Tokenizer_actions 18 Embeddings _ 200 +Tokenizer_bool 16 Embeddings _ 20 +Tokenizer_int 16 Embeddings _ 2000 +Tokenizer_eos 16 Embeddings _ 20 +Tokenizer_gov 16 Embeddings _ 100 +Tokenizer_pos 18 Embeddings _ 30 +Tokenizer_form 80 Embeddings _ 20 +Tokenizer_form.f 80 Embeddings _ 200000 +Tokenizer_lemma 80 Embeddings _ 20 +Tokenizer_letters 80 Embeddings _ 5000 +Tokenizer_labels 18 Embeddings _ 200 +Tokenizer_morpho 22 Embeddings _ 8000 +# TAGGER +Tagger_actions 18 Embeddings _ 200 +Tagger_bool 16 Embeddings _ 20 +Tagger_int 16 Embeddings _ 2000 +Tagger_eos 16 Embeddings _ 20 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 30 +Tagger_form 80 Embeddings _ 20 +Tagger_form.f 80 Embeddings _ 200000 +Tagger_lemma 80 Embeddings _ 20 +Tagger_letters 80 Embeddings _ 5000 +Tagger_labels 18 Embeddings _ 200 +Tagger_morpho 22 Embeddings _ 8000 +# MORPHO +Morpho_actions 18 Embeddings _ 200 +Morpho_bool 16 Embeddings _ 20 +Morpho_int 16 Embeddings _ 2000 +Morpho_eos 16 Embeddings _ 20 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 30 +Morpho_form 80 Embeddings _ 20 +Morpho_form.f 80 Embeddings _ 200000 +Morpho_lemma 80 Embeddings _ 20 +Morpho_letters 80 Embeddings _ 5000 +Morpho_labels 18 Embeddings _ 200 +Morpho_morpho 22 Embeddings _ 80000 +# LEMMATIZER_RULE +Lemmatizer_Rules_actions 18 Embeddings _ 2000 +Lemmatizer_Rules_bool 16 Embeddings _ 20 +Lemmatizer_Rules_int 16 Embeddings _ 2000 +Lemmatizer_Rules_eos 16 Embeddings _ 20 +Lemmatizer_Rules_gov 16 Embeddings _ 20 +Lemmatizer_Rules_pos 18 Embeddings _ 30 +Lemmatizer_Rules_form 80 Embeddings _ 300000 +Lemmatizer_Rules_form.f 80 Embeddings _ 20 +Lemmatizer_Rules_lemma 80 Embeddings _ 20 +Lemmatizer_Rules_letters 80 Embeddings _ 5000 +Lemmatizer_Rules_labels 18 Embeddings _ 20 +Lemmatizer_Rules_morpho 22 Embeddings _ 8000 +# LEMMATIZER_CASE +Lemmatizer_Case_actions 18 Embeddings _ 2000 +Lemmatizer_Case_bool 16 Embeddings _ 20 +Lemmatizer_Case_int 16 Embeddings _ 2000 +Lemmatizer_Case_eos 16 Embeddings _ 20 +Lemmatizer_Case_gov 16 Embeddings _ 20 +Lemmatizer_Case_pos 18 Embeddings _ 30 +Lemmatizer_Case_form 80 Embeddings _ 300000 +Lemmatizer_Case_form.f 80 Embeddings _ 20 +Lemmatizer_Case_lemma 80 Embeddings _ 20 +Lemmatizer_Case_letters 80 Embeddings _ 5000 +Lemmatizer_Case_labels 18 Embeddings _ 20 +Lemmatizer_Case_morpho 22 Embeddings _ 8000 +# PARSER +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 20 +Parser_int 16 Embeddings _ 2000 +Parser_eos 16 Embeddings _ 20 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 30 +Parser_form 80 Embeddings _ 20 +Parser_form.f 80 Embeddings _ 200000 +Parser_lemma 80 Embeddings _ 20 +Parser_letters 80 Embeddings _ 5000 +Parser_labels 18 Embeddings _ 200 +Parser_morpho 22 Embeddings _ 8000 +# SEGMENTER +Segmenter_actions 18 Embeddings _ 200 +Segmenter_bool 16 Embeddings _ 20 +Segmenter_int 16 Embeddings _ 2000 +Segmenter_eos 16 Embeddings _ 20 +Segmenter_gov 16 Embeddings _ 100 +Segmenter_pos 18 Embeddings _ 30 +Segmenter_form 80 Embeddings _ 20 +Segmenter_form.f 80 Embeddings _ 200000 +Segmenter_lemma 80 Embeddings _ 20 +Segmenter_letters 80 Embeddings _ 5000 +Segmenter_labels 18 Embeddings _ 200 +Segmenter_morpho 22 Embeddings _ 8000 diff --git a/UD_any/tokeparser_incremental_b1/train.bd b/UD_any/tokeparser_incremental_b1/train.bd new file mode 100644 index 0000000..963c311 --- /dev/null +++ b/UD_any/tokeparser_incremental_b1/train.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none FromZero 1 +1 FORM hyp form FromZero 1 +3 POS hyp pos FromZero 1 +4 XPOS hyp pos FromZero 1 +5 MORPHO hyp morpho FromZero 1 +2 LEMMA hyp form FromZero 1 +6 GOV hyp int FromZero 1 +7 LABEL hyp labels FromZero 1 +0 EOS hyp eos FromZero 0 diff --git a/UD_any/tokeparser_incremental_b2/lemmatizer_case.cla b/UD_any/tokeparser_incremental_b2/lemmatizer_case.cla new file mode 100644 index 0000000..bb7f8f9 --- /dev/null +++ b/UD_any/tokeparser_incremental_b2/lemmatizer_case.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Case +Type : Prediction +Oracle : lemma_case +Feature Model : data/feature_models/b2/lemmatizer_rules_incremental.fm +Action Set : data/lemmatizer_case.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b2/lemmatizer_lookup.cla b/UD_any/tokeparser_incremental_b2/lemmatizer_lookup.cla new file mode 100644 index 0000000..41f63b2 --- /dev/null +++ b/UD_any/tokeparser_incremental_b2/lemmatizer_lookup.cla @@ -0,0 +1,4 @@ +Name : Lemmatizer_Lookup +Type : Information +Oracle : lemma_lookup +Oracle Filename : data/maca_trans_lemmatizer_exceptions.fplm diff --git a/UD_any/tokeparser_incremental_b2/lemmatizer_rules.cla b/UD_any/tokeparser_incremental_b2/lemmatizer_rules.cla new file mode 100644 index 0000000..5a953da --- /dev/null +++ b/UD_any/tokeparser_incremental_b2/lemmatizer_rules.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Rules +Type : Prediction +Oracle : lemma_rules +Feature Model : data/feature_models/b2/lemmatizer_rules_incremental.fm +Action Set : data/lemmatizer_rules.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b2/machine.tm b/UD_any/tokeparser_incremental_b2/machine.tm new file mode 100644 index 0000000..f591178 --- /dev/null +++ b/UD_any/tokeparser_incremental_b2/machine.tm @@ -0,0 +1,39 @@ +Name : Tokenizer, Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tokeparser.dicts +%CLASSIFIERS +strategy strategy.cla +tokenizer tokenizer.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case lemmatizer_case.cla +parser parser.cla +segmenter segmenter.cla +%STATES +strategy strategy +tokenizer tokenizer +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +segmenter segmenter +%TRANSITIONS +strategy tokenizer MOVE tokenizer +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +strategy segmenter MOVE segmenter +tokenizer strategy * +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * +segmenter strategy * diff --git a/UD_any/tokeparser_incremental_b2/morpho.cla b/UD_any/tokeparser_incremental_b2/morpho.cla new file mode 100644 index 0000000..af62742 --- /dev/null +++ b/UD_any/tokeparser_incremental_b2/morpho.cla @@ -0,0 +1,7 @@ +Name : Morpho +Type : Prediction +Oracle : morpho +Feature Model : data/feature_models/b2/morpho_incremental.fm +Action Set : data/morpho_parts.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b2/normal.tm b/UD_any/tokeparser_incremental_b2/normal.tm new file mode 100644 index 0000000..77ebff3 --- /dev/null +++ b/UD_any/tokeparser_incremental_b2/normal.tm @@ -0,0 +1,31 @@ +Name : Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tagparser.dicts +%CLASSIFIERS +strategy strategy.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case lemmatizer_case.cla +parser parser.cla +%STATES +strategy strategy +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +%TRANSITIONS +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * diff --git a/UD_any/tokeparser_incremental_b2/parser.cla b/UD_any/tokeparser_incremental_b2/parser.cla new file mode 100644 index 0000000..173e09a --- /dev/null +++ b/UD_any/tokeparser_incremental_b2/parser.cla @@ -0,0 +1,7 @@ +Name : Parser +Type : Prediction +Oracle : parser +Feature Model : data/feature_models/b2/parser_nofuture.fm +Action Set : data/parser.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b2/segmenter.cla b/UD_any/tokeparser_incremental_b2/segmenter.cla new file mode 100644 index 0000000..a230c5e --- /dev/null +++ b/UD_any/tokeparser_incremental_b2/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/b2/parser_nofuture.fm +Action Set : data/segmenter.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b2/strategy.cla b/UD_any/tokeparser_incremental_b2/strategy.cla new file mode 100644 index 0000000..fcf66b5 --- /dev/null +++ b/UD_any/tokeparser_incremental_b2/strategy.cla @@ -0,0 +1,4 @@ +Name : Strategy +Type : Information +Oracle : strategy_tokenizer,tagger,morpho,lemmatizer,parser +Oracle Filename : none diff --git a/UD_any/tokeparser_incremental_b2/tagger.cla b/UD_any/tokeparser_incremental_b2/tagger.cla new file mode 100644 index 0000000..2d0fda8 --- /dev/null +++ b/UD_any/tokeparser_incremental_b2/tagger.cla @@ -0,0 +1,7 @@ +Name : Tagger +Type : Prediction +Oracle : tagger +Feature Model : data/feature_models/b2/tagger_incremental.fm +Action Set : data/tagger.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b2/test.bd b/UD_any/tokeparser_incremental_b2/test.bd new file mode 100644 index 0000000..b2ef996 --- /dev/null +++ b/UD_any/tokeparser_incremental_b2/test.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none Final 1 +1 FORM hyp form Final 1 +3 POS hyp pos Final 1 +4 XPOS hyp pos Final 1 +5 MORPHO hyp morpho Final 1 +2 LEMMA hyp form Final 1 +6 GOV hyp int Final 1 +7 LABEL hyp labels Final 1 +0 EOS hyp eos Final 0 diff --git a/UD_any/tokeparser_incremental_b2/tokenizer.cla b/UD_any/tokeparser_incremental_b2/tokenizer.cla new file mode 100644 index 0000000..a32aeaf --- /dev/null +++ b/UD_any/tokeparser_incremental_b2/tokenizer.cla @@ -0,0 +1,7 @@ +Name : Tokenizer +Type : Prediction +Oracle : tokenizer +Feature Model : data/feature_models/b2/tokenizer_incremental.fm +Action Set : data/tokenizer.as +Topology : (800,RELU,0.3) +Dynamic : no diff --git a/UD_any/tokeparser_incremental_b2/tokeparser.dicts b/UD_any/tokeparser_incremental_b2/tokeparser.dicts new file mode 100644 index 0000000..7378953 --- /dev/null +++ b/UD_any/tokeparser_incremental_b2/tokeparser.dicts @@ -0,0 +1,93 @@ +#Name Dimension Mode # +############################ +# TOKENIZER +Tokenizer_actions 18 Embeddings _ 200 +Tokenizer_bool 16 Embeddings _ 20 +Tokenizer_int 16 Embeddings _ 2000 +Tokenizer_eos 16 Embeddings _ 20 +Tokenizer_gov 16 Embeddings _ 100 +Tokenizer_pos 18 Embeddings _ 30 +Tokenizer_form 80 Embeddings _ 20 +Tokenizer_form.f 80 Embeddings _ 200000 +Tokenizer_lemma 80 Embeddings _ 20 +Tokenizer_letters 80 Embeddings _ 5000 +Tokenizer_labels 18 Embeddings _ 200 +Tokenizer_morpho 22 Embeddings _ 8000 +# TAGGER +Tagger_actions 18 Embeddings _ 200 +Tagger_bool 16 Embeddings _ 20 +Tagger_int 16 Embeddings _ 2000 +Tagger_eos 16 Embeddings _ 20 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 30 +Tagger_form 80 Embeddings _ 20 +Tagger_form.f 80 Embeddings _ 200000 +Tagger_lemma 80 Embeddings _ 20 +Tagger_letters 80 Embeddings _ 5000 +Tagger_labels 18 Embeddings _ 200 +Tagger_morpho 22 Embeddings _ 8000 +# MORPHO +Morpho_actions 18 Embeddings _ 200 +Morpho_bool 16 Embeddings _ 20 +Morpho_int 16 Embeddings _ 2000 +Morpho_eos 16 Embeddings _ 20 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 30 +Morpho_form 80 Embeddings _ 20 +Morpho_form.f 80 Embeddings _ 200000 +Morpho_lemma 80 Embeddings _ 20 +Morpho_letters 80 Embeddings _ 5000 +Morpho_labels 18 Embeddings _ 200 +Morpho_morpho 22 Embeddings _ 80000 +# LEMMATIZER_RULE +Lemmatizer_Rules_actions 18 Embeddings _ 2000 +Lemmatizer_Rules_bool 16 Embeddings _ 20 +Lemmatizer_Rules_int 16 Embeddings _ 2000 +Lemmatizer_Rules_eos 16 Embeddings _ 20 +Lemmatizer_Rules_gov 16 Embeddings _ 20 +Lemmatizer_Rules_pos 18 Embeddings _ 30 +Lemmatizer_Rules_form 80 Embeddings _ 300000 +Lemmatizer_Rules_form.f 80 Embeddings _ 20 +Lemmatizer_Rules_lemma 80 Embeddings _ 20 +Lemmatizer_Rules_letters 80 Embeddings _ 5000 +Lemmatizer_Rules_labels 18 Embeddings _ 20 +Lemmatizer_Rules_morpho 22 Embeddings _ 8000 +# LEMMATIZER_CASE +Lemmatizer_Case_actions 18 Embeddings _ 2000 +Lemmatizer_Case_bool 16 Embeddings _ 20 +Lemmatizer_Case_int 16 Embeddings _ 2000 +Lemmatizer_Case_eos 16 Embeddings _ 20 +Lemmatizer_Case_gov 16 Embeddings _ 20 +Lemmatizer_Case_pos 18 Embeddings _ 30 +Lemmatizer_Case_form 80 Embeddings _ 300000 +Lemmatizer_Case_form.f 80 Embeddings _ 20 +Lemmatizer_Case_lemma 80 Embeddings _ 20 +Lemmatizer_Case_letters 80 Embeddings _ 5000 +Lemmatizer_Case_labels 18 Embeddings _ 20 +Lemmatizer_Case_morpho 22 Embeddings _ 8000 +# PARSER +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 20 +Parser_int 16 Embeddings _ 2000 +Parser_eos 16 Embeddings _ 20 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 30 +Parser_form 80 Embeddings _ 20 +Parser_form.f 80 Embeddings _ 200000 +Parser_lemma 80 Embeddings _ 20 +Parser_letters 80 Embeddings _ 5000 +Parser_labels 18 Embeddings _ 200 +Parser_morpho 22 Embeddings _ 8000 +# SEGMENTER +Segmenter_actions 18 Embeddings _ 200 +Segmenter_bool 16 Embeddings _ 20 +Segmenter_int 16 Embeddings _ 2000 +Segmenter_eos 16 Embeddings _ 20 +Segmenter_gov 16 Embeddings _ 100 +Segmenter_pos 18 Embeddings _ 30 +Segmenter_form 80 Embeddings _ 20 +Segmenter_form.f 80 Embeddings _ 200000 +Segmenter_lemma 80 Embeddings _ 20 +Segmenter_letters 80 Embeddings _ 5000 +Segmenter_labels 18 Embeddings _ 200 +Segmenter_morpho 22 Embeddings _ 8000 diff --git a/UD_any/tokeparser_incremental_b2/train.bd b/UD_any/tokeparser_incremental_b2/train.bd new file mode 100644 index 0000000..963c311 --- /dev/null +++ b/UD_any/tokeparser_incremental_b2/train.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none FromZero 1 +1 FORM hyp form FromZero 1 +3 POS hyp pos FromZero 1 +4 XPOS hyp pos FromZero 1 +5 MORPHO hyp morpho FromZero 1 +2 LEMMA hyp form FromZero 1 +6 GOV hyp int FromZero 1 +7 LABEL hyp labels FromZero 1 +0 EOS hyp eos FromZero 0 diff --git a/UD_any/tokeparser_incremental_b3/lemmatizer_case.cla b/UD_any/tokeparser_incremental_b3/lemmatizer_case.cla new file mode 100644 index 0000000..9a05392 --- /dev/null +++ b/UD_any/tokeparser_incremental_b3/lemmatizer_case.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Case +Type : Prediction +Oracle : lemma_case +Feature Model : data/feature_models/b3/lemmatizer_rules_incremental.fm +Action Set : data/lemmatizer_case.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b3/lemmatizer_lookup.cla b/UD_any/tokeparser_incremental_b3/lemmatizer_lookup.cla new file mode 100644 index 0000000..41f63b2 --- /dev/null +++ b/UD_any/tokeparser_incremental_b3/lemmatizer_lookup.cla @@ -0,0 +1,4 @@ +Name : Lemmatizer_Lookup +Type : Information +Oracle : lemma_lookup +Oracle Filename : data/maca_trans_lemmatizer_exceptions.fplm diff --git a/UD_any/tokeparser_incremental_b3/lemmatizer_rules.cla b/UD_any/tokeparser_incremental_b3/lemmatizer_rules.cla new file mode 100644 index 0000000..59297b0 --- /dev/null +++ b/UD_any/tokeparser_incremental_b3/lemmatizer_rules.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Rules +Type : Prediction +Oracle : lemma_rules +Feature Model : data/feature_models/b3/lemmatizer_rules_incremental.fm +Action Set : data/lemmatizer_rules.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b3/machine.tm b/UD_any/tokeparser_incremental_b3/machine.tm new file mode 100644 index 0000000..f591178 --- /dev/null +++ b/UD_any/tokeparser_incremental_b3/machine.tm @@ -0,0 +1,39 @@ +Name : Tokenizer, Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tokeparser.dicts +%CLASSIFIERS +strategy strategy.cla +tokenizer tokenizer.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case lemmatizer_case.cla +parser parser.cla +segmenter segmenter.cla +%STATES +strategy strategy +tokenizer tokenizer +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +segmenter segmenter +%TRANSITIONS +strategy tokenizer MOVE tokenizer +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +strategy segmenter MOVE segmenter +tokenizer strategy * +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * +segmenter strategy * diff --git a/UD_any/tokeparser_incremental_b3/morpho.cla b/UD_any/tokeparser_incremental_b3/morpho.cla new file mode 100644 index 0000000..edd7e6d --- /dev/null +++ b/UD_any/tokeparser_incremental_b3/morpho.cla @@ -0,0 +1,7 @@ +Name : Morpho +Type : Prediction +Oracle : morpho +Feature Model : data/feature_models/b3/morpho_incremental.fm +Action Set : data/morpho_parts.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b3/normal.tm b/UD_any/tokeparser_incremental_b3/normal.tm new file mode 100644 index 0000000..77ebff3 --- /dev/null +++ b/UD_any/tokeparser_incremental_b3/normal.tm @@ -0,0 +1,31 @@ +Name : Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tagparser.dicts +%CLASSIFIERS +strategy strategy.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case lemmatizer_case.cla +parser parser.cla +%STATES +strategy strategy +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +%TRANSITIONS +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * diff --git a/UD_any/tokeparser_incremental_b3/parser.cla b/UD_any/tokeparser_incremental_b3/parser.cla new file mode 100644 index 0000000..ec3ee96 --- /dev/null +++ b/UD_any/tokeparser_incremental_b3/parser.cla @@ -0,0 +1,7 @@ +Name : Parser +Type : Prediction +Oracle : parser +Feature Model : data/feature_models/b3/parser_nofuture.fm +Action Set : data/parser.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b3/segmenter.cla b/UD_any/tokeparser_incremental_b3/segmenter.cla new file mode 100644 index 0000000..f4f3830 --- /dev/null +++ b/UD_any/tokeparser_incremental_b3/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/b3/parser_nofuture.fm +Action Set : data/segmenter.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b3/strategy.cla b/UD_any/tokeparser_incremental_b3/strategy.cla new file mode 100644 index 0000000..fcf66b5 --- /dev/null +++ b/UD_any/tokeparser_incremental_b3/strategy.cla @@ -0,0 +1,4 @@ +Name : Strategy +Type : Information +Oracle : strategy_tokenizer,tagger,morpho,lemmatizer,parser +Oracle Filename : none diff --git a/UD_any/tokeparser_incremental_b3/tagger.cla b/UD_any/tokeparser_incremental_b3/tagger.cla new file mode 100644 index 0000000..ab1740f --- /dev/null +++ b/UD_any/tokeparser_incremental_b3/tagger.cla @@ -0,0 +1,7 @@ +Name : Tagger +Type : Prediction +Oracle : tagger +Feature Model : data/feature_models/b3/tagger_incremental.fm +Action Set : data/tagger.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b3/test.bd b/UD_any/tokeparser_incremental_b3/test.bd new file mode 100644 index 0000000..b2ef996 --- /dev/null +++ b/UD_any/tokeparser_incremental_b3/test.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none Final 1 +1 FORM hyp form Final 1 +3 POS hyp pos Final 1 +4 XPOS hyp pos Final 1 +5 MORPHO hyp morpho Final 1 +2 LEMMA hyp form Final 1 +6 GOV hyp int Final 1 +7 LABEL hyp labels Final 1 +0 EOS hyp eos Final 0 diff --git a/UD_any/tokeparser_incremental_b3/tokenizer.cla b/UD_any/tokeparser_incremental_b3/tokenizer.cla new file mode 100644 index 0000000..0cbf996 --- /dev/null +++ b/UD_any/tokeparser_incremental_b3/tokenizer.cla @@ -0,0 +1,7 @@ +Name : Tokenizer +Type : Prediction +Oracle : tokenizer +Feature Model : data/feature_models/b3/tokenizer_incremental.fm +Action Set : data/tokenizer.as +Topology : (800,RELU,0.3) +Dynamic : no diff --git a/UD_any/tokeparser_incremental_b3/tokeparser.dicts b/UD_any/tokeparser_incremental_b3/tokeparser.dicts new file mode 100644 index 0000000..7378953 --- /dev/null +++ b/UD_any/tokeparser_incremental_b3/tokeparser.dicts @@ -0,0 +1,93 @@ +#Name Dimension Mode # +############################ +# TOKENIZER +Tokenizer_actions 18 Embeddings _ 200 +Tokenizer_bool 16 Embeddings _ 20 +Tokenizer_int 16 Embeddings _ 2000 +Tokenizer_eos 16 Embeddings _ 20 +Tokenizer_gov 16 Embeddings _ 100 +Tokenizer_pos 18 Embeddings _ 30 +Tokenizer_form 80 Embeddings _ 20 +Tokenizer_form.f 80 Embeddings _ 200000 +Tokenizer_lemma 80 Embeddings _ 20 +Tokenizer_letters 80 Embeddings _ 5000 +Tokenizer_labels 18 Embeddings _ 200 +Tokenizer_morpho 22 Embeddings _ 8000 +# TAGGER +Tagger_actions 18 Embeddings _ 200 +Tagger_bool 16 Embeddings _ 20 +Tagger_int 16 Embeddings _ 2000 +Tagger_eos 16 Embeddings _ 20 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 30 +Tagger_form 80 Embeddings _ 20 +Tagger_form.f 80 Embeddings _ 200000 +Tagger_lemma 80 Embeddings _ 20 +Tagger_letters 80 Embeddings _ 5000 +Tagger_labels 18 Embeddings _ 200 +Tagger_morpho 22 Embeddings _ 8000 +# MORPHO +Morpho_actions 18 Embeddings _ 200 +Morpho_bool 16 Embeddings _ 20 +Morpho_int 16 Embeddings _ 2000 +Morpho_eos 16 Embeddings _ 20 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 30 +Morpho_form 80 Embeddings _ 20 +Morpho_form.f 80 Embeddings _ 200000 +Morpho_lemma 80 Embeddings _ 20 +Morpho_letters 80 Embeddings _ 5000 +Morpho_labels 18 Embeddings _ 200 +Morpho_morpho 22 Embeddings _ 80000 +# LEMMATIZER_RULE +Lemmatizer_Rules_actions 18 Embeddings _ 2000 +Lemmatizer_Rules_bool 16 Embeddings _ 20 +Lemmatizer_Rules_int 16 Embeddings _ 2000 +Lemmatizer_Rules_eos 16 Embeddings _ 20 +Lemmatizer_Rules_gov 16 Embeddings _ 20 +Lemmatizer_Rules_pos 18 Embeddings _ 30 +Lemmatizer_Rules_form 80 Embeddings _ 300000 +Lemmatizer_Rules_form.f 80 Embeddings _ 20 +Lemmatizer_Rules_lemma 80 Embeddings _ 20 +Lemmatizer_Rules_letters 80 Embeddings _ 5000 +Lemmatizer_Rules_labels 18 Embeddings _ 20 +Lemmatizer_Rules_morpho 22 Embeddings _ 8000 +# LEMMATIZER_CASE +Lemmatizer_Case_actions 18 Embeddings _ 2000 +Lemmatizer_Case_bool 16 Embeddings _ 20 +Lemmatizer_Case_int 16 Embeddings _ 2000 +Lemmatizer_Case_eos 16 Embeddings _ 20 +Lemmatizer_Case_gov 16 Embeddings _ 20 +Lemmatizer_Case_pos 18 Embeddings _ 30 +Lemmatizer_Case_form 80 Embeddings _ 300000 +Lemmatizer_Case_form.f 80 Embeddings _ 20 +Lemmatizer_Case_lemma 80 Embeddings _ 20 +Lemmatizer_Case_letters 80 Embeddings _ 5000 +Lemmatizer_Case_labels 18 Embeddings _ 20 +Lemmatizer_Case_morpho 22 Embeddings _ 8000 +# PARSER +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 20 +Parser_int 16 Embeddings _ 2000 +Parser_eos 16 Embeddings _ 20 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 30 +Parser_form 80 Embeddings _ 20 +Parser_form.f 80 Embeddings _ 200000 +Parser_lemma 80 Embeddings _ 20 +Parser_letters 80 Embeddings _ 5000 +Parser_labels 18 Embeddings _ 200 +Parser_morpho 22 Embeddings _ 8000 +# SEGMENTER +Segmenter_actions 18 Embeddings _ 200 +Segmenter_bool 16 Embeddings _ 20 +Segmenter_int 16 Embeddings _ 2000 +Segmenter_eos 16 Embeddings _ 20 +Segmenter_gov 16 Embeddings _ 100 +Segmenter_pos 18 Embeddings _ 30 +Segmenter_form 80 Embeddings _ 20 +Segmenter_form.f 80 Embeddings _ 200000 +Segmenter_lemma 80 Embeddings _ 20 +Segmenter_letters 80 Embeddings _ 5000 +Segmenter_labels 18 Embeddings _ 200 +Segmenter_morpho 22 Embeddings _ 8000 diff --git a/UD_any/tokeparser_incremental_b3/train.bd b/UD_any/tokeparser_incremental_b3/train.bd new file mode 100644 index 0000000..963c311 --- /dev/null +++ b/UD_any/tokeparser_incremental_b3/train.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none FromZero 1 +1 FORM hyp form FromZero 1 +3 POS hyp pos FromZero 1 +4 XPOS hyp pos FromZero 1 +5 MORPHO hyp morpho FromZero 1 +2 LEMMA hyp form FromZero 1 +6 GOV hyp int FromZero 1 +7 LABEL hyp labels FromZero 1 +0 EOS hyp eos FromZero 0 diff --git a/UD_any/tokeparser_incremental_b4/lemmatizer_case.cla b/UD_any/tokeparser_incremental_b4/lemmatizer_case.cla new file mode 100644 index 0000000..9e053b4 --- /dev/null +++ b/UD_any/tokeparser_incremental_b4/lemmatizer_case.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Case +Type : Prediction +Oracle : lemma_case +Feature Model : data/feature_models/b4/lemmatizer_rules_incremental.fm +Action Set : data/lemmatizer_case.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b4/lemmatizer_lookup.cla b/UD_any/tokeparser_incremental_b4/lemmatizer_lookup.cla new file mode 100644 index 0000000..41f63b2 --- /dev/null +++ b/UD_any/tokeparser_incremental_b4/lemmatizer_lookup.cla @@ -0,0 +1,4 @@ +Name : Lemmatizer_Lookup +Type : Information +Oracle : lemma_lookup +Oracle Filename : data/maca_trans_lemmatizer_exceptions.fplm diff --git a/UD_any/tokeparser_incremental_b4/lemmatizer_rules.cla b/UD_any/tokeparser_incremental_b4/lemmatizer_rules.cla new file mode 100644 index 0000000..74cc521 --- /dev/null +++ b/UD_any/tokeparser_incremental_b4/lemmatizer_rules.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Rules +Type : Prediction +Oracle : lemma_rules +Feature Model : data/feature_models/b4/lemmatizer_rules_incremental.fm +Action Set : data/lemmatizer_rules.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b4/machine.tm b/UD_any/tokeparser_incremental_b4/machine.tm new file mode 100644 index 0000000..f591178 --- /dev/null +++ b/UD_any/tokeparser_incremental_b4/machine.tm @@ -0,0 +1,39 @@ +Name : Tokenizer, Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tokeparser.dicts +%CLASSIFIERS +strategy strategy.cla +tokenizer tokenizer.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case lemmatizer_case.cla +parser parser.cla +segmenter segmenter.cla +%STATES +strategy strategy +tokenizer tokenizer +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +segmenter segmenter +%TRANSITIONS +strategy tokenizer MOVE tokenizer +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +strategy segmenter MOVE segmenter +tokenizer strategy * +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * +segmenter strategy * diff --git a/UD_any/tokeparser_incremental_b4/morpho.cla b/UD_any/tokeparser_incremental_b4/morpho.cla new file mode 100644 index 0000000..1e7a2e8 --- /dev/null +++ b/UD_any/tokeparser_incremental_b4/morpho.cla @@ -0,0 +1,7 @@ +Name : Morpho +Type : Prediction +Oracle : morpho +Feature Model : data/feature_models/b4/morpho_incremental.fm +Action Set : data/morpho_parts.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b4/normal.tm b/UD_any/tokeparser_incremental_b4/normal.tm new file mode 100644 index 0000000..77ebff3 --- /dev/null +++ b/UD_any/tokeparser_incremental_b4/normal.tm @@ -0,0 +1,31 @@ +Name : Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tagparser.dicts +%CLASSIFIERS +strategy strategy.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case lemmatizer_case.cla +parser parser.cla +%STATES +strategy strategy +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +%TRANSITIONS +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * diff --git a/UD_any/tokeparser_incremental_b4/parser.cla b/UD_any/tokeparser_incremental_b4/parser.cla new file mode 100644 index 0000000..3c88628 --- /dev/null +++ b/UD_any/tokeparser_incremental_b4/parser.cla @@ -0,0 +1,7 @@ +Name : Parser +Type : Prediction +Oracle : parser +Feature Model : data/feature_models/b4/parser_nofuture.fm +Action Set : data/parser.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b4/segmenter.cla b/UD_any/tokeparser_incremental_b4/segmenter.cla new file mode 100644 index 0000000..f739a6e --- /dev/null +++ b/UD_any/tokeparser_incremental_b4/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/b4/parser_nofuture.fm +Action Set : data/segmenter.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b4/strategy.cla b/UD_any/tokeparser_incremental_b4/strategy.cla new file mode 100644 index 0000000..fcf66b5 --- /dev/null +++ b/UD_any/tokeparser_incremental_b4/strategy.cla @@ -0,0 +1,4 @@ +Name : Strategy +Type : Information +Oracle : strategy_tokenizer,tagger,morpho,lemmatizer,parser +Oracle Filename : none diff --git a/UD_any/tokeparser_incremental_b4/tagger.cla b/UD_any/tokeparser_incremental_b4/tagger.cla new file mode 100644 index 0000000..15da3fc --- /dev/null +++ b/UD_any/tokeparser_incremental_b4/tagger.cla @@ -0,0 +1,7 @@ +Name : Tagger +Type : Prediction +Oracle : tagger +Feature Model : data/feature_models/b4/tagger_incremental.fm +Action Set : data/tagger.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b4/test.bd b/UD_any/tokeparser_incremental_b4/test.bd new file mode 100644 index 0000000..b2ef996 --- /dev/null +++ b/UD_any/tokeparser_incremental_b4/test.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none Final 1 +1 FORM hyp form Final 1 +3 POS hyp pos Final 1 +4 XPOS hyp pos Final 1 +5 MORPHO hyp morpho Final 1 +2 LEMMA hyp form Final 1 +6 GOV hyp int Final 1 +7 LABEL hyp labels Final 1 +0 EOS hyp eos Final 0 diff --git a/UD_any/tokeparser_incremental_b4/tokenizer.cla b/UD_any/tokeparser_incremental_b4/tokenizer.cla new file mode 100644 index 0000000..83165a3 --- /dev/null +++ b/UD_any/tokeparser_incremental_b4/tokenizer.cla @@ -0,0 +1,7 @@ +Name : Tokenizer +Type : Prediction +Oracle : tokenizer +Feature Model : data/feature_models/b4/tokenizer_incremental.fm +Action Set : data/tokenizer.as +Topology : (800,RELU,0.3) +Dynamic : no diff --git a/UD_any/tokeparser_incremental_b4/tokeparser.dicts b/UD_any/tokeparser_incremental_b4/tokeparser.dicts new file mode 100644 index 0000000..7378953 --- /dev/null +++ b/UD_any/tokeparser_incremental_b4/tokeparser.dicts @@ -0,0 +1,93 @@ +#Name Dimension Mode # +############################ +# TOKENIZER +Tokenizer_actions 18 Embeddings _ 200 +Tokenizer_bool 16 Embeddings _ 20 +Tokenizer_int 16 Embeddings _ 2000 +Tokenizer_eos 16 Embeddings _ 20 +Tokenizer_gov 16 Embeddings _ 100 +Tokenizer_pos 18 Embeddings _ 30 +Tokenizer_form 80 Embeddings _ 20 +Tokenizer_form.f 80 Embeddings _ 200000 +Tokenizer_lemma 80 Embeddings _ 20 +Tokenizer_letters 80 Embeddings _ 5000 +Tokenizer_labels 18 Embeddings _ 200 +Tokenizer_morpho 22 Embeddings _ 8000 +# TAGGER +Tagger_actions 18 Embeddings _ 200 +Tagger_bool 16 Embeddings _ 20 +Tagger_int 16 Embeddings _ 2000 +Tagger_eos 16 Embeddings _ 20 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 30 +Tagger_form 80 Embeddings _ 20 +Tagger_form.f 80 Embeddings _ 200000 +Tagger_lemma 80 Embeddings _ 20 +Tagger_letters 80 Embeddings _ 5000 +Tagger_labels 18 Embeddings _ 200 +Tagger_morpho 22 Embeddings _ 8000 +# MORPHO +Morpho_actions 18 Embeddings _ 200 +Morpho_bool 16 Embeddings _ 20 +Morpho_int 16 Embeddings _ 2000 +Morpho_eos 16 Embeddings _ 20 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 30 +Morpho_form 80 Embeddings _ 20 +Morpho_form.f 80 Embeddings _ 200000 +Morpho_lemma 80 Embeddings _ 20 +Morpho_letters 80 Embeddings _ 5000 +Morpho_labels 18 Embeddings _ 200 +Morpho_morpho 22 Embeddings _ 80000 +# LEMMATIZER_RULE +Lemmatizer_Rules_actions 18 Embeddings _ 2000 +Lemmatizer_Rules_bool 16 Embeddings _ 20 +Lemmatizer_Rules_int 16 Embeddings _ 2000 +Lemmatizer_Rules_eos 16 Embeddings _ 20 +Lemmatizer_Rules_gov 16 Embeddings _ 20 +Lemmatizer_Rules_pos 18 Embeddings _ 30 +Lemmatizer_Rules_form 80 Embeddings _ 300000 +Lemmatizer_Rules_form.f 80 Embeddings _ 20 +Lemmatizer_Rules_lemma 80 Embeddings _ 20 +Lemmatizer_Rules_letters 80 Embeddings _ 5000 +Lemmatizer_Rules_labels 18 Embeddings _ 20 +Lemmatizer_Rules_morpho 22 Embeddings _ 8000 +# LEMMATIZER_CASE +Lemmatizer_Case_actions 18 Embeddings _ 2000 +Lemmatizer_Case_bool 16 Embeddings _ 20 +Lemmatizer_Case_int 16 Embeddings _ 2000 +Lemmatizer_Case_eos 16 Embeddings _ 20 +Lemmatizer_Case_gov 16 Embeddings _ 20 +Lemmatizer_Case_pos 18 Embeddings _ 30 +Lemmatizer_Case_form 80 Embeddings _ 300000 +Lemmatizer_Case_form.f 80 Embeddings _ 20 +Lemmatizer_Case_lemma 80 Embeddings _ 20 +Lemmatizer_Case_letters 80 Embeddings _ 5000 +Lemmatizer_Case_labels 18 Embeddings _ 20 +Lemmatizer_Case_morpho 22 Embeddings _ 8000 +# PARSER +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 20 +Parser_int 16 Embeddings _ 2000 +Parser_eos 16 Embeddings _ 20 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 30 +Parser_form 80 Embeddings _ 20 +Parser_form.f 80 Embeddings _ 200000 +Parser_lemma 80 Embeddings _ 20 +Parser_letters 80 Embeddings _ 5000 +Parser_labels 18 Embeddings _ 200 +Parser_morpho 22 Embeddings _ 8000 +# SEGMENTER +Segmenter_actions 18 Embeddings _ 200 +Segmenter_bool 16 Embeddings _ 20 +Segmenter_int 16 Embeddings _ 2000 +Segmenter_eos 16 Embeddings _ 20 +Segmenter_gov 16 Embeddings _ 100 +Segmenter_pos 18 Embeddings _ 30 +Segmenter_form 80 Embeddings _ 20 +Segmenter_form.f 80 Embeddings _ 200000 +Segmenter_lemma 80 Embeddings _ 20 +Segmenter_letters 80 Embeddings _ 5000 +Segmenter_labels 18 Embeddings _ 200 +Segmenter_morpho 22 Embeddings _ 8000 diff --git a/UD_any/tokeparser_incremental_b4/train.bd b/UD_any/tokeparser_incremental_b4/train.bd new file mode 100644 index 0000000..963c311 --- /dev/null +++ b/UD_any/tokeparser_incremental_b4/train.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none FromZero 1 +1 FORM hyp form FromZero 1 +3 POS hyp pos FromZero 1 +4 XPOS hyp pos FromZero 1 +5 MORPHO hyp morpho FromZero 1 +2 LEMMA hyp form FromZero 1 +6 GOV hyp int FromZero 1 +7 LABEL hyp labels FromZero 1 +0 EOS hyp eos FromZero 0 diff --git a/UD_any/tokeparser_incremental_b5/lemmatizer_case.cla b/UD_any/tokeparser_incremental_b5/lemmatizer_case.cla new file mode 100644 index 0000000..2514b83 --- /dev/null +++ b/UD_any/tokeparser_incremental_b5/lemmatizer_case.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Case +Type : Prediction +Oracle : lemma_case +Feature Model : data/feature_models/b5/lemmatizer_rules_incremental.fm +Action Set : data/lemmatizer_case.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b5/lemmatizer_lookup.cla b/UD_any/tokeparser_incremental_b5/lemmatizer_lookup.cla new file mode 100644 index 0000000..41f63b2 --- /dev/null +++ b/UD_any/tokeparser_incremental_b5/lemmatizer_lookup.cla @@ -0,0 +1,4 @@ +Name : Lemmatizer_Lookup +Type : Information +Oracle : lemma_lookup +Oracle Filename : data/maca_trans_lemmatizer_exceptions.fplm diff --git a/UD_any/tokeparser_incremental_b5/lemmatizer_rules.cla b/UD_any/tokeparser_incremental_b5/lemmatizer_rules.cla new file mode 100644 index 0000000..cb1a731 --- /dev/null +++ b/UD_any/tokeparser_incremental_b5/lemmatizer_rules.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Rules +Type : Prediction +Oracle : lemma_rules +Feature Model : data/feature_models/b5/lemmatizer_rules_incremental.fm +Action Set : data/lemmatizer_rules.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b5/machine.tm b/UD_any/tokeparser_incremental_b5/machine.tm new file mode 100644 index 0000000..f591178 --- /dev/null +++ b/UD_any/tokeparser_incremental_b5/machine.tm @@ -0,0 +1,39 @@ +Name : Tokenizer, Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tokeparser.dicts +%CLASSIFIERS +strategy strategy.cla +tokenizer tokenizer.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case lemmatizer_case.cla +parser parser.cla +segmenter segmenter.cla +%STATES +strategy strategy +tokenizer tokenizer +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +segmenter segmenter +%TRANSITIONS +strategy tokenizer MOVE tokenizer +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +strategy segmenter MOVE segmenter +tokenizer strategy * +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * +segmenter strategy * diff --git a/UD_any/tokeparser_incremental_b5/morpho.cla b/UD_any/tokeparser_incremental_b5/morpho.cla new file mode 100644 index 0000000..0a3b895 --- /dev/null +++ b/UD_any/tokeparser_incremental_b5/morpho.cla @@ -0,0 +1,7 @@ +Name : Morpho +Type : Prediction +Oracle : morpho +Feature Model : data/feature_models/b5/morpho_incremental.fm +Action Set : data/morpho_parts.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b5/normal.tm b/UD_any/tokeparser_incremental_b5/normal.tm new file mode 100644 index 0000000..77ebff3 --- /dev/null +++ b/UD_any/tokeparser_incremental_b5/normal.tm @@ -0,0 +1,31 @@ +Name : Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tagparser.dicts +%CLASSIFIERS +strategy strategy.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case lemmatizer_case.cla +parser parser.cla +%STATES +strategy strategy +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +%TRANSITIONS +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * diff --git a/UD_any/tokeparser_incremental_b5/parser.cla b/UD_any/tokeparser_incremental_b5/parser.cla new file mode 100644 index 0000000..7031c27 --- /dev/null +++ b/UD_any/tokeparser_incremental_b5/parser.cla @@ -0,0 +1,7 @@ +Name : Parser +Type : Prediction +Oracle : parser +Feature Model : data/feature_models/b5/parser_nofuture.fm +Action Set : data/parser.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b5/segmenter.cla b/UD_any/tokeparser_incremental_b5/segmenter.cla new file mode 100644 index 0000000..d31db82 --- /dev/null +++ b/UD_any/tokeparser_incremental_b5/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/b5/parser_nofuture.fm +Action Set : data/segmenter.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b5/strategy.cla b/UD_any/tokeparser_incremental_b5/strategy.cla new file mode 100644 index 0000000..fcf66b5 --- /dev/null +++ b/UD_any/tokeparser_incremental_b5/strategy.cla @@ -0,0 +1,4 @@ +Name : Strategy +Type : Information +Oracle : strategy_tokenizer,tagger,morpho,lemmatizer,parser +Oracle Filename : none diff --git a/UD_any/tokeparser_incremental_b5/tagger.cla b/UD_any/tokeparser_incremental_b5/tagger.cla new file mode 100644 index 0000000..f14190d --- /dev/null +++ b/UD_any/tokeparser_incremental_b5/tagger.cla @@ -0,0 +1,7 @@ +Name : Tagger +Type : Prediction +Oracle : tagger +Feature Model : data/feature_models/b5/tagger_incremental.fm +Action Set : data/tagger.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b5/test.bd b/UD_any/tokeparser_incremental_b5/test.bd new file mode 100644 index 0000000..b2ef996 --- /dev/null +++ b/UD_any/tokeparser_incremental_b5/test.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none Final 1 +1 FORM hyp form Final 1 +3 POS hyp pos Final 1 +4 XPOS hyp pos Final 1 +5 MORPHO hyp morpho Final 1 +2 LEMMA hyp form Final 1 +6 GOV hyp int Final 1 +7 LABEL hyp labels Final 1 +0 EOS hyp eos Final 0 diff --git a/UD_any/tokeparser_incremental_b5/tokenizer.cla b/UD_any/tokeparser_incremental_b5/tokenizer.cla new file mode 100644 index 0000000..535bb4e --- /dev/null +++ b/UD_any/tokeparser_incremental_b5/tokenizer.cla @@ -0,0 +1,7 @@ +Name : Tokenizer +Type : Prediction +Oracle : tokenizer +Feature Model : data/feature_models/b5/tokenizer_incremental.fm +Action Set : data/tokenizer.as +Topology : (800,RELU,0.3) +Dynamic : no diff --git a/UD_any/tokeparser_incremental_b5/tokeparser.dicts b/UD_any/tokeparser_incremental_b5/tokeparser.dicts new file mode 100644 index 0000000..7378953 --- /dev/null +++ b/UD_any/tokeparser_incremental_b5/tokeparser.dicts @@ -0,0 +1,93 @@ +#Name Dimension Mode # +############################ +# TOKENIZER +Tokenizer_actions 18 Embeddings _ 200 +Tokenizer_bool 16 Embeddings _ 20 +Tokenizer_int 16 Embeddings _ 2000 +Tokenizer_eos 16 Embeddings _ 20 +Tokenizer_gov 16 Embeddings _ 100 +Tokenizer_pos 18 Embeddings _ 30 +Tokenizer_form 80 Embeddings _ 20 +Tokenizer_form.f 80 Embeddings _ 200000 +Tokenizer_lemma 80 Embeddings _ 20 +Tokenizer_letters 80 Embeddings _ 5000 +Tokenizer_labels 18 Embeddings _ 200 +Tokenizer_morpho 22 Embeddings _ 8000 +# TAGGER +Tagger_actions 18 Embeddings _ 200 +Tagger_bool 16 Embeddings _ 20 +Tagger_int 16 Embeddings _ 2000 +Tagger_eos 16 Embeddings _ 20 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 30 +Tagger_form 80 Embeddings _ 20 +Tagger_form.f 80 Embeddings _ 200000 +Tagger_lemma 80 Embeddings _ 20 +Tagger_letters 80 Embeddings _ 5000 +Tagger_labels 18 Embeddings _ 200 +Tagger_morpho 22 Embeddings _ 8000 +# MORPHO +Morpho_actions 18 Embeddings _ 200 +Morpho_bool 16 Embeddings _ 20 +Morpho_int 16 Embeddings _ 2000 +Morpho_eos 16 Embeddings _ 20 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 30 +Morpho_form 80 Embeddings _ 20 +Morpho_form.f 80 Embeddings _ 200000 +Morpho_lemma 80 Embeddings _ 20 +Morpho_letters 80 Embeddings _ 5000 +Morpho_labels 18 Embeddings _ 200 +Morpho_morpho 22 Embeddings _ 80000 +# LEMMATIZER_RULE +Lemmatizer_Rules_actions 18 Embeddings _ 2000 +Lemmatizer_Rules_bool 16 Embeddings _ 20 +Lemmatizer_Rules_int 16 Embeddings _ 2000 +Lemmatizer_Rules_eos 16 Embeddings _ 20 +Lemmatizer_Rules_gov 16 Embeddings _ 20 +Lemmatizer_Rules_pos 18 Embeddings _ 30 +Lemmatizer_Rules_form 80 Embeddings _ 300000 +Lemmatizer_Rules_form.f 80 Embeddings _ 20 +Lemmatizer_Rules_lemma 80 Embeddings _ 20 +Lemmatizer_Rules_letters 80 Embeddings _ 5000 +Lemmatizer_Rules_labels 18 Embeddings _ 20 +Lemmatizer_Rules_morpho 22 Embeddings _ 8000 +# LEMMATIZER_CASE +Lemmatizer_Case_actions 18 Embeddings _ 2000 +Lemmatizer_Case_bool 16 Embeddings _ 20 +Lemmatizer_Case_int 16 Embeddings _ 2000 +Lemmatizer_Case_eos 16 Embeddings _ 20 +Lemmatizer_Case_gov 16 Embeddings _ 20 +Lemmatizer_Case_pos 18 Embeddings _ 30 +Lemmatizer_Case_form 80 Embeddings _ 300000 +Lemmatizer_Case_form.f 80 Embeddings _ 20 +Lemmatizer_Case_lemma 80 Embeddings _ 20 +Lemmatizer_Case_letters 80 Embeddings _ 5000 +Lemmatizer_Case_labels 18 Embeddings _ 20 +Lemmatizer_Case_morpho 22 Embeddings _ 8000 +# PARSER +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 20 +Parser_int 16 Embeddings _ 2000 +Parser_eos 16 Embeddings _ 20 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 30 +Parser_form 80 Embeddings _ 20 +Parser_form.f 80 Embeddings _ 200000 +Parser_lemma 80 Embeddings _ 20 +Parser_letters 80 Embeddings _ 5000 +Parser_labels 18 Embeddings _ 200 +Parser_morpho 22 Embeddings _ 8000 +# SEGMENTER +Segmenter_actions 18 Embeddings _ 200 +Segmenter_bool 16 Embeddings _ 20 +Segmenter_int 16 Embeddings _ 2000 +Segmenter_eos 16 Embeddings _ 20 +Segmenter_gov 16 Embeddings _ 100 +Segmenter_pos 18 Embeddings _ 30 +Segmenter_form 80 Embeddings _ 20 +Segmenter_form.f 80 Embeddings _ 200000 +Segmenter_lemma 80 Embeddings _ 20 +Segmenter_letters 80 Embeddings _ 5000 +Segmenter_labels 18 Embeddings _ 200 +Segmenter_morpho 22 Embeddings _ 8000 diff --git a/UD_any/tokeparser_incremental_b5/train.bd b/UD_any/tokeparser_incremental_b5/train.bd new file mode 100644 index 0000000..963c311 --- /dev/null +++ b/UD_any/tokeparser_incremental_b5/train.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none FromZero 1 +1 FORM hyp form FromZero 1 +3 POS hyp pos FromZero 1 +4 XPOS hyp pos FromZero 1 +5 MORPHO hyp morpho FromZero 1 +2 LEMMA hyp form FromZero 1 +6 GOV hyp int FromZero 1 +7 LABEL hyp labels FromZero 1 +0 EOS hyp eos FromZero 0 diff --git a/UD_any/tokeparser_incremental_b6/lemmatizer_case.cla b/UD_any/tokeparser_incremental_b6/lemmatizer_case.cla new file mode 100644 index 0000000..69d719e --- /dev/null +++ b/UD_any/tokeparser_incremental_b6/lemmatizer_case.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Case +Type : Prediction +Oracle : lemma_case +Feature Model : data/feature_models/b6/lemmatizer_rules_incremental.fm +Action Set : data/lemmatizer_case.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b6/lemmatizer_lookup.cla b/UD_any/tokeparser_incremental_b6/lemmatizer_lookup.cla new file mode 100644 index 0000000..41f63b2 --- /dev/null +++ b/UD_any/tokeparser_incremental_b6/lemmatizer_lookup.cla @@ -0,0 +1,4 @@ +Name : Lemmatizer_Lookup +Type : Information +Oracle : lemma_lookup +Oracle Filename : data/maca_trans_lemmatizer_exceptions.fplm diff --git a/UD_any/tokeparser_incremental_b6/lemmatizer_rules.cla b/UD_any/tokeparser_incremental_b6/lemmatizer_rules.cla new file mode 100644 index 0000000..b7c10e3 --- /dev/null +++ b/UD_any/tokeparser_incremental_b6/lemmatizer_rules.cla @@ -0,0 +1,7 @@ +Name : Lemmatizer_Rules +Type : Prediction +Oracle : lemma_rules +Feature Model : data/feature_models/b6/lemmatizer_rules_incremental.fm +Action Set : data/lemmatizer_rules.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b6/machine.tm b/UD_any/tokeparser_incremental_b6/machine.tm new file mode 100644 index 0000000..f591178 --- /dev/null +++ b/UD_any/tokeparser_incremental_b6/machine.tm @@ -0,0 +1,39 @@ +Name : Tokenizer, Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tokeparser.dicts +%CLASSIFIERS +strategy strategy.cla +tokenizer tokenizer.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case lemmatizer_case.cla +parser parser.cla +segmenter segmenter.cla +%STATES +strategy strategy +tokenizer tokenizer +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +segmenter segmenter +%TRANSITIONS +strategy tokenizer MOVE tokenizer +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +strategy segmenter MOVE segmenter +tokenizer strategy * +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * +segmenter strategy * diff --git a/UD_any/tokeparser_incremental_b6/morpho.cla b/UD_any/tokeparser_incremental_b6/morpho.cla new file mode 100644 index 0000000..ed5b4ea --- /dev/null +++ b/UD_any/tokeparser_incremental_b6/morpho.cla @@ -0,0 +1,7 @@ +Name : Morpho +Type : Prediction +Oracle : morpho +Feature Model : data/feature_models/b6/morpho_incremental.fm +Action Set : data/morpho_parts.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b6/normal.tm b/UD_any/tokeparser_incremental_b6/normal.tm new file mode 100644 index 0000000..77ebff3 --- /dev/null +++ b/UD_any/tokeparser_incremental_b6/normal.tm @@ -0,0 +1,31 @@ +Name : Tagger, Morpho, Lemmatizer and Parser Machine +Dicts : tagparser.dicts +%CLASSIFIERS +strategy strategy.cla +tagger tagger.cla +morpho morpho.cla +lemmatizer_lookup lemmatizer_lookup.cla +lemmatizer_rules lemmatizer_rules.cla +lemmatizer_case lemmatizer_case.cla +parser parser.cla +%STATES +strategy strategy +tagger tagger +morpho morpho +lemmatizer_lookup lemmatizer_lookup +lemmatizer_rules lemmatizer_rules +lemmatizer_case lemmatizer_case +parser parser +%TRANSITIONS +strategy tagger MOVE tagger +strategy morpho MOVE morpho +strategy lemmatizer_lookup MOVE lemmatizer_lookup +strategy lemmatizer_rules MOVE lemmatizer_rules +strategy lemmatizer_case MOVE lemmatizer_case +strategy parser MOVE parser +tagger strategy * +morpho strategy * +lemmatizer_lookup strategy * +lemmatizer_case strategy * +lemmatizer_rules strategy * +parser strategy * diff --git a/UD_any/tokeparser_incremental_b6/parser.cla b/UD_any/tokeparser_incremental_b6/parser.cla new file mode 100644 index 0000000..cad4cb1 --- /dev/null +++ b/UD_any/tokeparser_incremental_b6/parser.cla @@ -0,0 +1,7 @@ +Name : Parser +Type : Prediction +Oracle : parser +Feature Model : data/feature_models/b6/parser_nofuture.fm +Action Set : data/parser.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b6/segmenter.cla b/UD_any/tokeparser_incremental_b6/segmenter.cla new file mode 100644 index 0000000..00b8eea --- /dev/null +++ b/UD_any/tokeparser_incremental_b6/segmenter.cla @@ -0,0 +1,7 @@ +Name : Segmenter +Type : Prediction +Oracle : segmenter +Feature Model : data/feature_models/b6/parser_nofuture.fm +Action Set : data/segmenter.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b6/strategy.cla b/UD_any/tokeparser_incremental_b6/strategy.cla new file mode 100644 index 0000000..fcf66b5 --- /dev/null +++ b/UD_any/tokeparser_incremental_b6/strategy.cla @@ -0,0 +1,4 @@ +Name : Strategy +Type : Information +Oracle : strategy_tokenizer,tagger,morpho,lemmatizer,parser +Oracle Filename : none diff --git a/UD_any/tokeparser_incremental_b6/tagger.cla b/UD_any/tokeparser_incremental_b6/tagger.cla new file mode 100644 index 0000000..78d2ee2 --- /dev/null +++ b/UD_any/tokeparser_incremental_b6/tagger.cla @@ -0,0 +1,7 @@ +Name : Tagger +Type : Prediction +Oracle : tagger +Feature Model : data/feature_models/b6/tagger_incremental.fm +Action Set : data/tagger.as +Topology : (800,RELU,0.3) +Dynamic : yes diff --git a/UD_any/tokeparser_incremental_b6/test.bd b/UD_any/tokeparser_incremental_b6/test.bd new file mode 100644 index 0000000..b2ef996 --- /dev/null +++ b/UD_any/tokeparser_incremental_b6/test.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none Final 1 +1 FORM hyp form Final 1 +3 POS hyp pos Final 1 +4 XPOS hyp pos Final 1 +5 MORPHO hyp morpho Final 1 +2 LEMMA hyp form Final 1 +6 GOV hyp int Final 1 +7 LABEL hyp labels Final 1 +0 EOS hyp eos Final 0 diff --git a/UD_any/tokeparser_incremental_b6/tokenizer.cla b/UD_any/tokeparser_incremental_b6/tokenizer.cla new file mode 100644 index 0000000..6cdfb92 --- /dev/null +++ b/UD_any/tokeparser_incremental_b6/tokenizer.cla @@ -0,0 +1,7 @@ +Name : Tokenizer +Type : Prediction +Oracle : tokenizer +Feature Model : data/feature_models/b6/tokenizer_incremental.fm +Action Set : data/tokenizer.as +Topology : (800,RELU,0.3) +Dynamic : no diff --git a/UD_any/tokeparser_incremental_b6/tokeparser.dicts b/UD_any/tokeparser_incremental_b6/tokeparser.dicts new file mode 100644 index 0000000..7378953 --- /dev/null +++ b/UD_any/tokeparser_incremental_b6/tokeparser.dicts @@ -0,0 +1,93 @@ +#Name Dimension Mode # +############################ +# TOKENIZER +Tokenizer_actions 18 Embeddings _ 200 +Tokenizer_bool 16 Embeddings _ 20 +Tokenizer_int 16 Embeddings _ 2000 +Tokenizer_eos 16 Embeddings _ 20 +Tokenizer_gov 16 Embeddings _ 100 +Tokenizer_pos 18 Embeddings _ 30 +Tokenizer_form 80 Embeddings _ 20 +Tokenizer_form.f 80 Embeddings _ 200000 +Tokenizer_lemma 80 Embeddings _ 20 +Tokenizer_letters 80 Embeddings _ 5000 +Tokenizer_labels 18 Embeddings _ 200 +Tokenizer_morpho 22 Embeddings _ 8000 +# TAGGER +Tagger_actions 18 Embeddings _ 200 +Tagger_bool 16 Embeddings _ 20 +Tagger_int 16 Embeddings _ 2000 +Tagger_eos 16 Embeddings _ 20 +Tagger_gov 16 Embeddings _ 100 +Tagger_pos 18 Embeddings _ 30 +Tagger_form 80 Embeddings _ 20 +Tagger_form.f 80 Embeddings _ 200000 +Tagger_lemma 80 Embeddings _ 20 +Tagger_letters 80 Embeddings _ 5000 +Tagger_labels 18 Embeddings _ 200 +Tagger_morpho 22 Embeddings _ 8000 +# MORPHO +Morpho_actions 18 Embeddings _ 200 +Morpho_bool 16 Embeddings _ 20 +Morpho_int 16 Embeddings _ 2000 +Morpho_eos 16 Embeddings _ 20 +Morpho_gov 16 Embeddings _ 100 +Morpho_pos 18 Embeddings _ 30 +Morpho_form 80 Embeddings _ 20 +Morpho_form.f 80 Embeddings _ 200000 +Morpho_lemma 80 Embeddings _ 20 +Morpho_letters 80 Embeddings _ 5000 +Morpho_labels 18 Embeddings _ 200 +Morpho_morpho 22 Embeddings _ 80000 +# LEMMATIZER_RULE +Lemmatizer_Rules_actions 18 Embeddings _ 2000 +Lemmatizer_Rules_bool 16 Embeddings _ 20 +Lemmatizer_Rules_int 16 Embeddings _ 2000 +Lemmatizer_Rules_eos 16 Embeddings _ 20 +Lemmatizer_Rules_gov 16 Embeddings _ 20 +Lemmatizer_Rules_pos 18 Embeddings _ 30 +Lemmatizer_Rules_form 80 Embeddings _ 300000 +Lemmatizer_Rules_form.f 80 Embeddings _ 20 +Lemmatizer_Rules_lemma 80 Embeddings _ 20 +Lemmatizer_Rules_letters 80 Embeddings _ 5000 +Lemmatizer_Rules_labels 18 Embeddings _ 20 +Lemmatizer_Rules_morpho 22 Embeddings _ 8000 +# LEMMATIZER_CASE +Lemmatizer_Case_actions 18 Embeddings _ 2000 +Lemmatizer_Case_bool 16 Embeddings _ 20 +Lemmatizer_Case_int 16 Embeddings _ 2000 +Lemmatizer_Case_eos 16 Embeddings _ 20 +Lemmatizer_Case_gov 16 Embeddings _ 20 +Lemmatizer_Case_pos 18 Embeddings _ 30 +Lemmatizer_Case_form 80 Embeddings _ 300000 +Lemmatizer_Case_form.f 80 Embeddings _ 20 +Lemmatizer_Case_lemma 80 Embeddings _ 20 +Lemmatizer_Case_letters 80 Embeddings _ 5000 +Lemmatizer_Case_labels 18 Embeddings _ 20 +Lemmatizer_Case_morpho 22 Embeddings _ 8000 +# PARSER +Parser_actions 18 Embeddings _ 200 +Parser_bool 16 Embeddings _ 20 +Parser_int 16 Embeddings _ 2000 +Parser_eos 16 Embeddings _ 20 +Parser_gov 16 Embeddings _ 100 +Parser_pos 18 Embeddings _ 30 +Parser_form 80 Embeddings _ 20 +Parser_form.f 80 Embeddings _ 200000 +Parser_lemma 80 Embeddings _ 20 +Parser_letters 80 Embeddings _ 5000 +Parser_labels 18 Embeddings _ 200 +Parser_morpho 22 Embeddings _ 8000 +# SEGMENTER +Segmenter_actions 18 Embeddings _ 200 +Segmenter_bool 16 Embeddings _ 20 +Segmenter_int 16 Embeddings _ 2000 +Segmenter_eos 16 Embeddings _ 20 +Segmenter_gov 16 Embeddings _ 100 +Segmenter_pos 18 Embeddings _ 30 +Segmenter_form 80 Embeddings _ 20 +Segmenter_form.f 80 Embeddings _ 200000 +Segmenter_lemma 80 Embeddings _ 20 +Segmenter_letters 80 Embeddings _ 5000 +Segmenter_labels 18 Embeddings _ 200 +Segmenter_morpho 22 Embeddings _ 8000 diff --git a/UD_any/tokeparser_incremental_b6/train.bd b/UD_any/tokeparser_incremental_b6/train.bd new file mode 100644 index 0000000..963c311 --- /dev/null +++ b/UD_any/tokeparser_incremental_b6/train.bd @@ -0,0 +1,11 @@ +#Index Name ref/hyp dict Policy Must print?# +################################################### +0 ID hyp none FromZero 1 +1 FORM hyp form FromZero 1 +3 POS hyp pos FromZero 1 +4 XPOS hyp pos FromZero 1 +5 MORPHO hyp morpho FromZero 1 +2 LEMMA hyp form FromZero 1 +6 GOV hyp int FromZero 1 +7 LABEL hyp labels FromZero 1 +0 EOS hyp eos FromZero 0 -- GitLab