From 014779b18cc42227716916cb4495a864633c9a73 Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Wed, 6 May 2020 19:02:59 +0200 Subject: [PATCH] Added module UppercaseRate --- common/include/utf8string.hpp | 1 + common/include/util.hpp | 2 + common/src/utf8string.cpp | 5 + common/src/util.cpp | 7 ++ torch_modules/include/ModularNetwork.hpp | 1 + torch_modules/include/UppercaseRateModule.hpp | 30 ++++++ torch_modules/src/ModularNetwork.cpp | 2 + torch_modules/src/UppercaseRateModule.cpp | 97 +++++++++++++++++++ 8 files changed, 145 insertions(+) create mode 100644 torch_modules/include/UppercaseRateModule.hpp create mode 100644 torch_modules/src/UppercaseRateModule.cpp diff --git a/common/include/utf8string.hpp b/common/include/utf8string.hpp index 2a7400e..21937d3 100644 --- a/common/include/utf8string.hpp +++ b/common/include/utf8string.hpp @@ -15,6 +15,7 @@ class utf8char : public std::array<char, 4> utf8char(); utf8char(const std::string & other); + utf8char(const char * other); utf8char & operator=(char other); utf8char & operator=(const std::string & other); bool operator==(char other); diff --git a/common/include/util.hpp b/common/include/util.hpp index 58b288a..88f2b89 100644 --- a/common/include/util.hpp +++ b/common/include/util.hpp @@ -54,6 +54,8 @@ bool isSeparator(utf8char c); bool isIllegal(utf8char c); +bool isUppercase(utf8char c); + std::string getTime(); template <typename T> diff --git a/common/src/utf8string.cpp b/common/src/utf8string.cpp index 42755db..374ff9d 100644 --- a/common/src/utf8string.cpp +++ b/common/src/utf8string.cpp @@ -12,6 +12,11 @@ util::utf8char::utf8char(const std::string & other) *this = other; }
+// Builds a utf8char from a NUL-terminated C string by handing a temporary
+// std::string to the std::string assignment operator of this class.
+// `other` must not be null: std::string cannot be constructed from a null pointer.
+util::utf8char::utf8char(const char * other)
+{
+  operator=(std::string(other));
+}
+
 util::utf8char & util::utf8char::operator=(char other) { (*this)[0] = other; diff --git a/common/src/util.cpp b/common/src/util.cpp index d8b2281..50ee14a 100644 --- a/common/src/util.cpp +++ b/common/src/util.cpp @@ -205,3 
+205,10 @@ bool util::choiceWithProbability(float probability) return (std::rand() % maxVal) < threshold; } +bool util::isUppercase(utf8char c) +{ + static std::set<utf8char> uppercaseChars{"A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","À","Á","Â","Ã","Ä","Å","Æ","Ç","È","É","Ê","Ë","Ì","Í","Î","Ï","Ð","Ñ","Ò","Ó","Ô","Õ","Ö","Ø","Ù","Ú","Û","Ü","Ý","Þ","Ā","Ă","Ą","Ć","Ĉ","Ċ","Č","Ď","Đ","Ē","Ĕ","Ė","Ę","Ě","Ĝ","Ğ","Ġ","Ģ","Ĥ","Ħ","Ĩ","Ī","Ĭ","Į","İ","IJ","Ĵ","Ķ","Ĺ","Ļ","Ľ","Ŀ","Ł","Ń","Ņ","Ň","Ŋ","Ō","Ŏ","Ő","Œ","Ŕ","Ŗ","Ř","Ś","Ŝ","Ş","Š","Ţ","Ť","Ŧ","Ũ","Ū","Ŭ","Ů","Ű","Ų","Ŵ","Ŷ","Ÿ","Ź","Ż","Ž","Ɓ","Ƃ","Ƅ","Ɔ","Ƈ","Ɖ","Ɗ","Ƌ","Ǝ","Ə","Ɛ","Ƒ","Ɠ","Ɣ","Ɩ","Ɨ","Ƙ","Ɯ","Ɲ","Ɵ","Ơ","Ƣ","Ƥ","Ʀ","Ƨ","Ʃ","Ƭ","Ʈ","Ư","Ʊ","Ʋ","Ƴ","Ƶ","Ʒ","Ƹ","Ƽ","DŽ","LJ","NJ","Ǎ","Ǐ","Ǒ","Ǔ","Ǖ","Ǘ","Ǚ","Ǜ","Ǟ","Ǡ","Ǣ","Ǥ","Ǧ","Ǩ","Ǫ","Ǭ","Ǯ","DZ","Ǵ","Ƕ","Ƿ","Ǹ","Ǻ","Ǽ","Ǿ","Ȁ","Ȃ","Ȅ","Ȇ","Ȉ","Ȋ","Ȍ","Ȏ","Ȑ","Ȓ","Ȕ","Ȗ","Ș","Ț","Ȝ","Ȟ","Ƞ","Ȣ","Ȥ","Ȧ","Ȩ","Ȫ","Ȭ","Ȯ","Ȱ","Ȳ","Ⱥ","Ȼ","Ƚ","Ⱦ","Ɂ","Ƀ","Ʉ","Ʌ","Ɇ","Ɉ","Ɋ","Ɍ","Ɏ","Ͱ","Ͳ","Ͷ","Ϳ","Ά","Έ","Ή","Ί","Ό","Ύ","Ώ","Α","Β","Γ","Δ","Ε","Ζ","Η","Θ","Ι","Κ","Λ","Μ","Ν","Ξ","Ο","Π","Ρ","Σ","Τ","Υ","Φ","Χ","Ψ","Ω","Ϊ","Ϋ","Ϗ","ϒ","ϓ","ϔ","Ϙ","Ϛ","Ϝ","Ϟ","Ϡ","Ϣ","Ϥ","Ϧ","Ϩ","Ϫ","Ϭ","Ϯ","ϴ","Ϸ","Ϲ","Ϻ","Ͻ","Ͼ","Ͽ","Ѐ","Ё","Ђ","Ѓ","Є","Ѕ","І","Ї","Ј","Љ","Њ","Ћ","Ќ","Ѝ","Ў","Џ","А","Б","В","Г","Д","Е","Ж","З","И","Й","К","Л","М","Н","О","П","Р","С","Т","У","Ф","Х","Ц","Ч","Ш","Щ","Ъ","Ы","Ь","Э","Ю","Я","Ѡ","Ѣ","Ѥ","Ѧ","Ѩ","Ѫ","Ѭ","Ѯ","Ѱ","Ѳ","Ѵ","Ѷ","Ѹ","Ѻ","Ѽ","Ѿ","Ҁ","Ҋ","Ҍ","Ҏ","Ґ","Ғ","Ҕ","Җ","Ҙ","Қ","Ҝ","Ҟ","Ҡ","Ң","Ҥ","Ҧ","Ҩ","Ҫ","Ҭ","Ү","Ұ","Ҳ","Ҵ","Ҷ","Ҹ","Һ","Ҽ","Ҿ","Ӏ","Ӂ","Ӄ","Ӆ","Ӈ","Ӊ","Ӌ","Ӎ","Ӑ","Ӓ","Ӕ","Ӗ","Ә","Ӛ","Ӝ","Ӟ","Ӡ","Ӣ","Ӥ","Ӧ","Ө","Ӫ","Ӭ","Ӯ","Ӱ","Ӳ","Ӵ","Ӷ","Ӹ","Ӻ","Ӽ","Ӿ","Ԁ","Ԃ","Ԅ","Ԇ","Ԉ","Ԋ","Ԍ","Ԏ","Ԑ","Ԓ","Ԕ","Ԗ","Ԙ","Ԛ","Ԝ","Ԟ","Ԡ","Ԣ","Ԥ","Ԧ","Ԩ","Ԫ","Ԭ","Ԯ","Ա","Բ","Գ","Դ","Ե","Զ","Է","Ը","Թ","Ժ","Ի","Լ","Խ","Ծ","Կ","Հ","Ձ","Ղ","Ճ","Մ","Յ","
Ն","Շ","Ո","Չ","Պ","Ջ","Ռ","Ս","Վ","Տ","Ր","Ց","Ւ","Փ","Ք","Օ","Ֆ","Ⴀ","Ⴁ","Ⴂ","Ⴃ","Ⴄ","Ⴅ","Ⴆ","Ⴇ","Ⴈ","Ⴉ","Ⴊ","Ⴋ","Ⴌ","Ⴍ","Ⴎ","Ⴏ","Ⴐ","Ⴑ","Ⴒ","Ⴓ","Ⴔ","Ⴕ","Ⴖ","Ⴗ","Ⴘ","Ⴙ","Ⴚ","Ⴛ","Ⴜ","Ⴝ","Ⴞ","Ⴟ","Ⴠ","Ⴡ","Ⴢ","Ⴣ","Ⴤ","Ⴥ","Ⴧ","Ⴭ","Ꭰ","Ꭱ","Ꭲ","Ꭳ","Ꭴ","Ꭵ","Ꭶ","Ꭷ","Ꭸ","Ꭹ","Ꭺ","Ꭻ","Ꭼ","Ꭽ","Ꭾ","Ꭿ","Ꮀ","Ꮁ","Ꮂ","Ꮃ","Ꮄ","Ꮅ","Ꮆ","Ꮇ","Ꮈ","Ꮉ","Ꮊ","Ꮋ","Ꮌ","Ꮍ","Ꮎ","Ꮏ","Ꮐ","Ꮑ","Ꮒ","Ꮓ","Ꮔ","Ꮕ","Ꮖ","Ꮗ","Ꮘ","Ꮙ","Ꮚ","Ꮛ","Ꮜ","Ꮝ","Ꮞ","Ꮟ","Ꮠ","Ꮡ","Ꮢ","Ꮣ","Ꮤ","Ꮥ","Ꮦ","Ꮧ","Ꮨ","Ꮩ","Ꮪ","Ꮫ","Ꮬ","Ꮭ","Ꮮ","Ꮯ","Ꮰ","Ꮱ","Ꮲ","Ꮳ","Ꮴ","Ꮵ","Ꮶ","Ꮷ","Ꮸ","Ꮹ","Ꮺ","Ꮻ","Ꮼ","Ꮽ","Ꮾ","Ꮿ","Ᏸ","Ᏹ","Ᏺ","Ᏻ","Ᏼ","Ᏽ","Ა","Ბ","Გ","Დ","Ე","Ვ","Ზ","Თ","Ი","Კ","Ლ","Მ","Ნ","Ო","Პ","Ჟ","Რ","Ს","Ტ","Უ","Ფ","Ქ","Ღ","Ყ","Შ","Ჩ","Ც","Ძ","Წ","Ჭ","Ხ","Ჯ","Ჰ","Ჱ","Ჲ","Ჳ","Ჴ","Ჵ","Ჶ","Ჷ","Ჸ","Ჹ","Ჺ","Ჽ","Ჾ","Ჿ","Ḁ","Ḃ","Ḅ","Ḇ","Ḉ","Ḋ","Ḍ","Ḏ","Ḑ","Ḓ","Ḕ","Ḗ","Ḙ","Ḛ","Ḝ","Ḟ","Ḡ","Ḣ","Ḥ","Ḧ","Ḩ","Ḫ","Ḭ","Ḯ","Ḱ","Ḳ","Ḵ","Ḷ","Ḹ","Ḻ","Ḽ","Ḿ","Ṁ","Ṃ","Ṅ","Ṇ","Ṉ","Ṋ","Ṍ","Ṏ","Ṑ","Ṓ","Ṕ","Ṗ","Ṙ","Ṛ","Ṝ","Ṟ","Ṡ","Ṣ","Ṥ","Ṧ","Ṩ","Ṫ","Ṭ","Ṯ","Ṱ","Ṳ","Ṵ","Ṷ","Ṹ","Ṻ","Ṽ","Ṿ","Ẁ","Ẃ","Ẅ","Ẇ","Ẉ","Ẋ","Ẍ","Ẏ","Ẑ","Ẓ","Ẕ","ẞ","Ạ","Ả","Ấ","Ầ","Ẩ","Ẫ","Ậ","Ắ","Ằ","Ẳ","Ẵ","Ặ","Ẹ","Ẻ","Ẽ","Ế","Ề","Ể","Ễ","Ệ","Ỉ","Ị","Ọ","Ỏ","Ố","Ồ","Ổ","Ỗ","Ộ","Ớ","Ờ","Ở","Ỡ","Ợ","Ụ","Ủ","Ứ","Ừ","Ử","Ữ","Ự","Ỳ","Ỵ","Ỷ","Ỹ","Ỻ","Ỽ","Ỿ","Ἀ","Ἁ","Ἂ","Ἃ","Ἄ","Ἅ","Ἆ","Ἇ","Ἐ","Ἑ","Ἒ","Ἓ","Ἔ","Ἕ","Ἠ","Ἡ","Ἢ","Ἣ","Ἤ","Ἥ","Ἦ","Ἧ","Ἰ","Ἱ","Ἲ","Ἳ","Ἴ","Ἵ","Ἶ","Ἷ","Ὀ","Ὁ","Ὂ","Ὃ","Ὄ","Ὅ","Ὑ","Ὓ","Ὕ","Ὗ","Ὠ","Ὡ","Ὢ","Ὣ","Ὤ","Ὥ","Ὦ","Ὧ","Ᾰ","Ᾱ","Ὰ","Ά","Ὲ","Έ","Ὴ","Ή","Ῐ","Ῑ","Ὶ","Ί","Ῠ","Ῡ","Ὺ","Ύ","Ῥ","Ὸ","Ό","Ὼ","Ώ","ℂ","ℇ","ℋ","ℌ","ℍ","ℐ","ℑ","ℒ","ℕ","ℙ","ℚ","ℛ","ℜ","ℝ","ℤ","Ω","ℨ","K","Å","ℬ","ℭ","ℰ","ℱ","Ⅎ","ℳ","ℾ","ℿ","ⅅ","Ↄ","Ⰰ","Ⰱ","Ⰲ","Ⰳ","Ⰴ","Ⰵ","Ⰶ","Ⰷ","Ⰸ","Ⰹ","Ⰺ","Ⰻ","Ⰼ","Ⰽ","Ⰾ","Ⰿ","Ⱀ","Ⱁ","Ⱂ","Ⱃ","Ⱄ","Ⱅ","Ⱆ","Ⱇ","Ⱈ","Ⱉ","Ⱊ","Ⱋ","Ⱌ","Ⱍ","Ⱎ","Ⱏ","Ⱐ","Ⱑ","Ⱒ","Ⱓ","Ⱔ","Ⱕ","Ⱖ","Ⱗ","Ⱘ","Ⱙ","Ⱚ","Ⱛ","Ⱜ","Ⱝ","Ⱞ","Ⱡ","Ɫ","Ᵽ","Ɽ","Ⱨ","Ⱪ","Ⱬ","Ɑ","Ɱ","Ɐ","Ɒ","Ⱳ","Ⱶ","Ȿ","Ɀ","Ⲁ","Ⲃ","Ⲅ","Ⲇ","Ⲉ","Ⲋ","Ⲍ","Ⲏ","Ⲑ","Ⲓ","Ⲕ","Ⲗ","Ⲙ","Ⲛ","Ⲝ","Ⲟ","Ⲡ","Ⲣ","Ⲥ","Ⲧ","Ⲩ","Ⲫ","Ⲭ","Ⲯ","Ⲱ","Ⲳ","Ⲵ","
Ⲷ","Ⲹ","Ⲻ","Ⲽ","Ⲿ","Ⳁ","Ⳃ","Ⳅ","Ⳇ","Ⳉ","Ⳋ","Ⳍ","Ⳏ","Ⳑ","Ⳓ","Ⳕ","Ⳗ","Ⳙ","Ⳛ","Ⳝ","Ⳟ","Ⳡ","Ⳣ","Ⳬ","Ⳮ","Ⳳ","Ꙁ","Ꙃ","Ꙅ","Ꙇ","Ꙉ","Ꙋ","Ꙍ","Ꙏ","Ꙑ","Ꙓ","Ꙕ","Ꙗ","Ꙙ","Ꙛ","Ꙝ","Ꙟ","Ꙡ","Ꙣ","Ꙥ","Ꙧ","Ꙩ","Ꙫ","Ꙭ","Ꚁ","Ꚃ","Ꚅ","Ꚇ","Ꚉ","Ꚋ","Ꚍ","Ꚏ","Ꚑ","Ꚓ","Ꚕ","Ꚗ","Ꚙ","Ꚛ","Ꜣ","Ꜥ","Ꜧ","Ꜩ","Ꜫ","Ꜭ","Ꜯ","Ꜳ","Ꜵ","Ꜷ","Ꜹ","Ꜻ","Ꜽ","Ꜿ","Ꝁ","Ꝃ","Ꝅ","Ꝇ","Ꝉ","Ꝋ","Ꝍ","Ꝏ","Ꝑ","Ꝓ","Ꝕ","Ꝗ","Ꝙ","Ꝛ","Ꝝ","Ꝟ","Ꝡ","Ꝣ","Ꝥ","Ꝧ","Ꝩ","Ꝫ","Ꝭ","Ꝯ","Ꝺ","Ꝼ","Ᵹ","Ꝿ","Ꞁ","Ꞃ","Ꞅ","Ꞇ","Ꞌ","Ɥ","Ꞑ","Ꞓ","Ꞗ","Ꞙ","Ꞛ","Ꞝ","Ꞟ","Ꞡ","Ꞣ","Ꞥ","Ꞧ","Ꞩ","Ɦ","Ɜ","Ɡ","Ɬ","Ɪ","Ʞ","Ʇ","Ʝ","Ꭓ","Ꞵ","Ꞷ","Ꞹ","Ꞻ","Ꞽ","Ꞿ","Ꟃ","Ꞔ","Ʂ","Ᶎ","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","𐐀","𐐁","𐐂","𐐃","𐐄","𐐅","𐐆","𐐇","𐐈","𐐉","𐐊","𐐋","𐐌","𐐍","𐐎","𐐏","𐐐","𐐑","𐐒","𐐓","𐐔","𐐕","𐐖","𐐗","𐐘","𐐙","𐐚","𐐛","𐐜","𐐝","𐐞","𐐟","𐐠","𐐡","𐐢","𐐣","𐐤","𐐥","𐐦","𐐧","𐒰","𐒱","𐒲","𐒳","𐒴","𐒵","𐒶","𐒷","𐒸","𐒹","𐒺","𐒻","𐒼","𐒽","𐒾","𐒿","𐓀","𐓁","𐓂","𐓃","𐓄","𐓅","𐓆","𐓇","𐓈","𐓉","𐓊","𐓋","𐓌","𐓍","𐓎","𐓏","𐓐","𐓑","𐓒","𐓓","𐲀","𐲁","𐲂","𐲃","𐲄","𐲅","𐲆","𐲇","𐲈","𐲉","𐲊","𐲋","𐲌","𐲍","𐲎","𐲏","𐲐","𐲑","𐲒","𐲓","𐲔","𐲕","𐲖","𐲗","𐲘","𐲙","𐲚","𐲛","𐲜","𐲝","𐲞","𐲟","𐲠","𐲡","𐲢","𐲣","𐲤","𐲥","𐲦","𐲧","𐲨","𐲩","𐲪","𐲫","𐲬","𐲭","𐲮","𐲯","𐲰","𐲱","𐲲","𑢠","𑢡","𑢢","𑢣","𑢤","𑢥","𑢦","𑢧","𑢨","𑢩","𑢪","𑢫","𑢬","𑢭","𑢮","𑢯","𑢰","𑢱","𑢲","𑢳","𑢴","𑢵","𑢶","𑢷","𑢸","𑢹","𑢺","𑢻","𑢼","𑢽","𑢾","𑢿","𖹀","𖹁","𖹂","𖹃","𖹄","𖹅","𖹆","𖹇","𖹈","𖹉","𖹊","𖹋","𖹌","𖹍","𖹎","𖹏","𖹐","𖹑","𖹒","𖹓","𖹔","𖹕","𖹖","𖹗","𖹘","𖹙","𖹚","𖹛","𖹜","𖹝","𖹞","𖹟","𝐀","𝐁","𝐂","𝐃","𝐄","𝐅","𝐆","𝐇","𝐈","𝐉","𝐊","𝐋","𝐌","𝐍","𝐎","𝐏","𝐐","𝐑","𝐒","𝐓","𝐔","𝐕","𝐖","𝐗","𝐘","𝐙","𝐴","𝐵","𝐶","𝐷","𝐸","𝐹","𝐺","𝐻","𝐼","𝐽","𝐾","𝐿","𝑀","𝑁","𝑂","𝑃","𝑄","𝑅","𝑆","𝑇","𝑈","𝑉","𝑊","𝑋","𝑌","𝑍","𝑨","𝑩","𝑪","𝑫","𝑬","𝑭","𝑮","𝑯","𝑰","𝑱","𝑲","𝑳","𝑴","𝑵","𝑶","𝑷","𝑸","𝑹","𝑺","𝑻","𝑼","𝑽","𝑾","𝑿","𝒀","𝒁","𝒜","𝒞","𝒟","𝒢","𝒥","𝒦","𝒩","𝒪","𝒫","𝒬","𝒮","𝒯","𝒰","𝒱","𝒲","𝒳","𝒴","𝒵","𝓐","𝓑","𝓒","𝓓","𝓔","𝓕","𝓖","𝓗","𝓘","𝓙","𝓚","𝓛","𝓜","𝓝","𝓞","𝓟","𝓠","𝓡","𝓢","𝓣","𝓤","𝓥","𝓦","𝓧","𝓨","𝓩","𝔄","𝔅","𝔇","𝔈","𝔉","𝔊","𝔍","𝔎","𝔏","𝔐","𝔑","𝔒","𝔓","𝔔","𝔖","𝔗","𝔘","𝔙","𝔚","
𝔛","𝔜","𝔸","𝔹","𝔻","𝔼","𝔽","𝔾","𝕀","𝕁","𝕂","𝕃","𝕄","𝕆","𝕊","𝕋","𝕌","𝕍","𝕎","𝕏","𝕐","𝕬","𝕭","𝕮","𝕯","𝕰","𝕱","𝕲","𝕳","𝕴","𝕵","𝕶","𝕷","𝕸","𝕹","𝕺","𝕻","𝕼","𝕽","𝕾","𝕿","𝖀","𝖁","𝖂","𝖃","𝖄","𝖅","𝖠","𝖡","𝖢","𝖣","𝖤","𝖥","𝖦","𝖧","𝖨","𝖩","𝖪","𝖫","𝖬","𝖭","𝖮","𝖯","𝖰","𝖱","𝖲","𝖳","𝖴","𝖵","𝖶","𝖷","𝖸","𝖹","𝗔","𝗕","𝗖","𝗗","𝗘","𝗙","𝗚","𝗛","𝗜","𝗝","𝗞","𝗟","𝗠","𝗡","𝗢","𝗣","𝗤","𝗥","𝗦","𝗧","𝗨","𝗩","𝗪","𝗫","𝗬","𝗭","𝘈","𝘉","𝘊","𝘋","𝘌","𝘍","𝘎","𝘏","𝘐","𝘑","𝘒","𝘓","𝘔","𝘕","𝘖","𝘗","𝘘","𝘙","𝘚","𝘛","𝘜","𝘝","𝘞","𝘟","𝘠","𝘡","𝘼","𝘽","𝘾","𝘿","𝙀","𝙁","𝙂","𝙃","𝙄","𝙅","𝙆","𝙇","𝙈","𝙉","𝙊","𝙋","𝙌","𝙍","𝙎","𝙏","𝙐","𝙑","𝙒","𝙓","𝙔","𝙕","𝙰","𝙱","𝙲","𝙳","𝙴","𝙵","𝙶","𝙷","𝙸","𝙹","𝙺","𝙻","𝙼","𝙽","𝙾","𝙿","𝚀","𝚁","𝚂","𝚃","𝚄","𝚅","𝚆","𝚇","𝚈","𝚉","𝚨","𝚩","𝚪","𝚫","𝚬","𝚭","𝚮","𝚯","𝚰","𝚱","𝚲","𝚳","𝚴","𝚵","𝚶","𝚷","𝚸","𝚹","𝚺","𝚻","𝚼","𝚽","𝚾","𝚿","𝛀","𝛢","𝛣","𝛤","𝛥","𝛦","𝛧","𝛨","𝛩","𝛪","𝛫","𝛬","𝛭","𝛮","𝛯","𝛰","𝛱","𝛲","𝛳","𝛴","𝛵","𝛶","𝛷","𝛸","𝛹","𝛺","𝜜","𝜝","𝜞","𝜟","𝜠","𝜡","𝜢","𝜣","𝜤","𝜥","𝜦","𝜧","𝜨","𝜩","𝜪","𝜫","𝜬","𝜭","𝜮","𝜯","𝜰","𝜱","𝜲","𝜳","𝜴","𝝖","𝝗","𝝘","𝝙","𝝚","𝝛","𝝜","𝝝","𝝞","𝝟","𝝠","𝝡","𝝢","𝝣","𝝤","𝝥","𝝦","𝝧","𝝨","𝝩","𝝪","𝝫","𝝬","𝝭","𝝮","𝞐","𝞑","𝞒","𝞓","𝞔","𝞕","𝞖","𝞗","𝞘","𝞙","𝞚","𝞛","𝞜","𝞝","𝞞","𝞟","𝞠","𝞡","𝞢","𝞣","𝞤","𝞥","𝞦","𝞧","𝞨","𝟊","𞤀","𞤁","𞤂","𞤃","𞤄","𞤅","𞤆","𞤇","𞤈","𞤉","𞤊","𞤋","𞤌","𞤍","𞤎","𞤏","𞤐","𞤑","𞤒","𞤓","𞤔","𞤕","𞤖","𞤗","𞤘","𞤙","𞤚","𞤛","𞤜","𞤝","𞤞","𞤟","𞤠","𞤡"}; + + return uppercaseChars.count(c); +} + diff --git a/torch_modules/include/ModularNetwork.hpp b/torch_modules/include/ModularNetwork.hpp index 11a161e..70c159c 100644 --- a/torch_modules/include/ModularNetwork.hpp +++ b/torch_modules/include/ModularNetwork.hpp @@ -8,6 +8,7 @@ #include "FocusedColumnModule.hpp" #include "DepthLayerTreeEmbeddingModule.hpp" #include "StateNameModule.hpp" +#include "UppercaseRateModule.hpp" #include "MLP.hpp" class ModularNetworkImpl : public NeuralNetworkImpl diff --git a/torch_modules/include/UppercaseRateModule.hpp b/torch_modules/include/UppercaseRateModule.hpp new file mode 100644 index 0000000..5f174ef --- 
/dev/null +++ b/torch_modules/include/UppercaseRateModule.hpp @@ -0,0 +1,30 @@ +#ifndef UPPERCASERATEMODULE__H +#define UPPERCASERATEMODULE__H + +#include <torch/torch.h> +#include "Submodule.hpp" +#include "MyModule.hpp" +#include "LSTM.hpp" +#include "GRU.hpp" + +class UppercaseRateModuleImpl : public Submodule +{ + private : + + int outSize; + std::vector<int> focusedBuffer, focusedStack; + std::shared_ptr<MyModule> myModule{nullptr}; + + public : + + UppercaseRateModuleImpl(std::string name, const std::string & definition); + torch::Tensor forward(torch::Tensor input); + std::size_t getOutputSize() override; + std::size_t getInputSize() override; + void addToContext(std::vector<std::vector<long>> & context, const Config & config) override; + void registerEmbeddings() override; +}; +TORCH_MODULE(UppercaseRateModule); + +#endif + diff --git a/torch_modules/src/ModularNetwork.cpp b/torch_modules/src/ModularNetwork.cpp index db8d9d0..5edf1b4 100644 --- a/torch_modules/src/ModularNetwork.cpp +++ b/torch_modules/src/ModularNetwork.cpp @@ -27,6 +27,8 @@ ModularNetworkImpl::ModularNetworkImpl(std::string name, std::map<std::string,st modules.emplace_back(register_module(name, ContextModule(nameH, splited.second))); else if (splited.first == "StateName") modules.emplace_back(register_module(name, StateNameModule(nameH, splited.second))); + else if (splited.first == "UppercaseRate") + modules.emplace_back(register_module(name, UppercaseRateModule(nameH, splited.second))); else if (splited.first == "Focused") modules.emplace_back(register_module(name, FocusedColumnModule(nameH, splited.second))); else if (splited.first == "RawInput") diff --git a/torch_modules/src/UppercaseRateModule.cpp b/torch_modules/src/UppercaseRateModule.cpp new file mode 100644 index 0000000..70f1bea --- /dev/null +++ b/torch_modules/src/UppercaseRateModule.cpp @@ -0,0 +1,97 @@ +#include "UppercaseRateModule.hpp" +#include "NeuralNetwork.hpp" + 
+// Builds the module from its textual definition, which must match
+//   Buffer{i j ...} Stack{i j ...} <LSTM|GRU>{bidirectional num_layers dropout complete} Out{size}
+// Any parsing problem is reported through util::myThrow together with the definition.
+UppercaseRateModuleImpl::UppercaseRateModuleImpl(std::string name, const std::string & definition)
+{
+  setName(name);
+  std::regex regex("(?:(?:\\s|\\t)*)Buffer\\{(.*)\\}(?:(?:\\s|\\t)*)Stack\\{(.*)\\}(?:(?:\\s|\\t)*)(\\S+)\\{(.*)\\}(?:(?:\\s|\\t)*)Out\\{(.*)\\}(?:(?:\\s|\\t)*)");
+  if (!util::doIfNameMatch(regex, definition, [this,&definition](auto sm)
+        {
+          try
+          {
+            for (auto & index : util::split(sm.str(1), ' '))
+              focusedBuffer.emplace_back(std::stoi(index));
+
+            for (auto & index : util::split(sm.str(2), ' '))
+              focusedStack.emplace_back(std::stoi(index));
+
+            auto subModuleType = sm.str(3);
+            auto subModuleArguments = util::split(sm.str(4), ' ');
+
+            // The four options are read by position below; reject shorter lists
+            // instead of indexing past the end.
+            if (subModuleArguments.size() < 4)
+              util::myThrow(fmt::format("expected 4 arguments for submodule '{}', got {}", subModuleType, subModuleArguments.size()));
+
+            auto options = MyModule::ModuleOptions(true)
+              .bidirectional(std::stoi(subModuleArguments[0]))
+              .num_layers(std::stoi(subModuleArguments[1]))
+              .dropout(std::stof(subModuleArguments[2]))
+              .complete(std::stoi(subModuleArguments[3]));
+
+            // Assign the member: a local named outSize would shadow it and leave it unset.
+            outSize = std::stoi(sm.str(5));
+
+            if (subModuleType == "LSTM")
+              myModule = register_module("myModule", LSTM(1, outSize, options));
+            else if (subModuleType == "GRU")
+              myModule = register_module("myModule", GRU(1, outSize, options));
+            else
+              util::myThrow(fmt::format("unknown sumodule type '{}'", subModuleType));
+          } catch (const std::exception & e) {util::myThrow(fmt::format("{} in '{}'",e.what(),definition));}
+        }))
+    util::myThrow(fmt::format("invalid definition '{}'", definition));
+}
+
+// Extracts this module's columns from `input`, reinterprets their 64-bit slots as the
+// doubles stored by addToContext, and feeds them to the recurrent sub-network as a
+// (rows, getInputSize(), 1) float tensor.
+// NOTE(review): from_blob is given no device, so the data is presumably expected in
+// host memory -- confirm against the caller.
+torch::Tensor UppercaseRateModuleImpl::forward(torch::Tensor input)
+{
+  auto context = input.narrow(1, firstInputIndex, getInputSize());
+  // from_blob does not own the memory it wraps. Keep the contiguous tensor in a named
+  // variable so that its storage stays alive until clone() has copied the values;
+  // taking data_ptr() of a temporary left a dangling pointer.
+  auto packed = context.contiguous();
+  auto values = torch::from_blob(packed.data_ptr(), {static_cast<long>(packed.numel())}, torch::kDouble)
+    .clone()
+    .to(torch::kFloat)
+    .to(NeuralNetworkImpl::device)
+    .view({static_cast<long>(context.size(0)), static_cast<long>(context.size(1)), 1});
+  return myModule->forward(values);
+}
+
+// Output size, as computed by the sub-network for getInputSize() input values.
+std::size_t UppercaseRateModuleImpl::getOutputSize()
+{
+  return myModule->getOutputSize(getInputSize());
+}
+
+// One value per focused buffer position plus one per focused stack position.
+std::size_t UppercaseRateModuleImpl::getInputSize()
+{
+  return focusedBuffer.size() + focusedStack.size();
+}
+
+// Appends to every row of `context` one slot per focused position. Each slot holds the
+// bit pattern of a double: the share of uppercase letters in the word's FORM, or -1.0
+// when the position does not exist or the word is empty.
+void UppercaseRateModuleImpl::addToContext(std::vector<std::vector<long>> & context, const Config & config)
+{
+  static_assert(sizeof(long) == sizeof(double), "uppercase rates are stored bit-for-bit in long context slots");
+
+  std::vector<long> focusedIndexes;
+
+  for (int index : focusedBuffer)
+    focusedIndexes.emplace_back(config.getRelativeWordIndex(index));
+
+  for (int index : focusedStack)
+    if (config.hasStack(index))
+      focusedIndexes.emplace_back(config.getStack(index));
+    else
+      focusedIndexes.emplace_back(-1);
+
+  if (context.empty())
+    return;
+
+  // The rates depend only on the configuration, not on the row being filled:
+  // compute and encode them once, then append the same slots to every row.
+  std::vector<long> encodedRates;
+  encodedRates.reserve(focusedIndexes.size());
+
+  for (auto index : focusedIndexes)
+  {
+    double rate = -1.0;
+    if (index >= 0)
+    {
+      auto word = util::splitAsUtf8(config.getAsFeature("FORM", index).get());
+      int nbUpper = 0;
+      for (auto & letter : word)
+        if (util::isUppercase(letter))
+          nbUpper++;
+      if (word.size() > 0)
+        rate = 1.0*nbUpper/word.size();
+    }
+
+    long slot = 0;
+    std::memcpy(&slot, &rate, sizeof rate);
+    encodedRates.emplace_back(slot);
+  }
+
+  for (auto & contextElement : context)
+    contextElement.insert(contextElement.end(), encodedRates.begin(), encodedRates.end());
+}
+
+// This module has no embeddings to register.
+void UppercaseRateModuleImpl::registerEmbeddings()
+{
+}
+
-- GitLab