diff --git a/common/include/upper2lower b/common/include/upper2lower new file mode 100644 index 0000000000000000000000000000000000000000..dfde4b913f3d730ca65ff1c49559bc92762c4986 --- /dev/null +++ b/common/include/upper2lower @@ -0,0 +1,1797 @@ + +namespace util +{ + +std::map<utf8char, utf8char> upper2lower +{ + {"A", "a"}, + {"B", "b"}, + {"C", "c"}, + {"D", "d"}, + {"E", "e"}, + {"F", "f"}, + {"G", "g"}, + {"H", "h"}, + {"I", "i"}, + {"J", "j"}, + {"K", "k"}, + {"L", "l"}, + {"M", "m"}, + {"N", "n"}, + {"O", "o"}, + {"P", "p"}, + {"Q", "q"}, + {"R", "r"}, + {"S", "s"}, + {"T", "t"}, + {"U", "u"}, + {"V", "v"}, + {"W", "w"}, + {"X", "x"}, + {"Y", "y"}, + {"Z", "z"}, + {"À", "à"}, + {"Á", "á"}, + {"Â", "â"}, + {"Ã", "ã"}, + {"Ä", "ä"}, + {"Å", "å"}, + {"Æ", "æ"}, + {"Ç", "ç"}, + {"È", "è"}, + {"É", "é"}, + {"Ê", "ê"}, + {"Ë", "ë"}, + {"Ì", "ì"}, + {"Í", "í"}, + {"Î", "î"}, + {"Ï", "ï"}, + {"Ð", "ð"}, + {"Ñ", "ñ"}, + {"Ò", "ò"}, + {"Ó", "ó"}, + {"Ô", "ô"}, + {"Õ", "õ"}, + {"Ö", "ö"}, + {"Ø", "ø"}, + {"Ù", "ù"}, + {"Ú", "ú"}, + {"Û", "û"}, + {"Ü", "ü"}, + {"Ý", "ý"}, + {"Þ", "þ"}, + {"Ā", "ā"}, + {"Ă", "ă"}, + {"Ą", "ą"}, + {"Ć", "ć"}, + {"Ĉ", "ĉ"}, + {"Ċ", "ċ"}, + {"Č", "č"}, + {"Ď", "ď"}, + {"Đ", "đ"}, + {"Ē", "ē"}, + {"Ĕ", "ĕ"}, + {"Ė", "ė"}, + {"Ę", "ę"}, + {"Ě", "ě"}, + {"Ĝ", "ĝ"}, + {"Ğ", "ğ"}, + {"Ġ", "ġ"}, + {"Ģ", "ģ"}, + {"Ĥ", "ĥ"}, + {"Ħ", "ħ"}, + {"Ĩ", "ĩ"}, + {"Ī", "ī"}, + {"Ĭ", "ĭ"}, + {"Į", "į"}, + {"IJ", "ij"}, + {"Ĵ", "ĵ"}, + {"Ķ", "ķ"}, + {"Ĺ", "ĺ"}, + {"Ļ", "ļ"}, + {"Ľ", "ľ"}, + {"Ŀ", "ŀ"}, + {"Ł", "ł"}, + {"Ń", "ń"}, + {"Ņ", "ņ"}, + {"Ň", "ň"}, + {"Ŋ", "ŋ"}, + {"Ō", "ō"}, + {"Ŏ", "ŏ"}, + {"Ő", "ő"}, + {"Œ", "œ"}, + {"Ŕ", "ŕ"}, + {"Ŗ", "ŗ"}, + {"Ř", "ř"}, + {"Ś", "ś"}, + {"Ŝ", "ŝ"}, + {"Ş", "ş"}, + {"Š", "š"}, + {"Ţ", "ţ"}, + {"Ť", "ť"}, + {"Ŧ", "ŧ"}, + {"Ũ", "ũ"}, + {"Ū", "ū"}, + {"Ŭ", "ŭ"}, + {"Ů", "ů"}, + {"Ű", "ű"}, + {"Ų", "ų"}, + {"Ŵ", "ŵ"}, + {"Ŷ", "ŷ"}, + {"Ÿ", "ÿ"}, + {"Ź", "ź"}, + {"Ż", "ż"}, + {"Ž", "ž"}, + {"Ɓ", "ɓ"}, + {"Ƃ", "ƃ"}, + {"Ƅ", "ƅ"}, + {"Ɔ", "ɔ"}, + {"Ƈ", "ƈ"}, + {"Ɖ", "ɖ"}, + {"Ɗ", "ɗ"}, + {"Ƌ", "ƌ"}, + {"Ǝ", "ǝ"}, + {"Ə", "ə"}, + {"Ɛ", "ɛ"}, + {"Ƒ", "ƒ"}, + {"Ɠ", "ɠ"}, + {"Ɣ", "ɣ"}, + {"Ɩ", "ɩ"}, + {"Ɨ", "ɨ"}, + {"Ƙ", "ƙ"}, + {"Ɯ", "ɯ"}, + {"Ɲ", "ɲ"}, + {"Ɵ", "ɵ"}, + {"Ơ", "ơ"}, + {"Ƣ", "ƣ"}, + {"Ƥ", "ƥ"}, + {"Ʀ", "ʀ"}, + {"Ƨ", "ƨ"}, + {"Ʃ", "ʃ"}, + {"Ƭ", "ƭ"}, + {"Ʈ", "ʈ"}, + {"Ư", "ư"}, + {"Ʊ", "ʊ"}, + {"Ʋ", "ʋ"}, + {"Ƴ", "ƴ"}, + {"Ƶ", "ƶ"}, + {"Ʒ", "ʒ"}, + {"Ƹ", "ƹ"}, + {"Ƽ", "ƽ"}, + {"DŽ", "dž"}, + {"LJ", "lj"}, + {"NJ", "nj"}, + {"Ǎ", "ǎ"}, + {"Ǐ", "ǐ"}, + {"Ǒ", "ǒ"}, + {"Ǔ", "ǔ"}, + {"Ǖ", "ǖ"}, + {"Ǘ", "ǘ"}, + {"Ǚ", "ǚ"}, + {"Ǜ", "ǜ"}, + {"Ǟ", "ǟ"}, + {"Ǡ", "ǡ"}, + {"Ǣ", "ǣ"}, + {"Ǥ", "ǥ"}, + {"Ǧ", "ǧ"}, + {"Ǩ", "ǩ"}, + {"Ǫ", "ǫ"}, + {"Ǭ", "ǭ"}, + {"Ǯ", "ǯ"}, + {"DZ", "dz"}, + {"Ǵ", "ǵ"}, + {"Ƕ", "ƕ"}, + {"Ƿ", "ƿ"}, + {"Ǹ", "ǹ"}, + {"Ǻ", "ǻ"}, + {"Ǽ", "ǽ"}, + {"Ǿ", "ǿ"}, + {"Ȁ", "ȁ"}, + {"Ȃ", "ȃ"}, + {"Ȅ", "ȅ"}, + {"Ȇ", "ȇ"}, + {"Ȉ", "ȉ"}, + {"Ȋ", "ȋ"}, + {"Ȍ", "ȍ"}, + {"Ȏ", "ȏ"}, + {"Ȑ", "ȑ"}, + {"Ȓ", "ȓ"}, + {"Ȕ", "ȕ"}, + {"Ȗ", "ȗ"}, + {"Ș", "ș"}, + {"Ț", "ț"}, + {"Ȝ", "ȝ"}, + {"Ȟ", "ȟ"}, + {"Ƞ", "ƞ"}, + {"Ȣ", "ȣ"}, + {"Ȥ", "ȥ"}, + {"Ȧ", "ȧ"}, + {"Ȩ", "ȩ"}, + {"Ȫ", "ȫ"}, + {"Ȭ", "ȭ"}, + {"Ȯ", "ȯ"}, + {"Ȱ", "ȱ"}, + {"Ȳ", "ȳ"}, + {"Ⱥ", "ⱥ"}, + {"Ȼ", "ȼ"}, + {"Ƚ", "ƚ"}, + {"Ⱦ", "ⱦ"}, + {"Ɂ", "ɂ"}, + {"Ƀ", "ƀ"}, + {"Ʉ", "ʉ"}, + {"Ʌ", "ʌ"}, + {"Ɇ", "ɇ"}, + {"Ɉ", "ɉ"}, + {"Ɋ", "ɋ"}, + {"Ɍ", "ɍ"}, + {"Ɏ", "ɏ"}, + {"Ͱ", "ͱ"}, + {"Ͳ", "ͳ"}, + {"Ͷ", "ͷ"}, + {"Ϳ", "ϳ"}, + {"Ά", "ά"}, + {"Έ", "έ"}, + {"Ή", "ή"}, + {"Ί", "ί"}, + {"Ό", "ό"}, + {"Ύ", "ύ"}, + {"Ώ", "ώ"}, + {"Α", "α"}, + {"Β", "β"}, + {"Γ", "γ"}, + {"Δ", "δ"}, + {"Ε", "ε"}, + {"Ζ", "ζ"}, + {"Η", "η"}, + {"Θ", "θ"}, + {"Ι", "ι"}, + {"Κ", "κ"}, + {"Λ", "λ"}, + {"Μ", "μ"}, + {"Ν", "ν"}, + {"Ξ", "ξ"}, + {"Ο", "ο"}, + {"Π", "π"}, + {"Ρ", "ρ"}, + {"Σ", "σ"}, + {"Τ", "τ"}, + {"Υ", "υ"}, + {"Φ", "φ"}, + {"Χ", "χ"}, + {"Ψ", "ψ"}, + {"Ω", "ω"}, + {"Ϊ", "ϊ"}, + {"Ϋ", "ϋ"}, + {"Ϗ", "ϗ"}, + {"ϒ", "ϒ"}, + {"ϓ", "ϓ"}, + {"ϔ", "ϔ"}, + {"Ϙ", "ϙ"}, + {"Ϛ", "ϛ"}, + {"Ϝ", "ϝ"}, + {"Ϟ", "ϟ"}, + {"Ϡ", "ϡ"}, + {"Ϣ", "ϣ"}, + {"Ϥ", "ϥ"}, + {"Ϧ", "ϧ"}, + {"Ϩ", "ϩ"}, + {"Ϫ", "ϫ"}, + {"Ϭ", "ϭ"}, + {"Ϯ", "ϯ"}, + {"ϴ", "θ"}, + {"Ϸ", "ϸ"}, + {"Ϲ", "ϲ"}, + {"Ϻ", "ϻ"}, + {"Ͻ", "ͻ"}, + {"Ͼ", "ͼ"}, + {"Ͽ", "ͽ"}, + {"Ѐ", "ѐ"}, + {"Ё", "ё"}, + {"Ђ", "ђ"}, + {"Ѓ", "ѓ"}, + {"Є", "є"}, + {"Ѕ", "ѕ"}, + {"І", "і"}, + {"Ї", "ї"}, + {"Ј", "ј"}, + {"Љ", "љ"}, + {"Њ", "њ"}, + {"Ћ", "ћ"}, + {"Ќ", "ќ"}, + {"Ѝ", "ѝ"}, + {"Ў", "ў"}, + {"Џ", "џ"}, + {"А", "а"}, + {"Б", "б"}, + {"В", "в"}, + {"Г", "г"}, + {"Д", "д"}, + {"Е", "е"}, + {"Ж", "ж"}, + {"З", "з"}, + {"И", "и"}, + {"Й", "й"}, + {"К", "к"}, + {"Л", "л"}, + {"М", "м"}, + {"Н", "н"}, + {"О", "о"}, + {"П", "п"}, + {"Р", "р"}, + {"С", "с"}, + {"Т", "т"}, + {"У", "у"}, + {"Ф", "ф"}, + {"Х", "х"}, + {"Ц", "ц"}, + {"Ч", "ч"}, + {"Ш", "ш"}, + {"Щ", "щ"}, + {"Ъ", "ъ"}, + {"Ы", "ы"}, + {"Ь", "ь"}, + {"Э", "э"}, + {"Ю", "ю"}, + {"Я", "я"}, + {"Ѡ", "ѡ"}, + {"Ѣ", "ѣ"}, + {"Ѥ", "ѥ"}, + {"Ѧ", "ѧ"}, + {"Ѩ", "ѩ"}, + {"Ѫ", "ѫ"}, + {"Ѭ", "ѭ"}, + {"Ѯ", "ѯ"}, + {"Ѱ", "ѱ"}, + {"Ѳ", "ѳ"}, + {"Ѵ", "ѵ"}, + {"Ѷ", "ѷ"}, + {"Ѹ", "ѹ"}, + {"Ѻ", "ѻ"}, + {"Ѽ", "ѽ"}, + {"Ѿ", "ѿ"}, + {"Ҁ", "ҁ"}, + {"Ҋ", "ҋ"}, + {"Ҍ", "ҍ"}, + {"Ҏ", "ҏ"}, + {"Ґ", "ґ"}, + {"Ғ", "ғ"}, + {"Ҕ", "ҕ"}, + {"Җ", "җ"}, + {"Ҙ", "ҙ"}, + {"Қ", "қ"}, + {"Ҝ", "ҝ"}, + {"Ҟ", "ҟ"}, + {"Ҡ", "ҡ"}, + {"Ң", "ң"}, + {"Ҥ", "ҥ"}, + {"Ҧ", "ҧ"}, + {"Ҩ", "ҩ"}, + {"Ҫ", "ҫ"}, + {"Ҭ", "ҭ"}, + {"Ү", "ү"}, + {"Ұ", "ұ"}, + {"Ҳ", "ҳ"}, + {"Ҵ", "ҵ"}, + {"Ҷ", "ҷ"}, + {"Ҹ", "ҹ"}, + {"Һ", "һ"}, + {"Ҽ", "ҽ"}, + {"Ҿ", "ҿ"}, + {"Ӏ", "ӏ"}, + {"Ӂ", "ӂ"}, + {"Ӄ", "ӄ"}, + {"Ӆ", "ӆ"}, + {"Ӈ", "ӈ"}, + {"Ӊ", "ӊ"}, + {"Ӌ", "ӌ"}, + {"Ӎ", "ӎ"}, + {"Ӑ", "ӑ"}, + {"Ӓ", "ӓ"}, + {"Ӕ", "ӕ"}, + {"Ӗ", "ӗ"}, + {"Ә", "ә"}, + {"Ӛ", "ӛ"}, + {"Ӝ", "ӝ"}, + {"Ӟ", "ӟ"}, + {"Ӡ", "ӡ"}, + {"Ӣ", "ӣ"}, + {"Ӥ", "ӥ"}, + {"Ӧ", "ӧ"}, + {"Ө", "ө"}, + {"Ӫ", "ӫ"}, + {"Ӭ", "ӭ"}, + {"Ӯ", "ӯ"}, + {"Ӱ", "ӱ"}, + {"Ӳ", "ӳ"}, + {"Ӵ", "ӵ"}, + {"Ӷ", "ӷ"}, + {"Ӹ", "ӹ"}, + {"Ӻ", "ӻ"}, + {"Ӽ", "ӽ"}, + {"Ӿ", "ӿ"}, + {"Ԁ", "ԁ"}, + {"Ԃ", "ԃ"}, + {"Ԅ", "ԅ"}, + {"Ԇ", "ԇ"}, + {"Ԉ", "ԉ"}, + {"Ԋ", "ԋ"}, + {"Ԍ", "ԍ"}, + {"Ԏ", "ԏ"}, + {"Ԑ", "ԑ"}, + {"Ԓ", "ԓ"}, + {"Ԕ", "ԕ"}, + {"Ԗ", "ԗ"}, + {"Ԙ", "ԙ"}, + {"Ԛ", "ԛ"}, + {"Ԝ", "ԝ"}, + {"Ԟ", "ԟ"}, + {"Ԡ", "ԡ"}, + {"Ԣ", "ԣ"}, + {"Ԥ", "ԥ"}, + {"Ԧ", "ԧ"}, + {"Ԩ", "ԩ"}, + {"Ԫ", "ԫ"}, + {"Ԭ", "ԭ"}, + {"Ԯ", "ԯ"}, + {"Ա", "ա"}, + {"Բ", "բ"}, + {"Գ", "գ"}, + {"Դ", "դ"}, + {"Ե", "ե"}, + {"Զ", "զ"}, + {"Է", "է"}, + {"Ը", "ը"}, + {"Թ", "թ"}, + {"Ժ", "ժ"}, + {"Ի", "ի"}, + {"Լ", "լ"}, + {"Խ", "խ"}, + {"Ծ", "ծ"}, + {"Կ", "կ"}, + {"Հ", "հ"}, + {"Ձ", "ձ"}, + {"Ղ", "ղ"}, + {"Ճ", "ճ"}, + {"Մ", "մ"}, + {"Յ", "յ"}, + {"Ն", "ն"}, + {"Շ", "շ"}, + {"Ո", "ո"}, + {"Չ", "չ"}, + {"Պ", "պ"}, + {"Ջ", "ջ"}, + {"Ռ", "ռ"}, + {"Ս", "ս"}, + {"Վ", "վ"}, + {"Տ", "տ"}, + {"Ր", "ր"}, + {"Ց", "ց"}, + {"Ւ", "ւ"}, + {"Փ", "փ"}, + {"Ք", "ք"}, + {"Օ", "օ"}, + {"Ֆ", "ֆ"}, + {"Ⴀ", "ⴀ"}, + {"Ⴁ", "ⴁ"}, + {"Ⴂ", "ⴂ"}, + {"Ⴃ", "ⴃ"}, + {"Ⴄ", "ⴄ"}, + {"Ⴅ", "ⴅ"}, + {"Ⴆ", "ⴆ"}, + {"Ⴇ", "ⴇ"}, + {"Ⴈ", "ⴈ"}, + {"Ⴉ", "ⴉ"}, + {"Ⴊ", "ⴊ"}, + {"Ⴋ", "ⴋ"}, + {"Ⴌ", "ⴌ"}, + {"Ⴍ", "ⴍ"}, + {"Ⴎ", "ⴎ"}, + {"Ⴏ", "ⴏ"}, + {"Ⴐ", "ⴐ"}, + {"Ⴑ", "ⴑ"}, + {"Ⴒ", "ⴒ"}, + {"Ⴓ", "ⴓ"}, + {"Ⴔ", "ⴔ"}, + {"Ⴕ", "ⴕ"}, + {"Ⴖ", "ⴖ"}, + {"Ⴗ", "ⴗ"}, + {"Ⴘ", "ⴘ"}, + {"Ⴙ", "ⴙ"}, + {"Ⴚ", "ⴚ"}, + {"Ⴛ", "ⴛ"}, + {"Ⴜ", "ⴜ"}, + {"Ⴝ", "ⴝ"}, + {"Ⴞ", "ⴞ"}, + {"Ⴟ", "ⴟ"}, + {"Ⴠ", "ⴠ"}, + {"Ⴡ", "ⴡ"}, + {"Ⴢ", "ⴢ"}, + {"Ⴣ", "ⴣ"}, + {"Ⴤ", "ⴤ"}, + {"Ⴥ", "ⴥ"}, + {"Ⴧ", "ⴧ"}, + {"Ⴭ", "ⴭ"}, + {"Ꭰ", "ꭰ"}, + {"Ꭱ", "ꭱ"}, + {"Ꭲ", "ꭲ"}, + {"Ꭳ", "ꭳ"}, + {"Ꭴ", "ꭴ"}, + {"Ꭵ", "ꭵ"}, + {"Ꭶ", "ꭶ"}, + {"Ꭷ", "ꭷ"}, + {"Ꭸ", "ꭸ"}, + {"Ꭹ", "ꭹ"}, + {"Ꭺ", "ꭺ"}, + {"Ꭻ", "ꭻ"}, + {"Ꭼ", "ꭼ"}, + {"Ꭽ", "ꭽ"}, + {"Ꭾ", "ꭾ"}, + {"Ꭿ", "ꭿ"}, + {"Ꮀ", "ꮀ"}, + {"Ꮁ", "ꮁ"}, + {"Ꮂ", "ꮂ"}, + {"Ꮃ", "ꮃ"}, + {"Ꮄ", "ꮄ"}, + {"Ꮅ", "ꮅ"}, + {"Ꮆ", "ꮆ"}, + {"Ꮇ", "ꮇ"}, + {"Ꮈ", "ꮈ"}, + {"Ꮉ", "ꮉ"}, + {"Ꮊ", "ꮊ"}, + {"Ꮋ", "ꮋ"}, + {"Ꮌ", "ꮌ"}, + {"Ꮍ", "ꮍ"}, + {"Ꮎ", "ꮎ"}, + {"Ꮏ", "ꮏ"}, + {"Ꮐ", "ꮐ"}, + {"Ꮑ", "ꮑ"}, + {"Ꮒ", "ꮒ"}, + {"Ꮓ", "ꮓ"}, + {"Ꮔ", "ꮔ"}, + {"Ꮕ", "ꮕ"}, + {"Ꮖ", "ꮖ"}, + {"Ꮗ", "ꮗ"}, + {"Ꮘ", "ꮘ"}, + {"Ꮙ", "ꮙ"}, + {"Ꮚ", "ꮚ"}, + {"Ꮛ", "ꮛ"}, + {"Ꮜ", "ꮜ"}, + {"Ꮝ", "ꮝ"}, + {"Ꮞ", "ꮞ"}, + {"Ꮟ", "ꮟ"}, + {"Ꮠ", "ꮠ"}, + {"Ꮡ", "ꮡ"}, + {"Ꮢ", "ꮢ"}, + {"Ꮣ", "ꮣ"}, + {"Ꮤ", "ꮤ"}, + {"Ꮥ", "ꮥ"}, + {"Ꮦ", "ꮦ"}, + {"Ꮧ", "ꮧ"}, + {"Ꮨ", "ꮨ"}, + {"Ꮩ", "ꮩ"}, + {"Ꮪ", "ꮪ"}, + {"Ꮫ", "ꮫ"}, + {"Ꮬ", "ꮬ"}, + {"Ꮭ", "ꮭ"}, + {"Ꮮ", "ꮮ"}, + {"Ꮯ", "ꮯ"}, + {"Ꮰ", "ꮰ"}, + {"Ꮱ", "ꮱ"}, + {"Ꮲ", "ꮲ"}, + {"Ꮳ", "ꮳ"}, + {"Ꮴ", "ꮴ"}, + {"Ꮵ", "ꮵ"}, + {"Ꮶ", "ꮶ"}, + {"Ꮷ", "ꮷ"}, + {"Ꮸ", "ꮸ"}, + {"Ꮹ", "ꮹ"}, + {"Ꮺ", "ꮺ"}, + {"Ꮻ", "ꮻ"}, + {"Ꮼ", "ꮼ"}, + {"Ꮽ", "ꮽ"}, + {"Ꮾ", "ꮾ"}, + {"Ꮿ", "ꮿ"}, + {"Ᏸ", "ᏸ"}, + {"Ᏹ", "ᏹ"}, + {"Ᏺ", "ᏺ"}, + {"Ᏻ", "ᏻ"}, + {"Ᏼ", "ᏼ"}, + {"Ᏽ", "ᏽ"}, + {"Ა", "ა"}, + {"Ბ", "ბ"}, + {"Გ", "გ"}, + {"Დ", "დ"}, + {"Ე", "ე"}, + {"Ვ", "ვ"}, + {"Ზ", "ზ"}, + {"Თ", "თ"}, + {"Ი", "ი"}, + {"Კ", "კ"}, + {"Ლ", "ლ"}, + {"Მ", "მ"}, + {"Ნ", "ნ"}, + {"Ო", "ო"}, + {"Პ", "პ"}, + {"Ჟ", "ჟ"}, + {"Რ", "რ"}, + {"Ს", "ს"}, + {"Ტ", "ტ"}, + {"Უ", "უ"}, + {"Ფ", "ფ"}, + {"Ქ", "ქ"}, + {"Ღ", "ღ"}, + {"Ყ", "ყ"}, + {"Შ", "შ"}, + {"Ჩ", "ჩ"}, + {"Ც", "ც"}, + {"Ძ", "ძ"}, + {"Წ", "წ"}, + {"Ჭ", "ჭ"}, + {"Ხ", "ხ"}, + {"Ჯ", "ჯ"}, + {"Ჰ", "ჰ"}, + {"Ჱ", "ჱ"}, + {"Ჲ", "ჲ"}, + {"Ჳ", "ჳ"}, + {"Ჴ", "ჴ"}, + {"Ჵ", "ჵ"}, + {"Ჶ", "ჶ"}, + {"Ჷ", "ჷ"}, + {"Ჸ", "ჸ"}, + {"Ჹ", "ჹ"}, + {"Ჺ", "ჺ"}, + {"Ჽ", "ჽ"}, + {"Ჾ", "ჾ"}, + {"Ჿ", "ჿ"}, + {"Ḁ", "ḁ"}, + {"Ḃ", "ḃ"}, + {"Ḅ", "ḅ"}, + {"Ḇ", "ḇ"}, + {"Ḉ", "ḉ"}, + {"Ḋ", "ḋ"}, + {"Ḍ", "ḍ"}, + {"Ḏ", "ḏ"}, + {"Ḑ", "ḑ"}, + {"Ḓ", "ḓ"}, + {"Ḕ", "ḕ"}, + {"Ḗ", "ḗ"}, + {"Ḙ", "ḙ"}, + {"Ḛ", "ḛ"}, + {"Ḝ", "ḝ"}, + {"Ḟ", "ḟ"}, + {"Ḡ", "ḡ"}, + {"Ḣ", "ḣ"}, + {"Ḥ", "ḥ"}, + {"Ḧ", "ḧ"}, + {"Ḩ", "ḩ"}, + {"Ḫ", "ḫ"}, + {"Ḭ", "ḭ"}, + {"Ḯ", "ḯ"}, + {"Ḱ", "ḱ"}, + {"Ḳ", "ḳ"}, + {"Ḵ", "ḵ"}, + {"Ḷ", "ḷ"}, + {"Ḹ", "ḹ"}, + {"Ḻ", "ḻ"}, + {"Ḽ", "ḽ"}, + {"Ḿ", "ḿ"}, + {"Ṁ", "ṁ"}, + {"Ṃ", "ṃ"}, + {"Ṅ", "ṅ"}, + {"Ṇ", "ṇ"}, + {"Ṉ", "ṉ"}, + {"Ṋ", "ṋ"}, + {"Ṍ", "ṍ"}, + {"Ṏ", "ṏ"}, + {"Ṑ", "ṑ"}, + {"Ṓ", "ṓ"}, + {"Ṕ", "ṕ"}, + {"Ṗ", "ṗ"}, + {"Ṙ", "ṙ"}, + {"Ṛ", "ṛ"}, + {"Ṝ", "ṝ"}, + {"Ṟ", "ṟ"}, + {"Ṡ", "ṡ"}, + {"Ṣ", "ṣ"}, + {"Ṥ", "ṥ"}, + {"Ṧ", "ṧ"}, + {"Ṩ", "ṩ"}, + {"Ṫ", "ṫ"}, + {"Ṭ", "ṭ"}, + {"Ṯ", "ṯ"}, + {"Ṱ", "ṱ"}, + {"Ṳ", "ṳ"}, + {"Ṵ", "ṵ"}, + {"Ṷ", "ṷ"}, + {"Ṹ", "ṹ"}, + {"Ṻ", "ṻ"}, + {"Ṽ", "ṽ"}, + {"Ṿ", "ṿ"}, + {"Ẁ", "ẁ"}, + {"Ẃ", "ẃ"}, + {"Ẅ", "ẅ"}, + {"Ẇ", "ẇ"}, + {"Ẉ", "ẉ"}, + {"Ẋ", "ẋ"}, + {"Ẍ", "ẍ"}, + {"Ẏ", "ẏ"}, + {"Ẑ", "ẑ"}, + {"Ẓ", "ẓ"}, + {"Ẕ", "ẕ"}, + {"ẞ", "ß"}, + {"Ạ", "ạ"}, + {"Ả", "ả"}, + {"Ấ", "ấ"}, + {"Ầ", "ầ"}, + {"Ẩ", "ẩ"}, + {"Ẫ", "ẫ"}, + {"Ậ", "ậ"}, + {"Ắ", "ắ"}, + {"Ằ", "ằ"}, + {"Ẳ", "ẳ"}, + {"Ẵ", "ẵ"}, + {"Ặ", "ặ"}, + {"Ẹ", "ẹ"}, + {"Ẻ", "ẻ"}, + {"Ẽ", "ẽ"}, + {"Ế", "ế"}, + {"Ề", "ề"}, + {"Ể", "ể"}, + {"Ễ", "ễ"}, + {"Ệ", "ệ"}, + {"Ỉ", "ỉ"}, + {"Ị", "ị"}, + {"Ọ", "ọ"}, + {"Ỏ", "ỏ"}, + {"Ố", "ố"}, + {"Ồ", "ồ"}, + {"Ổ", "ổ"}, + {"Ỗ", "ỗ"}, + {"Ộ", "ộ"}, + {"Ớ", "ớ"}, + {"Ờ", "ờ"}, + {"Ở", "ở"}, + {"Ỡ", "ỡ"}, + {"Ợ", "ợ"}, + {"Ụ", "ụ"}, + {"Ủ", "ủ"}, + {"Ứ", "ứ"}, + {"Ừ", "ừ"}, + {"Ử", "ử"}, + {"Ữ", "ữ"}, + {"Ự", "ự"}, + {"Ỳ", "ỳ"}, + {"Ỵ", "ỵ"}, + {"Ỷ", "ỷ"}, + {"Ỹ", "ỹ"}, + {"Ỻ", "ỻ"}, + {"Ỽ", "ỽ"}, + {"Ỿ", "ỿ"}, + {"Ἀ", "ἀ"}, + {"Ἁ", "ἁ"}, + {"Ἂ", "ἂ"}, + {"Ἃ", "ἃ"}, + {"Ἄ", "ἄ"}, + {"Ἅ", "ἅ"}, + {"Ἆ", "ἆ"}, + {"Ἇ", "ἇ"}, + {"Ἐ", "ἐ"}, + {"Ἑ", "ἑ"}, + {"Ἒ", "ἒ"}, + {"Ἓ", "ἓ"}, + {"Ἔ", "ἔ"}, + {"Ἕ", "ἕ"}, + {"Ἠ", "ἠ"}, + {"Ἡ", "ἡ"}, + {"Ἢ", "ἢ"}, + {"Ἣ", "ἣ"}, + {"Ἤ", "ἤ"}, + {"Ἥ", "ἥ"}, + {"Ἦ", "ἦ"}, + {"Ἧ", "ἧ"}, + {"Ἰ", "ἰ"}, + {"Ἱ", "ἱ"}, + {"Ἲ", "ἲ"}, + {"Ἳ", "ἳ"}, + {"Ἴ", "ἴ"}, + {"Ἵ", "ἵ"}, + {"Ἶ", "ἶ"}, + {"Ἷ", "ἷ"}, + {"Ὀ", "ὀ"}, + {"Ὁ", "ὁ"}, + {"Ὂ", "ὂ"}, + {"Ὃ", "ὃ"}, + {"Ὄ", "ὄ"}, + {"Ὅ", "ὅ"}, + {"Ὑ", "ὑ"}, + {"Ὓ", "ὓ"}, + {"Ὕ", "ὕ"}, + {"Ὗ", "ὗ"}, + {"Ὠ", "ὠ"}, + {"Ὡ", "ὡ"}, + {"Ὢ", "ὢ"}, + {"Ὣ", "ὣ"}, + {"Ὤ", "ὤ"}, + {"Ὥ", "ὥ"}, + {"Ὦ", "ὦ"}, + {"Ὧ", "ὧ"}, + {"Ᾰ", "ᾰ"}, + {"Ᾱ", "ᾱ"}, + {"Ὰ", "ὰ"}, + {"Ά", "ά"}, + {"Ὲ", "ὲ"}, + {"Έ", "έ"}, + {"Ὴ", "ὴ"}, + {"Ή", "ή"}, + {"Ῐ", "ῐ"}, + {"Ῑ", "ῑ"}, + {"Ὶ", "ὶ"}, + {"Ί", "ί"}, + {"Ῠ", "ῠ"}, + {"Ῡ", "ῡ"}, + {"Ὺ", "ὺ"}, + {"Ύ", "ύ"}, + {"Ῥ", "ῥ"}, + {"Ὸ", "ὸ"}, + {"Ό", "ό"}, + {"Ὼ", "ὼ"}, + {"Ώ", "ώ"}, + {"ℂ", "ℂ"}, + {"ℇ", "ℇ"}, + {"ℋ", "ℋ"}, + {"ℌ", "ℌ"}, + {"ℍ", "ℍ"}, + {"ℐ", "ℐ"}, + {"ℑ", "ℑ"}, + {"ℒ", "ℒ"}, + {"ℕ", "ℕ"}, + {"ℙ", "ℙ"}, + {"ℚ", "ℚ"}, + {"ℛ", "ℛ"}, + {"ℜ", "ℜ"}, + {"ℝ", "ℝ"}, + {"ℤ", "ℤ"}, + {"Ω", "ω"}, + {"ℨ", "ℨ"}, + {"K", "k"}, + {"Å", "å"}, + {"ℬ", "ℬ"}, + {"ℭ", "ℭ"}, + {"ℰ", "ℰ"}, + {"ℱ", "ℱ"}, + {"Ⅎ", "ⅎ"}, + {"ℳ", "ℳ"}, + {"ℾ", "ℾ"}, + {"ℿ", "ℿ"}, + {"ⅅ", "ⅅ"}, + {"Ↄ", "ↄ"}, + {"Ⰰ", "ⰰ"}, + {"Ⰱ", "ⰱ"}, + {"Ⰲ", "ⰲ"}, + {"Ⰳ", "ⰳ"}, + {"Ⰴ", "ⰴ"}, + {"Ⰵ", "ⰵ"}, + {"Ⰶ", "ⰶ"}, + {"Ⰷ", "ⰷ"}, + {"Ⰸ", "ⰸ"}, + {"Ⰹ", "ⰹ"}, + {"Ⰺ", "ⰺ"}, + {"Ⰻ", "ⰻ"}, + {"Ⰼ", "ⰼ"}, + {"Ⰽ", "ⰽ"}, + {"Ⰾ", "ⰾ"}, + {"Ⰿ", "ⰿ"}, + {"Ⱀ", "ⱀ"}, + {"Ⱁ", "ⱁ"}, + {"Ⱂ", "ⱂ"}, + {"Ⱃ", "ⱃ"}, + {"Ⱄ", "ⱄ"}, + {"Ⱅ", "ⱅ"}, + {"Ⱆ", "ⱆ"}, + {"Ⱇ", "ⱇ"}, + {"Ⱈ", "ⱈ"}, + {"Ⱉ", "ⱉ"}, + {"Ⱊ", "ⱊ"}, + {"Ⱋ", "ⱋ"}, + {"Ⱌ", "ⱌ"}, + {"Ⱍ", "ⱍ"}, + {"Ⱎ", "ⱎ"}, + {"Ⱏ", "ⱏ"}, + {"Ⱐ", "ⱐ"}, + {"Ⱑ", "ⱑ"}, + {"Ⱒ", "ⱒ"}, + {"Ⱓ", "ⱓ"}, + {"Ⱔ", "ⱔ"}, + {"Ⱕ", "ⱕ"}, + {"Ⱖ", "ⱖ"}, + {"Ⱗ", "ⱗ"}, + {"Ⱘ", "ⱘ"}, + {"Ⱙ", "ⱙ"}, + {"Ⱚ", "ⱚ"}, + {"Ⱛ", "ⱛ"}, + {"Ⱜ", "ⱜ"}, + {"Ⱝ", "ⱝ"}, + {"Ⱞ", "ⱞ"}, + {"Ⱡ", "ⱡ"}, + {"Ɫ", "ɫ"}, + {"Ᵽ", "ᵽ"}, + {"Ɽ", "ɽ"}, + {"Ⱨ", "ⱨ"}, + {"Ⱪ", "ⱪ"}, + {"Ⱬ", "ⱬ"}, + {"Ɑ", "ɑ"}, + {"Ɱ", "ɱ"}, + {"Ɐ", "ɐ"}, + {"Ɒ", "ɒ"}, + {"Ⱳ", "ⱳ"}, + {"Ⱶ", "ⱶ"}, + {"Ȿ", "ȿ"}, + {"Ɀ", "ɀ"}, + {"Ⲁ", "ⲁ"}, + {"Ⲃ", "ⲃ"}, + {"Ⲅ", "ⲅ"}, + {"Ⲇ", "ⲇ"}, + {"Ⲉ", "ⲉ"}, + {"Ⲋ", "ⲋ"}, + {"Ⲍ", "ⲍ"}, + {"Ⲏ", "ⲏ"}, + {"Ⲑ", "ⲑ"}, + {"Ⲓ", "ⲓ"}, + {"Ⲕ", "ⲕ"}, + {"Ⲗ", "ⲗ"}, + {"Ⲙ", "ⲙ"}, + {"Ⲛ", "ⲛ"}, + {"Ⲝ", "ⲝ"}, + {"Ⲟ", "ⲟ"}, + {"Ⲡ", "ⲡ"}, + {"Ⲣ", "ⲣ"}, + {"Ⲥ", "ⲥ"}, + {"Ⲧ", "ⲧ"}, + {"Ⲩ", "ⲩ"}, + {"Ⲫ", "ⲫ"}, + {"Ⲭ", "ⲭ"}, + {"Ⲯ", "ⲯ"}, + {"Ⲱ", "ⲱ"}, + {"Ⲳ", "ⲳ"}, + {"Ⲵ", "ⲵ"}, + {"Ⲷ", "ⲷ"}, + {"Ⲹ", "ⲹ"}, + {"Ⲻ", "ⲻ"}, + {"Ⲽ", "ⲽ"}, + {"Ⲿ", "ⲿ"}, + {"Ⳁ", "ⳁ"}, + {"Ⳃ", "ⳃ"}, + {"Ⳅ", "ⳅ"}, + {"Ⳇ", "ⳇ"}, + {"Ⳉ", "ⳉ"}, + {"Ⳋ", "ⳋ"}, + {"Ⳍ", "ⳍ"}, + {"Ⳏ", "ⳏ"}, + {"Ⳑ", "ⳑ"}, + {"Ⳓ", "ⳓ"}, + {"Ⳕ", "ⳕ"}, + {"Ⳗ", "ⳗ"}, + {"Ⳙ", "ⳙ"}, + {"Ⳛ", "ⳛ"}, + {"Ⳝ", "ⳝ"}, + {"Ⳟ", "ⳟ"}, + {"Ⳡ", "ⳡ"}, + {"Ⳣ", "ⳣ"}, + {"Ⳬ", "ⳬ"}, + {"Ⳮ", "ⳮ"}, + {"Ⳳ", "ⳳ"}, + {"Ꙁ", "ꙁ"}, + {"Ꙃ", "ꙃ"}, + {"Ꙅ", "ꙅ"}, + {"Ꙇ", "ꙇ"}, + {"Ꙉ", "ꙉ"}, + {"Ꙋ", "ꙋ"}, + {"Ꙍ", "ꙍ"}, + {"Ꙏ", "ꙏ"}, + {"Ꙑ", "ꙑ"}, + {"Ꙓ", "ꙓ"}, + {"Ꙕ", "ꙕ"}, + {"Ꙗ", "ꙗ"}, + {"Ꙙ", "ꙙ"}, + {"Ꙛ", "ꙛ"}, + {"Ꙝ", "ꙝ"}, + {"Ꙟ", "ꙟ"}, + {"Ꙡ", "ꙡ"}, + {"Ꙣ", "ꙣ"}, + {"Ꙥ", "ꙥ"}, + {"Ꙧ", "ꙧ"}, + {"Ꙩ", "ꙩ"}, + {"Ꙫ", "ꙫ"}, + {"Ꙭ", "ꙭ"}, + {"Ꚁ", "ꚁ"}, + {"Ꚃ", "ꚃ"}, + {"Ꚅ", "ꚅ"}, + {"Ꚇ", "ꚇ"}, + {"Ꚉ", "ꚉ"}, + {"Ꚋ", "ꚋ"}, + {"Ꚍ", "ꚍ"}, + {"Ꚏ", "ꚏ"}, + {"Ꚑ", "ꚑ"}, + {"Ꚓ", "ꚓ"}, + {"Ꚕ", "ꚕ"}, + {"Ꚗ", "ꚗ"}, + {"Ꚙ", "ꚙ"}, + {"Ꚛ", "ꚛ"}, + {"Ꜣ", "ꜣ"}, + {"Ꜥ", "ꜥ"}, + {"Ꜧ", "ꜧ"}, + {"Ꜩ", "ꜩ"}, + {"Ꜫ", "ꜫ"}, + {"Ꜭ", "ꜭ"}, + {"Ꜯ", "ꜯ"}, + {"Ꜳ", "ꜳ"}, + {"Ꜵ", "ꜵ"}, + {"Ꜷ", "ꜷ"}, + {"Ꜹ", "ꜹ"}, + {"Ꜻ", "ꜻ"}, + {"Ꜽ", "ꜽ"}, + {"Ꜿ", "ꜿ"}, + {"Ꝁ", "ꝁ"}, + {"Ꝃ", "ꝃ"}, + {"Ꝅ", "ꝅ"}, + {"Ꝇ", "ꝇ"}, + {"Ꝉ", "ꝉ"}, + {"Ꝋ", "ꝋ"}, + {"Ꝍ", "ꝍ"}, + {"Ꝏ", "ꝏ"}, + {"Ꝑ", "ꝑ"}, + {"Ꝓ", "ꝓ"}, + {"Ꝕ", "ꝕ"}, + {"Ꝗ", "ꝗ"}, + {"Ꝙ", "ꝙ"}, + {"Ꝛ", "ꝛ"}, + {"Ꝝ", "ꝝ"}, + {"Ꝟ", "ꝟ"}, + {"Ꝡ", "ꝡ"}, + {"Ꝣ", "ꝣ"}, + {"Ꝥ", "ꝥ"}, + {"Ꝧ", "ꝧ"}, + {"Ꝩ", "ꝩ"}, + {"Ꝫ", "ꝫ"}, + {"Ꝭ", "ꝭ"}, + {"Ꝯ", "ꝯ"}, + {"Ꝺ", "ꝺ"}, + {"Ꝼ", "ꝼ"}, + {"Ᵹ", "ᵹ"}, + {"Ꝿ", "ꝿ"}, + {"Ꞁ", "ꞁ"}, + {"Ꞃ", "ꞃ"}, + {"Ꞅ", "ꞅ"}, + {"Ꞇ", "ꞇ"}, + {"Ꞌ", "ꞌ"}, + {"Ɥ", "ɥ"}, + {"Ꞑ", "ꞑ"}, + {"Ꞓ", "ꞓ"}, + {"Ꞗ", "ꞗ"}, + {"Ꞙ", "ꞙ"}, + {"Ꞛ", "ꞛ"}, + {"Ꞝ", "ꞝ"}, + {"Ꞟ", "ꞟ"}, + {"Ꞡ", "ꞡ"}, + {"Ꞣ", "ꞣ"}, + {"Ꞥ", "ꞥ"}, + {"Ꞧ", "ꞧ"}, + {"Ꞩ", "ꞩ"}, + {"Ɦ", "ɦ"}, + {"Ɜ", "ɜ"}, + {"Ɡ", "ɡ"}, + {"Ɬ", "ɬ"}, + {"Ɪ", "ɪ"}, + {"Ʞ", "ʞ"}, + {"Ʇ", "ʇ"}, + {"Ʝ", "ʝ"}, + {"Ꭓ", "ꭓ"}, + {"Ꞵ", "ꞵ"}, + {"Ꞷ", "ꞷ"}, + {"Ꞹ", "ꞹ"}, + {"Ꞻ", "ꞻ"}, + {"Ꞽ", "ꞽ"}, + {"Ꞿ", "ꞿ"}, + {"Ꟃ", "ꟃ"}, + {"Ꞔ", "ꞔ"}, + {"Ʂ", "ʂ"}, + {"Ᶎ", "ᶎ"}, + {"A", "a"}, + {"B", "b"}, + {"C", "c"}, + {"D", "d"}, + {"E", "e"}, + {"F", "f"}, + {"G", "g"}, + {"H", "h"}, + {"I", "i"}, + {"J", "j"}, + {"K", "k"}, + {"L", "l"}, + {"M", "m"}, + {"N", "n"}, + {"O", "o"}, + {"P", "p"}, + {"Q", "q"}, + {"R", "r"}, + {"S", "s"}, + {"T", "t"}, + {"U", "u"}, + {"V", "v"}, + {"W", "w"}, + {"X", "x"}, + {"Y", "y"}, + {"Z", "z"}, + {"𐐀", "𐐨"}, + {"𐐁", "𐐩"}, + {"𐐂", "𐐪"}, + {"𐐃", "𐐫"}, + {"𐐄", "𐐬"}, + {"𐐅", "𐐭"}, + {"𐐆", "𐐮"}, + {"𐐇", "𐐯"}, + {"𐐈", "𐐰"}, + {"𐐉", "𐐱"}, + {"𐐊", "𐐲"}, + {"𐐋", "𐐳"}, + {"𐐌", "𐐴"}, + {"𐐍", "𐐵"}, + {"𐐎", "𐐶"}, + {"𐐏", "𐐷"}, + {"𐐐", "𐐸"}, + {"𐐑", "𐐹"}, + {"𐐒", "𐐺"}, + {"𐐓", "𐐻"}, + {"𐐔", "𐐼"}, + {"𐐕", "𐐽"}, + {"𐐖", "𐐾"}, + {"𐐗", "𐐿"}, + {"𐐘", "𐑀"}, + {"𐐙", "𐑁"}, + {"𐐚", "𐑂"}, + {"𐐛", "𐑃"}, + {"𐐜", "𐑄"}, + {"𐐝", "𐑅"}, + {"𐐞", "𐑆"}, + {"𐐟", "𐑇"}, + {"𐐠", "𐑈"}, + {"𐐡", "𐑉"}, + {"𐐢", "𐑊"}, + {"𐐣", "𐑋"}, + {"𐐤", "𐑌"}, + {"𐐥", "𐑍"}, + {"𐐦", "𐑎"}, + {"𐐧", "𐑏"}, + {"𐒰", "𐓘"}, + {"𐒱", "𐓙"}, + {"𐒲", "𐓚"}, + {"𐒳", "𐓛"}, + {"𐒴", "𐓜"}, + {"𐒵", "𐓝"}, + {"𐒶", "𐓞"}, + {"𐒷", "𐓟"}, + {"𐒸", "𐓠"}, + {"𐒹", "𐓡"}, + {"𐒺", "𐓢"}, + {"𐒻", "𐓣"}, + {"𐒼", "𐓤"}, + {"𐒽", "𐓥"}, + {"𐒾", "𐓦"}, + {"𐒿", "𐓧"}, + {"𐓀", "𐓨"}, + {"𐓁", "𐓩"}, + {"𐓂", "𐓪"}, + {"𐓃", "𐓫"}, + {"𐓄", "𐓬"}, + {"𐓅", "𐓭"}, + {"𐓆", "𐓮"}, + {"𐓇", "𐓯"}, + {"𐓈", "𐓰"}, + {"𐓉", "𐓱"}, + {"𐓊", "𐓲"}, + {"𐓋", "𐓳"}, + {"𐓌", "𐓴"}, + {"𐓍", "𐓵"}, + {"𐓎", "𐓶"}, + {"𐓏", "𐓷"}, + {"𐓐", "𐓸"}, + {"𐓑", "𐓹"}, + {"𐓒", "𐓺"}, + {"𐓓", "𐓻"}, + {"𐲀", "𐳀"}, + {"𐲁", "𐳁"}, + {"𐲂", "𐳂"}, + {"𐲃", "𐳃"}, + {"𐲄", "𐳄"}, + {"𐲅", "𐳅"}, + {"𐲆", "𐳆"}, + {"𐲇", "𐳇"}, + {"𐲈", "𐳈"}, + {"𐲉", "𐳉"}, + {"𐲊", "𐳊"}, + {"𐲋", "𐳋"}, + {"𐲌", "𐳌"}, + {"𐲍", "𐳍"}, + {"𐲎", "𐳎"}, + {"𐲏", "𐳏"}, + {"𐲐", "𐳐"}, + {"𐲑", "𐳑"}, + {"𐲒", "𐳒"}, + {"𐲓", "𐳓"}, + {"𐲔", "𐳔"}, + {"𐲕", "𐳕"}, + {"𐲖", "𐳖"}, + {"𐲗", "𐳗"}, + {"𐲘", "𐳘"}, + {"𐲙", "𐳙"}, + {"𐲚", "𐳚"}, + {"𐲛", "𐳛"}, + {"𐲜", "𐳜"}, + {"𐲝", "𐳝"}, + {"𐲞", "𐳞"}, + {"𐲟", "𐳟"}, + {"𐲠", "𐳠"}, + {"𐲡", "𐳡"}, + {"𐲢", "𐳢"}, + {"𐲣", "𐳣"}, + {"𐲤", "𐳤"}, + {"𐲥", "𐳥"}, + {"𐲦", "𐳦"}, + {"𐲧", "𐳧"}, + {"𐲨", "𐳨"}, + {"𐲩", "𐳩"}, + {"𐲪", "𐳪"}, + {"𐲫", "𐳫"}, + {"𐲬", "𐳬"}, + {"𐲭", "𐳭"}, + {"𐲮", "𐳮"}, + {"𐲯", "𐳯"}, + {"𐲰", "𐳰"}, + {"𐲱", "𐳱"}, + {"𐲲", "𐳲"}, + {"𑢠", "𑣀"}, + {"𑢡", "𑣁"}, + {"𑢢", "𑣂"}, + {"𑢣", "𑣃"}, + {"𑢤", "𑣄"}, + {"𑢥", "𑣅"}, + {"𑢦", "𑣆"}, + {"𑢧", "𑣇"}, + {"𑢨", "𑣈"}, + {"𑢩", "𑣉"}, + {"𑢪", "𑣊"}, + {"𑢫", "𑣋"}, + {"𑢬", "𑣌"}, + {"𑢭", "𑣍"}, + {"𑢮", "𑣎"}, + {"𑢯", "𑣏"}, + {"𑢰", "𑣐"}, + {"𑢱", "𑣑"}, + {"𑢲", "𑣒"}, + {"𑢳", "𑣓"}, + {"𑢴", "𑣔"}, + {"𑢵", "𑣕"}, + {"𑢶", "𑣖"}, + {"𑢷", "𑣗"}, + {"𑢸", "𑣘"}, + {"𑢹", "𑣙"}, + {"𑢺", "𑣚"}, + {"𑢻", "𑣛"}, + {"𑢼", "𑣜"}, + {"𑢽", "𑣝"}, + {"𑢾", "𑣞"}, + {"𑢿", "𑣟"}, + {"𖹀", "𖹠"}, + {"𖹁", "𖹡"}, + {"𖹂", "𖹢"}, + {"𖹃", "𖹣"}, + {"𖹄", "𖹤"}, + {"𖹅", "𖹥"}, + {"𖹆", "𖹦"}, + {"𖹇", "𖹧"}, + {"𖹈", "𖹨"}, + {"𖹉", "𖹩"}, + {"𖹊", "𖹪"}, + {"𖹋", "𖹫"}, + {"𖹌", "𖹬"}, + {"𖹍", "𖹭"}, + {"𖹎", "𖹮"}, + {"𖹏", "𖹯"}, + {"𖹐", "𖹰"}, + {"𖹑", "𖹱"}, + {"𖹒", "𖹲"}, + {"𖹓", "𖹳"}, + {"𖹔", "𖹴"}, + {"𖹕", "𖹵"}, + {"𖹖", "𖹶"}, + {"𖹗", "𖹷"}, + {"𖹘", "𖹸"}, + {"𖹙", "𖹹"}, + {"𖹚", "𖹺"}, + {"𖹛", "𖹻"}, + {"𖹜", "𖹼"}, + {"𖹝", "𖹽"}, + {"𖹞", "𖹾"}, + {"𖹟", "𖹿"}, + {"𝐀", "𝐀"}, + {"𝐁", "𝐁"}, + {"𝐂", "𝐂"}, + {"𝐃", "𝐃"}, + {"𝐄", "𝐄"}, + {"𝐅", "𝐅"}, + {"𝐆", "𝐆"}, + {"𝐇", "𝐇"}, + {"𝐈", "𝐈"}, + {"𝐉", "𝐉"}, + {"𝐊", "𝐊"}, + {"𝐋", "𝐋"}, + {"𝐌", "𝐌"}, + {"𝐍", "𝐍"}, + {"𝐎", "𝐎"}, + {"𝐏", "𝐏"}, + {"𝐐", "𝐐"}, + {"𝐑", "𝐑"}, + {"𝐒", "𝐒"}, + {"𝐓", "𝐓"}, + {"𝐔", "𝐔"}, + {"𝐕", "𝐕"}, + {"𝐖", "𝐖"}, + {"𝐗", "𝐗"}, + {"𝐘", "𝐘"}, + {"𝐙", "𝐙"}, + {"𝐴", "𝐴"}, + {"𝐵", "𝐵"}, + {"𝐶", "𝐶"}, + {"𝐷", "𝐷"}, + {"𝐸", "𝐸"}, + {"𝐹", "𝐹"}, + {"𝐺", "𝐺"}, + {"𝐻", "𝐻"}, + {"𝐼", "𝐼"}, + {"𝐽", "𝐽"}, + {"𝐾", "𝐾"}, + {"𝐿", "𝐿"}, + {"𝑀", "𝑀"}, + {"𝑁", "𝑁"}, + {"𝑂", "𝑂"}, + {"𝑃", "𝑃"}, + {"𝑄", "𝑄"}, + {"𝑅", "𝑅"}, + {"𝑆", "𝑆"}, + {"𝑇", "𝑇"}, + {"𝑈", "𝑈"}, + {"𝑉", "𝑉"}, + {"𝑊", "𝑊"}, + {"𝑋", "𝑋"}, + {"𝑌", "𝑌"}, + {"𝑍", "𝑍"}, + {"𝑨", "𝑨"}, + {"𝑩", "𝑩"}, + {"𝑪", "𝑪"}, + {"𝑫", "𝑫"}, + {"𝑬", "𝑬"}, + {"𝑭", "𝑭"}, + {"𝑮", "𝑮"}, + {"𝑯", "𝑯"}, + {"𝑰", "𝑰"}, + {"𝑱", "𝑱"}, + {"𝑲", "𝑲"}, + {"𝑳", "𝑳"}, + {"𝑴", "𝑴"}, + {"𝑵", "𝑵"}, + {"𝑶", "𝑶"}, + {"𝑷", "𝑷"}, + {"𝑸", "𝑸"}, + {"𝑹", "𝑹"}, + {"𝑺", "𝑺"}, + {"𝑻", "𝑻"}, + {"𝑼", "𝑼"}, + {"𝑽", "𝑽"}, + {"𝑾", "𝑾"}, + {"𝑿", "𝑿"}, + {"𝒀", "𝒀"}, + {"𝒁", "𝒁"}, + {"𝒜", "𝒜"}, + {"𝒞", "𝒞"}, + {"𝒟", "𝒟"}, + {"𝒢", "𝒢"}, + {"𝒥", "𝒥"}, + {"𝒦", "𝒦"}, + {"𝒩", "𝒩"}, + {"𝒪", "𝒪"}, + {"𝒫", "𝒫"}, + {"𝒬", "𝒬"}, + {"𝒮", "𝒮"}, + {"𝒯", "𝒯"}, + {"𝒰", "𝒰"}, + {"𝒱", "𝒱"}, + {"𝒲", "𝒲"}, + {"𝒳", "𝒳"}, + {"𝒴", "𝒴"}, + {"𝒵", "𝒵"}, + {"𝓐", "𝓐"}, + {"𝓑", "𝓑"}, + {"𝓒", "𝓒"}, + {"𝓓", "𝓓"}, + {"𝓔", "𝓔"}, + {"𝓕", "𝓕"}, + {"𝓖", "𝓖"}, + {"𝓗", "𝓗"}, + {"𝓘", "𝓘"}, + {"𝓙", "𝓙"}, + {"𝓚", "𝓚"}, + {"𝓛", "𝓛"}, + {"𝓜", "𝓜"}, + {"𝓝", "𝓝"}, + {"𝓞", "𝓞"}, + {"𝓟", "𝓟"}, + {"𝓠", "𝓠"}, + {"𝓡", "𝓡"}, + {"𝓢", "𝓢"}, + {"𝓣", "𝓣"}, + {"𝓤", "𝓤"}, + {"𝓥", "𝓥"}, + {"𝓦", "𝓦"}, + {"𝓧", "𝓧"}, + {"𝓨", "𝓨"}, + {"𝓩", "𝓩"}, + {"𝔄", "𝔄"}, + {"𝔅", "𝔅"}, + {"𝔇", "𝔇"}, + {"𝔈", "𝔈"}, + {"𝔉", "𝔉"}, + {"𝔊", "𝔊"}, + {"𝔍", "𝔍"}, + {"𝔎", "𝔎"}, + {"𝔏", "𝔏"}, + {"𝔐", "𝔐"}, + {"𝔑", "𝔑"}, + {"𝔒", "𝔒"}, + {"𝔓", "𝔓"}, + {"𝔔", "𝔔"}, + {"𝔖", "𝔖"}, + {"𝔗", "𝔗"}, + {"𝔘", "𝔘"}, + {"𝔙", "𝔙"}, + {"𝔚", "𝔚"}, + {"𝔛", "𝔛"}, + {"𝔜", "𝔜"}, + {"𝔸", "𝔸"}, + {"𝔹", "𝔹"}, + {"𝔻", "𝔻"}, + {"𝔼", "𝔼"}, + {"𝔽", "𝔽"}, + {"𝔾", "𝔾"}, + {"𝕀", "𝕀"}, + {"𝕁", "𝕁"}, + {"𝕂", "𝕂"}, + {"𝕃", "𝕃"}, + {"𝕄", "𝕄"}, + {"𝕆", "𝕆"}, + {"𝕊", "𝕊"}, + {"𝕋", "𝕋"}, + {"𝕌", "𝕌"}, + {"𝕍", "𝕍"}, + {"𝕎", "𝕎"}, + {"𝕏", "𝕏"}, + {"𝕐", "𝕐"}, + {"𝕬", "𝕬"}, + {"𝕭", "𝕭"}, + {"𝕮", "𝕮"}, + {"𝕯", "𝕯"}, + {"𝕰", "𝕰"}, + {"𝕱", "𝕱"}, + {"𝕲", "𝕲"}, + {"𝕳", "𝕳"}, + {"𝕴", "𝕴"}, + {"𝕵", "𝕵"}, + {"𝕶", "𝕶"}, + {"𝕷", "𝕷"}, + {"𝕸", "𝕸"}, + {"𝕹", "𝕹"}, + {"𝕺", "𝕺"}, + {"𝕻", "𝕻"}, + {"𝕼", "𝕼"}, + {"𝕽", "𝕽"}, + {"𝕾", "𝕾"}, + {"𝕿", "𝕿"}, + {"𝖀", "𝖀"}, + {"𝖁", "𝖁"}, + {"𝖂", "𝖂"}, + {"𝖃", "𝖃"}, + {"𝖄", "𝖄"}, + {"𝖅", "𝖅"}, + {"𝖠", "𝖠"}, + {"𝖡", "𝖡"}, + {"𝖢", "𝖢"}, + {"𝖣", "𝖣"}, + {"𝖤", "𝖤"}, + {"𝖥", "𝖥"}, + {"𝖦", "𝖦"}, + {"𝖧", "𝖧"}, + {"𝖨", "𝖨"}, + {"𝖩", "𝖩"}, + {"𝖪", "𝖪"}, + {"𝖫", "𝖫"}, + {"𝖬", "𝖬"}, + {"𝖭", "𝖭"}, + {"𝖮", "𝖮"}, + {"𝖯", "𝖯"}, + {"𝖰", "𝖰"}, + {"𝖱", "𝖱"}, + {"𝖲", "𝖲"}, + {"𝖳", "𝖳"}, + {"𝖴", "𝖴"}, + {"𝖵", "𝖵"}, + {"𝖶", "𝖶"}, + {"𝖷", "𝖷"}, + {"𝖸", "𝖸"}, + {"𝖹", "𝖹"}, + {"𝗔", "𝗔"}, + {"𝗕", "𝗕"}, + {"𝗖", "𝗖"}, + {"𝗗", "𝗗"}, + {"𝗘", "𝗘"}, + {"𝗙", "𝗙"}, + {"𝗚", "𝗚"}, + {"𝗛", "𝗛"}, + {"𝗜", "𝗜"}, + {"𝗝", "𝗝"}, + {"𝗞", "𝗞"}, + {"𝗟", "𝗟"}, + {"𝗠", "𝗠"}, + {"𝗡", "𝗡"}, + {"𝗢", "𝗢"}, + {"𝗣", "𝗣"}, + {"𝗤", "𝗤"}, + {"𝗥", "𝗥"}, + {"𝗦", "𝗦"}, + {"𝗧", "𝗧"}, + {"𝗨", "𝗨"}, + {"𝗩", "𝗩"}, + {"𝗪", "𝗪"}, + {"𝗫", "𝗫"}, + {"𝗬", "𝗬"}, + {"𝗭", "𝗭"}, + {"𝘈", "𝘈"}, + {"𝘉", "𝘉"}, + {"𝘊", "𝘊"}, + {"𝘋", "𝘋"}, + {"𝘌", "𝘌"}, + {"𝘍", "𝘍"}, + {"𝘎", "𝘎"}, + {"𝘏", "𝘏"}, + {"𝘐", "𝘐"}, + {"𝘑", "𝘑"}, + {"𝘒", "𝘒"}, + {"𝘓", "𝘓"}, + {"𝘔", "𝘔"}, + {"𝘕", "𝘕"}, + {"𝘖", "𝘖"}, + {"𝘗", "𝘗"}, + {"𝘘", "𝘘"}, + {"𝘙", "𝘙"}, + {"𝘚", "𝘚"}, + {"𝘛", "𝘛"}, + {"𝘜", "𝘜"}, + {"𝘝", "𝘝"}, + {"𝘞", "𝘞"}, + {"𝘟", "𝘟"}, + {"𝘠", "𝘠"}, + {"𝘡", "𝘡"}, + {"𝘼", "𝘼"}, + {"𝘽", "𝘽"}, + {"𝘾", "𝘾"}, + {"𝘿", "𝘿"}, + {"𝙀", "𝙀"}, + {"𝙁", "𝙁"}, + {"𝙂", "𝙂"}, + {"𝙃", "𝙃"}, + {"𝙄", "𝙄"}, + {"𝙅", "𝙅"}, + {"𝙆", "𝙆"}, + {"𝙇", "𝙇"}, + {"𝙈", "𝙈"}, + {"𝙉", "𝙉"}, + {"𝙊", "𝙊"}, + {"𝙋", "𝙋"}, + {"𝙌", "𝙌"}, + {"𝙍", "𝙍"}, + {"𝙎", "𝙎"}, + {"𝙏", "𝙏"}, + {"𝙐", "𝙐"}, + {"𝙑", "𝙑"}, + {"𝙒", "𝙒"}, + {"𝙓", "𝙓"}, + {"𝙔", "𝙔"}, + {"𝙕", "𝙕"}, + {"𝙰", "𝙰"}, + {"𝙱", "𝙱"}, + {"𝙲", "𝙲"}, + {"𝙳", "𝙳"}, + {"𝙴", "𝙴"}, + {"𝙵", "𝙵"}, + {"𝙶", "𝙶"}, + {"𝙷", "𝙷"}, + {"𝙸", "𝙸"}, + {"𝙹", "𝙹"}, + {"𝙺", "𝙺"}, + {"𝙻", "𝙻"}, + {"𝙼", "𝙼"}, + {"𝙽", "𝙽"}, + {"𝙾", "𝙾"}, + {"𝙿", "𝙿"}, + {"𝚀", "𝚀"}, + {"𝚁", "𝚁"}, + {"𝚂", "𝚂"}, + {"𝚃", "𝚃"}, + {"𝚄", "𝚄"}, + {"𝚅", "𝚅"}, + {"𝚆", "𝚆"}, + {"𝚇", "𝚇"}, + {"𝚈", "𝚈"}, + {"𝚉", "𝚉"}, + {"𝚨", "𝚨"}, + {"𝚩", "𝚩"}, + {"𝚪", "𝚪"}, + {"𝚫", "𝚫"}, + {"𝚬", "𝚬"}, + {"𝚭", "𝚭"}, + {"𝚮", "𝚮"}, + {"𝚯", "𝚯"}, + {"𝚰", "𝚰"}, + {"𝚱", "𝚱"}, + {"𝚲", "𝚲"}, + {"𝚳", "𝚳"}, + {"𝚴", "𝚴"}, + {"𝚵", "𝚵"}, + {"𝚶", "𝚶"}, + {"𝚷", "𝚷"}, + {"𝚸", "𝚸"}, + {"𝚹", "𝚹"}, + {"𝚺", "𝚺"}, + {"𝚻", "𝚻"}, + {"𝚼", "𝚼"}, + {"𝚽", "𝚽"}, + {"𝚾", "𝚾"}, + {"𝚿", "𝚿"}, + {"𝛀", "𝛀"}, + {"𝛢", "𝛢"}, + {"𝛣", "𝛣"}, + {"𝛤", "𝛤"}, + {"𝛥", "𝛥"}, + {"𝛦", "𝛦"}, + {"𝛧", "𝛧"}, + {"𝛨", "𝛨"}, + {"𝛩", "𝛩"}, + {"𝛪", "𝛪"}, + {"𝛫", "𝛫"}, + {"𝛬", "𝛬"}, + {"𝛭", "𝛭"}, + {"𝛮", "𝛮"}, + {"𝛯", "𝛯"}, + {"𝛰", "𝛰"}, + {"𝛱", "𝛱"}, + {"𝛲", "𝛲"}, + {"𝛳", "𝛳"}, + {"𝛴", "𝛴"}, + {"𝛵", "𝛵"}, + {"𝛶", "𝛶"}, + {"𝛷", "𝛷"}, + {"𝛸", "𝛸"}, + {"𝛹", "𝛹"}, + {"𝛺", "𝛺"}, + {"𝜜", "𝜜"}, + {"𝜝", "𝜝"}, + {"𝜞", "𝜞"}, + {"𝜟", "𝜟"}, + {"𝜠", "𝜠"}, + {"𝜡", "𝜡"}, + {"𝜢", "𝜢"}, + {"𝜣", "𝜣"}, + {"𝜤", "𝜤"}, + {"𝜥", "𝜥"}, + {"𝜦", "𝜦"}, + {"𝜧", "𝜧"}, + {"𝜨", "𝜨"}, + {"𝜩", "𝜩"}, + {"𝜪", "𝜪"}, + {"𝜫", "𝜫"}, + {"𝜬", "𝜬"}, + {"𝜭", "𝜭"}, + {"𝜮", "𝜮"}, + {"𝜯", "𝜯"}, + {"𝜰", "𝜰"}, + {"𝜱", "𝜱"}, + {"𝜲", "𝜲"}, + {"𝜳", "𝜳"}, + {"𝜴", "𝜴"}, + {"𝝖", "𝝖"}, + {"𝝗", "𝝗"}, + {"𝝘", "𝝘"}, + {"𝝙", "𝝙"}, + {"𝝚", "𝝚"}, + {"𝝛", "𝝛"}, + {"𝝜", "𝝜"}, + {"𝝝", "𝝝"}, + {"𝝞", "𝝞"}, + {"𝝟", "𝝟"}, + {"𝝠", "𝝠"}, + {"𝝡", "𝝡"}, + {"𝝢", "𝝢"}, + {"𝝣", "𝝣"}, + {"𝝤", "𝝤"}, + {"𝝥", "𝝥"}, + {"𝝦", "𝝦"}, + {"𝝧", "𝝧"}, + {"𝝨", "𝝨"}, + {"𝝩", "𝝩"}, + {"𝝪", "𝝪"}, + {"𝝫", "𝝫"}, + {"𝝬", "𝝬"}, + {"𝝭", "𝝭"}, + {"𝝮", "𝝮"}, + {"𝞐", "𝞐"}, + {"𝞑", "𝞑"}, + {"𝞒", "𝞒"}, + {"𝞓", "𝞓"}, + {"𝞔", "𝞔"}, + {"𝞕", "𝞕"}, + {"𝞖", "𝞖"}, + {"𝞗", "𝞗"}, + {"𝞘", "𝞘"}, + {"𝞙", "𝞙"}, + {"𝞚", "𝞚"}, + {"𝞛", "𝞛"}, + {"𝞜", "𝞜"}, + {"𝞝", "𝞝"}, + {"𝞞", "𝞞"}, + {"𝞟", "𝞟"}, + {"𝞠", "𝞠"}, + {"𝞡", "𝞡"}, + {"𝞢", "𝞢"}, + {"𝞣", "𝞣"}, + {"𝞤", "𝞤"}, + {"𝞥", "𝞥"}, + {"𝞦", "𝞦"}, + {"𝞧", "𝞧"}, + {"𝞨", "𝞨"}, + {"𝟊", "𝟊"}, + {"𞤀", "𞤢"}, + {"𞤁", "𞤣"}, + {"𞤂", "𞤤"}, + {"𞤃", "𞤥"}, + {"𞤄", "𞤦"}, + {"𞤅", "𞤧"}, + {"𞤆", "𞤨"}, + {"𞤇", "𞤩"}, + {"𞤈", "𞤪"}, + {"𞤉", "𞤫"}, + {"𞤊", "𞤬"}, + {"𞤋", "𞤭"}, + {"𞤌", "𞤮"}, + {"𞤍", "𞤯"}, + {"𞤎", "𞤰"}, + {"𞤏", "𞤱"}, + {"𞤐", "𞤲"}, + {"𞤑", "𞤳"}, + {"𞤒", "𞤴"}, + {"𞤓", "𞤵"}, + {"𞤔", "𞤶"}, + {"𞤕", "𞤷"}, + {"𞤖", "𞤸"}, + {"𞤗", "𞤹"}, + {"𞤘", "𞤺"}, + {"𞤙", "𞤻"}, + {"𞤚", "𞤼"}, + {"𞤛", "𞤽"}, + {"𞤜", "𞤾"}, + {"𞤝", "𞤿"}, + {"𞤞", "𞥀"}, + {"𞤟", "𞥁"}, + {"𞤠", "𞥂"}, + {"𞤡", "𞥃"}, +}; + +} + diff --git a/common/include/util.hpp b/common/include/util.hpp index daf0560c9f359f565b929499afa99efbda347f7d..9acb179402362c20edc7e904bc11f5a3ea243921 100644 --- a/common/include/util.hpp +++ b/common/include/util.hpp @@ -16,7 +16,6 @@ namespace util { - void warning(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current()); void error(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current()); void error(const std::exception & e, const std::experimental::source_location & location = std::experimental::source_location::current()); @@ -102,6 +101,8 @@ std::string join(const std::string & delim, const boost::circular_buffer<T> elem return result; } +std::string lower(const std::string & s); + }; template <> diff --git a/common/src/util.cpp b/common/src/util.cpp index fb5308b39b9d308986d7ecd19ed7ee7412ec6805..3b8aa964bfb72d03914b4071608c7d0b5ddc6026 100644 --- a/common/src/util.cpp +++ b/common/src/util.cpp @@ -2,6 +2,7 @@ #include "utf8.hpp" #include <ctime> #include <algorithm> +#include "upper2lower" int util::printedLength(std::string_view s) { @@ -199,8 +200,19 @@ bool util::choiceWithProbability(float probability) bool util::isUppercase(utf8char c) { - static std::set<utf8char> uppercaseChars{"A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","À","Á","Â","Ã","Ä","Å","Æ","Ç","È","É","Ê","Ë","Ì","Í","Î","Ï","Ð","Ñ","Ò","Ó","Ô","Õ","Ö","Ø","Ù","Ú","Û","Ü","Ý","Þ","Ā","Ă","Ą","Ć","Ĉ","Ċ","Č","Ď","Đ","Ē","Ĕ","Ė","Ę","Ě","Ĝ","Ğ","Ġ","Ģ","Ĥ","Ħ","Ĩ","Ī","Ĭ","Į","İ","IJ","Ĵ","Ķ","Ĺ","Ļ","Ľ","Ŀ","Ł","Ń","Ņ","Ň","Ŋ","Ō","Ŏ","Ő","Œ","Ŕ","Ŗ","Ř","Ś","Ŝ","Ş","Š","Ţ","Ť","Ŧ","Ũ","Ū","Ŭ","Ů","Ű","Ų","Ŵ","Ŷ","Ÿ","Ź","Ż","Ž","Ɓ","Ƃ","Ƅ","Ɔ","Ƈ","Ɖ","Ɗ","Ƌ","Ǝ","Ə","Ɛ","Ƒ","Ɠ","Ɣ","Ɩ","Ɨ","Ƙ","Ɯ","Ɲ","Ɵ","Ơ","Ƣ","Ƥ","Ʀ","Ƨ","Ʃ","Ƭ","Ʈ","Ư","Ʊ","Ʋ","Ƴ","Ƶ","Ʒ","Ƹ","Ƽ","DŽ","LJ","NJ","Ǎ","Ǐ","Ǒ","Ǔ","Ǖ","Ǘ","Ǚ","Ǜ","Ǟ","Ǡ","Ǣ","Ǥ","Ǧ","Ǩ","Ǫ","Ǭ","Ǯ","DZ","Ǵ","Ƕ","Ƿ","Ǹ","Ǻ","Ǽ","Ǿ","Ȁ","Ȃ","Ȅ","Ȇ","Ȉ","Ȋ","Ȍ","Ȏ","Ȑ","Ȓ","Ȕ","Ȗ","Ș","Ț","Ȝ","Ȟ","Ƞ","Ȣ","Ȥ","Ȧ","Ȩ","Ȫ","Ȭ","Ȯ","Ȱ","Ȳ","Ⱥ","Ȼ","Ƚ","Ⱦ","Ɂ","Ƀ","Ʉ","Ʌ","Ɇ","Ɉ","Ɋ","Ɍ","Ɏ","Ͱ","Ͳ","Ͷ","Ϳ","Ά","Έ","Ή","Ί","Ό","Ύ","Ώ","Α","Β","Γ","Δ","Ε","Ζ","Η","Θ","Ι","Κ","Λ","Μ","Ν","Ξ","Ο","Π","Ρ","Σ","Τ","Υ","Φ","Χ","Ψ","Ω","Ϊ","Ϋ","Ϗ","ϒ","ϓ","ϔ","Ϙ","Ϛ","Ϝ","Ϟ","Ϡ","Ϣ","Ϥ","Ϧ","Ϩ","Ϫ","Ϭ","Ϯ","ϴ","Ϸ","Ϲ","Ϻ","Ͻ","Ͼ","Ͽ","Ѐ","Ё","Ђ","Ѓ","Є","Ѕ","І","Ї","Ј","Љ","Њ","Ћ","Ќ","Ѝ","Ў","Џ","А","Б","В","Г","Д","Е","Ж","З","И","Й","К","Л","М","Н","О","П","Р","С","Т","У","Ф","Х","Ц","Ч","Ш","Щ","Ъ","Ы","Ь","Э","Ю","Я","Ѡ","Ѣ","Ѥ","Ѧ","Ѩ","Ѫ","Ѭ","Ѯ","Ѱ","Ѳ","Ѵ","Ѷ","Ѹ","Ѻ","Ѽ","Ѿ","Ҁ","Ҋ","Ҍ","Ҏ","Ґ","Ғ","Ҕ","Җ","Ҙ","Қ","Ҝ","Ҟ","Ҡ","Ң","Ҥ","Ҧ","Ҩ","Ҫ","Ҭ","Ү","Ұ","Ҳ","Ҵ","Ҷ","Ҹ","Һ","Ҽ","Ҿ","Ӏ","Ӂ","Ӄ","Ӆ","Ӈ","Ӊ","Ӌ","Ӎ","Ӑ","Ӓ","Ӕ","Ӗ","Ә","Ӛ","Ӝ","Ӟ","Ӡ","Ӣ","Ӥ","Ӧ","Ө","Ӫ","Ӭ","Ӯ","Ӱ","Ӳ","Ӵ","Ӷ","Ӹ","Ӻ","Ӽ","Ӿ","Ԁ","Ԃ","Ԅ","Ԇ","Ԉ","Ԋ","Ԍ","Ԏ","Ԑ","Ԓ","Ԕ","Ԗ","Ԙ","Ԛ","Ԝ","Ԟ","Ԡ","Ԣ","Ԥ","Ԧ","Ԩ","Ԫ","Ԭ","Ԯ","Ա","Բ","Գ","Դ","Ե","Զ","Է","Ը","Թ","Ժ","Ի","Լ","Խ","Ծ","Կ","Հ","Ձ","Ղ","Ճ","Մ","Յ","Ն","Շ","Ո","Չ","Պ","Ջ","Ռ","Ս","Վ","Տ","Ր","Ց","Ւ","Փ","Ք","Օ","Ֆ","Ⴀ","Ⴁ","Ⴂ","Ⴃ","Ⴄ","Ⴅ","Ⴆ","Ⴇ","Ⴈ","Ⴉ","Ⴊ","Ⴋ","Ⴌ","Ⴍ","Ⴎ","Ⴏ","Ⴐ","Ⴑ","Ⴒ","Ⴓ","Ⴔ","Ⴕ","Ⴖ","Ⴗ","Ⴘ","Ⴙ","Ⴚ","Ⴛ","Ⴜ","Ⴝ","Ⴞ","Ⴟ","Ⴠ","Ⴡ","Ⴢ","Ⴣ","Ⴤ","Ⴥ","Ⴧ","Ⴭ","Ꭰ","Ꭱ","Ꭲ","Ꭳ","Ꭴ","Ꭵ","Ꭶ","Ꭷ","Ꭸ","Ꭹ","Ꭺ","Ꭻ","Ꭼ","Ꭽ","Ꭾ","Ꭿ","Ꮀ","Ꮁ","Ꮂ","Ꮃ","Ꮄ","Ꮅ","Ꮆ","Ꮇ","Ꮈ","Ꮉ","Ꮊ","Ꮋ","Ꮌ","Ꮍ","Ꮎ","Ꮏ","Ꮐ","Ꮑ","Ꮒ","Ꮓ","Ꮔ","Ꮕ","Ꮖ","Ꮗ","Ꮘ","Ꮙ","Ꮚ","Ꮛ","Ꮜ","Ꮝ","Ꮞ","Ꮟ","Ꮠ","Ꮡ","Ꮢ","Ꮣ","Ꮤ","Ꮥ","Ꮦ","Ꮧ","Ꮨ","Ꮩ","Ꮪ","Ꮫ","Ꮬ","Ꮭ","Ꮮ","Ꮯ","Ꮰ","Ꮱ","Ꮲ","Ꮳ","Ꮴ","Ꮵ","Ꮶ","Ꮷ","Ꮸ","Ꮹ","Ꮺ","Ꮻ","Ꮼ","Ꮽ","Ꮾ","Ꮿ","Ᏸ","Ᏹ","Ᏺ","Ᏻ","Ᏼ","Ᏽ","Ა","Ბ","Გ","Დ","Ე","Ვ","Ზ","Თ","Ი","Კ","Ლ","Მ","Ნ","Ო","Პ","Ჟ","Რ","Ს","Ტ","Უ","Ფ","Ქ","Ღ","Ყ","Შ","Ჩ","Ც","Ძ","Წ","Ჭ","Ხ","Ჯ","Ჰ","Ჱ","Ჲ","Ჳ","Ჴ","Ჵ","Ჶ","Ჷ","Ჸ","Ჹ","Ჺ","Ჽ","Ჾ","Ჿ","Ḁ","Ḃ","Ḅ","Ḇ","Ḉ","Ḋ","Ḍ","Ḏ","Ḑ","Ḓ","Ḕ","Ḗ","Ḙ","Ḛ","Ḝ","Ḟ","Ḡ","Ḣ","Ḥ","Ḧ","Ḩ","Ḫ","Ḭ","Ḯ","Ḱ","Ḳ","Ḵ","Ḷ","Ḹ","Ḻ","Ḽ","Ḿ","Ṁ","Ṃ","Ṅ","Ṇ","Ṉ","Ṋ","Ṍ","Ṏ","Ṑ","Ṓ","Ṕ","Ṗ","Ṙ","Ṛ","Ṝ","Ṟ","Ṡ","Ṣ","Ṥ","Ṧ","Ṩ","Ṫ","Ṭ","Ṯ","Ṱ","Ṳ","Ṵ","Ṷ","Ṹ","Ṻ","Ṽ","Ṿ","Ẁ","Ẃ","Ẅ","Ẇ","Ẉ","Ẋ","Ẍ","Ẏ","Ẑ","Ẓ","Ẕ","ẞ","Ạ","Ả","Ấ","Ầ","Ẩ","Ẫ","Ậ","Ắ","Ằ","Ẳ","Ẵ","Ặ","Ẹ","Ẻ","Ẽ","Ế","Ề","Ể","Ễ","Ệ","Ỉ","Ị","Ọ","Ỏ","Ố","Ồ","Ổ","Ỗ","Ộ","Ớ","Ờ","Ở","Ỡ","Ợ","Ụ","Ủ","Ứ","Ừ","Ử","Ữ","Ự","Ỳ","Ỵ","Ỷ","Ỹ","Ỻ","Ỽ","Ỿ","Ἀ","Ἁ","Ἂ","Ἃ","Ἄ","Ἅ","Ἆ","Ἇ","Ἐ","Ἑ","Ἒ","Ἓ","Ἔ","Ἕ","Ἠ","Ἡ","Ἢ","Ἣ","Ἤ","Ἥ","Ἦ","Ἧ","Ἰ","Ἱ","Ἲ","Ἳ","Ἴ","Ἵ","Ἶ","Ἷ","Ὀ","Ὁ","Ὂ","Ὃ","Ὄ","Ὅ","Ὑ","Ὓ","Ὕ","Ὗ","Ὠ","Ὡ","Ὢ","Ὣ","Ὤ","Ὥ","Ὦ","Ὧ","Ᾰ","Ᾱ","Ὰ","Ά","Ὲ","Έ","Ὴ","Ή","Ῐ","Ῑ","Ὶ","Ί","Ῠ","Ῡ","Ὺ","Ύ","Ῥ","Ὸ","Ό","Ὼ","Ώ","ℂ","ℇ","ℋ","ℌ","ℍ","ℐ","ℑ","ℒ","ℕ","ℙ","ℚ","ℛ","ℜ","ℝ","ℤ","Ω","ℨ","K","Å","ℬ","ℭ","ℰ","ℱ","Ⅎ","ℳ","ℾ","ℿ","ⅅ","Ↄ","Ⰰ","Ⰱ","Ⰲ","Ⰳ","Ⰴ","Ⰵ","Ⰶ","Ⰷ","Ⰸ","Ⰹ","Ⰺ","Ⰻ","Ⰼ","Ⰽ","Ⰾ","Ⰿ","Ⱀ","Ⱁ","Ⱂ","Ⱃ","Ⱄ","Ⱅ","Ⱆ","Ⱇ","Ⱈ","Ⱉ","Ⱊ","Ⱋ","Ⱌ","Ⱍ","Ⱎ","Ⱏ","Ⱐ","Ⱑ","Ⱒ","Ⱓ","Ⱔ","Ⱕ","Ⱖ","Ⱗ","Ⱘ","Ⱙ","Ⱚ","Ⱛ","Ⱜ","Ⱝ","Ⱞ","Ⱡ","Ɫ","Ᵽ","Ɽ","Ⱨ","Ⱪ","Ⱬ","Ɑ","Ɱ","Ɐ","Ɒ","Ⱳ","Ⱶ","Ȿ","Ɀ","Ⲁ","Ⲃ","Ⲅ","Ⲇ","Ⲉ","Ⲋ","Ⲍ","Ⲏ","Ⲑ","Ⲓ","Ⲕ","Ⲗ","Ⲙ","Ⲛ","Ⲝ","Ⲟ","Ⲡ","Ⲣ","Ⲥ","Ⲧ","Ⲩ","Ⲫ","Ⲭ","Ⲯ","Ⲱ","Ⲳ","Ⲵ","Ⲷ","Ⲹ","Ⲻ","Ⲽ","Ⲿ","Ⳁ","Ⳃ","Ⳅ","Ⳇ","Ⳉ","Ⳋ","Ⳍ","Ⳏ","Ⳑ","Ⳓ","Ⳕ","Ⳗ","Ⳙ","Ⳛ","Ⳝ","Ⳟ","Ⳡ","Ⳣ","Ⳬ","Ⳮ","Ⳳ","Ꙁ","Ꙃ","Ꙅ","Ꙇ","Ꙉ","Ꙋ","Ꙍ","Ꙏ","Ꙑ","Ꙓ","Ꙕ","Ꙗ","Ꙙ","Ꙛ","Ꙝ","Ꙟ","Ꙡ","Ꙣ","Ꙥ","Ꙧ","Ꙩ","Ꙫ","Ꙭ","Ꚁ","Ꚃ","Ꚅ","Ꚇ","Ꚉ","Ꚋ","Ꚍ","Ꚏ","Ꚑ","Ꚓ","Ꚕ","Ꚗ","Ꚙ","Ꚛ","Ꜣ","Ꜥ","Ꜧ","Ꜩ","Ꜫ","Ꜭ","Ꜯ","Ꜳ","Ꜵ","Ꜷ","Ꜹ","Ꜻ","Ꜽ","Ꜿ","Ꝁ","Ꝃ","Ꝅ","Ꝇ","Ꝉ","Ꝋ","Ꝍ","Ꝏ","Ꝑ","Ꝓ","Ꝕ","Ꝗ","Ꝙ","Ꝛ","Ꝝ","Ꝟ","Ꝡ","Ꝣ","Ꝥ","Ꝧ","Ꝩ","Ꝫ","Ꝭ","Ꝯ","Ꝺ","Ꝼ","Ᵹ","Ꝿ","Ꞁ","Ꞃ","Ꞅ","Ꞇ","Ꞌ","Ɥ","Ꞑ","Ꞓ","Ꞗ","Ꞙ","Ꞛ","Ꞝ","Ꞟ","Ꞡ","Ꞣ","Ꞥ","Ꞧ","Ꞩ","Ɦ","Ɜ","Ɡ","Ɬ","Ɪ","Ʞ","Ʇ","Ʝ","Ꭓ","Ꞵ","Ꞷ","Ꞹ","Ꞻ","Ꞽ","Ꞿ","Ꟃ","Ꞔ","Ʂ","Ᶎ","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","𐐀","𐐁","𐐂","𐐃","𐐄","𐐅","𐐆","𐐇","𐐈","𐐉","𐐊","𐐋","𐐌","𐐍","𐐎","𐐏","𐐐","𐐑","𐐒","𐐓","𐐔","𐐕","𐐖","𐐗","𐐘","𐐙","𐐚","𐐛","𐐜","𐐝","𐐞","𐐟","𐐠","𐐡","𐐢","𐐣","𐐤","𐐥","𐐦","𐐧","𐒰","𐒱","𐒲","𐒳","𐒴","𐒵","𐒶","𐒷","𐒸","𐒹","𐒺","𐒻","𐒼","𐒽","𐒾","𐒿","𐓀","𐓁","𐓂","𐓃","𐓄","𐓅","𐓆","𐓇","𐓈","𐓉","𐓊","𐓋","𐓌","𐓍","𐓎","𐓏","𐓐","𐓑","𐓒","𐓓","𐲀","𐲁","𐲂","𐲃","𐲄","𐲅","𐲆","𐲇","𐲈","𐲉","𐲊","𐲋","𐲌","𐲍","𐲎","𐲏","𐲐","𐲑","𐲒","𐲓","𐲔","𐲕","𐲖","𐲗","𐲘","𐲙","𐲚","𐲛","𐲜","𐲝","𐲞","𐲟","𐲠","𐲡","𐲢","𐲣","𐲤","𐲥","𐲦","𐲧","𐲨","𐲩","𐲪","𐲫","𐲬","𐲭","𐲮","𐲯","𐲰","𐲱","𐲲","𑢠","𑢡","𑢢","𑢣","𑢤","𑢥","𑢦","𑢧","𑢨","𑢩","𑢪","𑢫","𑢬","𑢭","𑢮","𑢯","𑢰","𑢱","𑢲","𑢳","𑢴","𑢵","𑢶","𑢷","𑢸","𑢹","𑢺","𑢻","𑢼","𑢽","𑢾","𑢿","𖹀","𖹁","𖹂","𖹃","𖹄","𖹅","𖹆","𖹇","𖹈","𖹉","𖹊","𖹋","𖹌","𖹍","𖹎","𖹏","𖹐","𖹑","𖹒","𖹓","𖹔","𖹕","𖹖","𖹗","𖹘","𖹙","𖹚","𖹛","𖹜","𖹝","𖹞","𖹟","𝐀","𝐁","𝐂","𝐃","𝐄","𝐅","𝐆","𝐇","𝐈","𝐉","𝐊","𝐋","𝐌","𝐍","𝐎","𝐏","𝐐","𝐑","𝐒","𝐓","𝐔","𝐕","𝐖","𝐗","𝐘","𝐙","𝐴","𝐵","𝐶","𝐷","𝐸","𝐹","𝐺","𝐻","𝐼","𝐽","𝐾","𝐿","𝑀","𝑁","𝑂","𝑃","𝑄","𝑅","𝑆","𝑇","𝑈","𝑉","𝑊","𝑋","𝑌","𝑍","𝑨","𝑩","𝑪","𝑫","𝑬","𝑭","𝑮","𝑯","𝑰","𝑱","𝑲","𝑳","𝑴","𝑵","𝑶","𝑷","𝑸","𝑹","𝑺","𝑻","𝑼","𝑽","𝑾","𝑿","𝒀","𝒁","𝒜","𝒞","𝒟","𝒢","𝒥","𝒦","𝒩","𝒪","𝒫","𝒬","𝒮","𝒯","𝒰","𝒱","𝒲","𝒳","𝒴","𝒵","𝓐","𝓑","𝓒","𝓓","𝓔","𝓕","𝓖","𝓗","𝓘","𝓙","𝓚","𝓛","𝓜","𝓝","𝓞","𝓟","𝓠","𝓡","𝓢","𝓣","𝓤","𝓥","𝓦","𝓧","𝓨","𝓩","𝔄","𝔅","𝔇","𝔈","𝔉","𝔊","𝔍","𝔎","𝔏","𝔐","𝔑","𝔒","𝔓","𝔔","𝔖","𝔗","𝔘","𝔙","𝔚","𝔛","𝔜","𝔸","𝔹","𝔻","𝔼","𝔽","𝔾","𝕀","𝕁","𝕂","𝕃","𝕄","𝕆","𝕊","𝕋","𝕌","𝕍","𝕎","𝕏","𝕐","𝕬","𝕭","𝕮","𝕯","𝕰","𝕱","𝕲","𝕳","𝕴","𝕵","𝕶","𝕷","𝕸","𝕹","𝕺","𝕻","𝕼","𝕽","𝕾","𝕿","𝖀","𝖁","𝖂","𝖃","𝖄","𝖅","𝖠","𝖡","𝖢","𝖣","𝖤","𝖥","𝖦","𝖧","𝖨","𝖩","𝖪","𝖫","𝖬","𝖭","𝖮","𝖯","𝖰","𝖱","𝖲","𝖳","𝖴","𝖵","𝖶","𝖷","𝖸","𝖹","𝗔","𝗕","𝗖","𝗗","𝗘","𝗙","𝗚","𝗛","𝗜","𝗝","𝗞","𝗟","𝗠","𝗡","𝗢","𝗣","𝗤","𝗥","𝗦","𝗧","𝗨","𝗩","𝗪","𝗫","𝗬","𝗭","𝘈","𝘉","𝘊","𝘋","𝘌","𝘍","𝘎","𝘏","𝘐","𝘑","𝘒","𝘓","𝘔","𝘕","𝘖","𝘗","𝘘","𝘙","𝘚","𝘛","𝘜","𝘝","𝘞","𝘟","𝘠","𝘡","𝘼","𝘽","𝘾","𝘿","𝙀","𝙁","𝙂","𝙃","𝙄","𝙅","𝙆","𝙇","𝙈","𝙉","𝙊","𝙋","𝙌","𝙍","𝙎","𝙏","𝙐","𝙑","𝙒","𝙓","𝙔","𝙕","𝙰","𝙱","𝙲","𝙳","𝙴","𝙵","𝙶","𝙷","𝙸","𝙹","𝙺","𝙻","𝙼","𝙽","𝙾","𝙿","𝚀","𝚁","𝚂","𝚃","𝚄","𝚅","𝚆","𝚇","𝚈","𝚉","𝚨","𝚩","𝚪","𝚫","𝚬","𝚭","𝚮","𝚯","𝚰","𝚱","𝚲","𝚳","𝚴","𝚵","𝚶","𝚷","𝚸","𝚹","𝚺","𝚻","𝚼","𝚽","𝚾","𝚿","𝛀","𝛢","𝛣","𝛤","𝛥","𝛦","𝛧","𝛨","𝛩","𝛪","𝛫","𝛬","𝛭","𝛮","𝛯","𝛰","𝛱","𝛲","𝛳","𝛴","𝛵","𝛶","𝛷","𝛸","𝛹","𝛺","𝜜","𝜝","𝜞","𝜟","𝜠","𝜡","𝜢","𝜣","𝜤","𝜥","𝜦","𝜧","𝜨","𝜩","𝜪","𝜫","𝜬","𝜭","𝜮","𝜯","𝜰","𝜱","𝜲","𝜳","𝜴","𝝖","𝝗","𝝘","𝝙","𝝚","𝝛","𝝜","𝝝","𝝞","𝝟","𝝠","𝝡","𝝢","𝝣","𝝤","𝝥","𝝦","𝝧","𝝨","𝝩","𝝪","𝝫","𝝬","𝝭","𝝮","𝞐","𝞑","𝞒","𝞓","𝞔","𝞕","𝞖","𝞗","𝞘","𝞙","𝞚","𝞛","𝞜","𝞝","𝞞","𝞟","𝞠","𝞡","𝞢","𝞣","𝞤","𝞥","𝞦","𝞧","𝞨","𝟊","𞤀","𞤁","𞤂","𞤃","𞤄","𞤅","𞤆","𞤇","𞤈","𞤉","𞤊","𞤋","𞤌","𞤍","𞤎","𞤏","𞤐","𞤑","𞤒","𞤓","𞤔","𞤕","𞤖","𞤗","𞤘","𞤙","𞤚","𞤛","𞤜","𞤝","𞤞","𞤟","𞤠","𞤡"}; + return upper2lower.count(c); +} + +std::string util::lower(const std::string & s) +{ + auto splited = util::splitAsUtf8(s); + for (auto & c : splited) + { + auto it = upper2lower.find(c); + if (it != upper2lower.end()) + c = it->second; + } - return uppercaseChars.count(c); + return fmt::format("{}", splited); } diff --git a/torch_modules/include/ContextModule.hpp b/torch_modules/include/ContextModule.hpp index b2f33cfed187c0f910cfafa2baea78102605aba5..2276aaaf5c287d11f67377323a4c40851c6f3618 100644 --- a/torch_modules/include/ContextModule.hpp +++ b/torch_modules/include/ContextModule.hpp @@ -15,6 +15,7 @@ class ContextModuleImpl : public Submodule torch::nn::Embedding wordEmbeddings{nullptr}; std::shared_ptr<MyModule> myModule{nullptr}; std::vector<std::string> columns; + std::map<std::size_t, std::function<std::string(const std::string &)>> functions; std::vector<int> bufferContext; std::vector<int> stackContext; int inSize; diff --git a/torch_modules/include/FocusedColumnModule.hpp b/torch_modules/include/FocusedColumnModule.hpp index cfd9c32cf0823704f6f16767cf9ad29745b67594..7ebd6c508b591d5f36f475b165c24de079d46cab 100644 --- a/torch_modules/include/FocusedColumnModule.hpp +++ b/torch_modules/include/FocusedColumnModule.hpp @@ -16,6 +16,7 @@ class FocusedColumnModuleImpl : public Submodule std::shared_ptr<MyModule> myModule{nullptr}; std::vector<int> focusedBuffer, focusedStack; std::string column; + std::function<std::string(const std::string&)> func{[](const std::string &s){return s;}}; int maxNbElements; int inSize; diff --git a/torch_modules/include/Submodule.hpp b/torch_modules/include/Submodule.hpp index 0a402c2859cfb55c6ba2c75bd5eb0c26616fbb81..6e8f6898c637574e16da10351870e41723c32c75 100644 --- a/torch_modules/include/Submodule.hpp +++ b/torch_modules/include/Submodule.hpp @@ -22,6 +22,7 @@ class Submodule : public torch::nn::Module, public DictHolder, public StateHolde virtual void addToContext(std::vector<std::vector<long>> & context, const Config & config) = 0; virtual torch::Tensor forward(torch::Tensor input) = 0; virtual void registerEmbeddings(std::filesystem::path pretrained) = 0; + const std::function<std::string(const std::string &)> & getFunction(const std::string functionName); }; #endif diff --git a/torch_modules/src/ContextModule.cpp b/torch_modules/src/ContextModule.cpp index 21723b14d6af1c3ae2e76bf3e899bf3b513e28c8..244944d7ce315fbf38f263c71790bcb9c6cdd9dd 100644 --- a/torch_modules/src/ContextModule.cpp +++ b/torch_modules/src/ContextModule.cpp @@ -14,7 +14,17 @@ ContextModuleImpl::ContextModuleImpl(std::string name, const std::string & defin for (auto & index : util::split(sm.str(2), ' ')) stackContext.emplace_back(std::stoi(index)); - columns = util::split(sm.str(3), ' '); + auto funcColumns = util::split(sm.str(3), ' '); + columns.clear(); + for (auto & funcCol : funcColumns) + { + auto splited = util::split(funcCol, ':'); + if (splited.size() > 2) + util::myThrow(fmt::format("invalid function:column '{}' of size {}", funcCol, splited.size())); + if (splited.size() == 2) + functions[columns.size()] = getFunction(splited[0]); + columns.emplace_back(splited.back()); + } auto subModuleType = sm.str(4); auto subModuleArguments = util::split(sm.str(5), ' '); @@ -67,7 +77,9 @@ void ContextModuleImpl::addToContext(std::vector<std::vector<long>> & context, c contextIndexes.emplace_back(-1); for (auto index : contextIndexes) - for (auto & col : columns) + for (unsigned int colIndex = 0; colIndex < columns.size(); colIndex++) + { + auto & col = columns[colIndex]; if (index == -1) { for (auto & contextElement : context) @@ -75,11 +87,16 @@ void ContextModuleImpl::addToContext(std::vector<std::vector<long>> & context, c } else { - int dictIndex = dict.getIndexOrInsert(config.getAsFeature(col, index)); + int dictIndex; + if (functions.count(colIndex)) + dictIndex = dict.getIndexOrInsert(functions.at(colIndex)(config.getAsFeature(col, index))); + else + dictIndex = dict.getIndexOrInsert(config.getAsFeature(col, index)); for (auto & contextElement : context) contextElement.push_back(dictIndex); } + } } torch::Tensor ContextModuleImpl::forward(torch::Tensor input) diff --git a/torch_modules/src/FocusedColumnModule.cpp b/torch_modules/src/FocusedColumnModule.cpp index 5ac927ab245522cc9fee405949515fbc0a1ab730..5eb0db28499009e74136f10d12a1fd4b43c8cd7b 100644 --- a/torch_modules/src/FocusedColumnModule.cpp +++ b/torch_modules/src/FocusedColumnModule.cpp @@ -8,7 +8,12 @@ FocusedColumnModuleImpl::FocusedColumnModuleImpl(std::string name, const std::st { try { - column = sm.str(1); + auto funcCol = util::split(sm.str(1), ':'); + if (funcCol.size() > 2) + util::myThrow(fmt::format("invalid function:column '{}' of size {}", sm.str(1), funcCol.size())); + if (funcCol.size() == 2) + func = getFunction(funcCol[0]); + column = funcCol.back(); maxNbElements = std::stoi(sm.str(2)); for (auto & index : util::split(sm.str(3), ' ')) @@ -90,7 +95,7 @@ void FocusedColumnModuleImpl::addToContext(std::vector<std::vector<long>> & cont std::vector<std::string> elements; if (column == "FORM") { - auto asUtf8 = util::splitAsUtf8(config.getAsFeature(column, index).get()); + auto asUtf8 = util::splitAsUtf8(func(config.getAsFeature(column, index).get())); for (int i = 0; i < maxNbElements; i++) if (i < (int)asUtf8.size()) @@ -100,7 +105,7 @@ void FocusedColumnModuleImpl::addToContext(std::vector<std::vector<long>> & cont } else if (column == "FEATS") { - auto splited = util::split(config.getAsFeature(column, index).get(), '|'); + auto splited = util::split(func(config.getAsFeature(column, index).get()), '|'); for (int i = 0; i < maxNbElements; i++) if (i < (int)splited.size()) @@ -119,12 +124,12 @@ void FocusedColumnModuleImpl::addToContext(std::vector<std::vector<long>> & cont } else if (column == "EOS") { - bool isEOS = config.getAsFeature(Config::EOSColName, index) == Config::EOSSymbol1; + bool isEOS = func(config.getAsFeature(Config::EOSColName, index)) == Config::EOSSymbol1; elements.emplace_back(fmt::format("EOS({})", isEOS)); } else { - elements.emplace_back(config.getAsFeature(column, index)); + elements.emplace_back(func(config.getAsFeature(column, index))); } if ((int)elements.size() != maxNbElements) diff --git a/torch_modules/src/Submodule.cpp b/torch_modules/src/Submodule.cpp index 0cfa723132875b825590c52b099d4334547a7fcd..a87f44669b3a6c58befe4271f4f83e7ae09af5e4 100644 --- a/torch_modules/src/Submodule.cpp +++ b/torch_modules/src/Submodule.cpp @@ -63,3 +63,17 @@ void Submodule::loadPretrainedW2vEmbeddings(torch::nn::Embedding & embeddings, s getDict().setState(originalState); } +const std::function<std::string(const std::string &)> & Submodule::getFunction(const std::string functionName) +{ + static std::map<std::string, std::function<std::string(const std::string &)>> functions + { + {"lower", [](const std::string & s) {return util::lower(s);}} + }; + + auto it = functions.find(util::lower(functionName)); + if (it == functions.end()) + util::myThrow(fmt::format("unknown function name '{}'", functionName)); + + return it->second; +} +