diff --git a/CMakeLists.txt b/CMakeLists.txt index 85d4e4bcdce693d1da0be09721efe1118d02158e..a94a43c37b96a18c5b65627e3d311e8fdde884d0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,5 +8,6 @@ include_directories(maca_common/include) add_subdirectory(maca_common) add_subdirectory(maca_lemmatizer) add_subdirectory(maca_trans_parser) +add_subdirectory(maca_crf_tagger) #set(CMAKE_INSTALL_PREFIX ../) diff --git a/maca_crf_tagger/CMakeLists.txt b/maca_crf_tagger/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..6df7df6ef265f3d1eafcd632372b032d8aa19344 --- /dev/null +++ b/maca_crf_tagger/CMakeLists.txt @@ -0,0 +1,24 @@ +include_directories(src) + +#compiling, linking and installing executables + +add_executable(crf_barebones_decoder ./src/crf_barebones_decoder.cc) +target_compile_options(crf_barebones_decoder PRIVATE -std=c++11) +install (TARGETS crf_barebones_decoder DESTINATION bin) + +#add_executable(test_simple_tagger ./src/test_simple_tagger.cc) +#target_compile_options(test_simple_tagger PRIVATE -std=c++11) +#install (TARGETS test_simple_tagger DESTINATION bin) + +add_executable(apply_template_crfsuite ./src/apply_template_crfsuite.cc) +target_compile_options(apply_template_crfsuite PRIVATE -std=c++11) +install (TARGETS apply_template_crfsuite DESTINATION bin) + +add_executable(maca_crf_convert_binmodel ./src/maca_crf_convert_binmodel.cc) +target_compile_options(maca_crf_convert_binmodel PRIVATE -std=c++11) +install (TARGETS maca_crf_convert_binmodel DESTINATION bin) + +add_executable(maca_crf_convert_binlexicon ./src/maca_crf_convert_binlexicon.cc) +target_compile_options(maca_crf_convert_binlexicon PRIVATE -std=c++11) +install (TARGETS maca_crf_convert_binlexicon DESTINATION bin) + diff --git a/maca_crf_tagger/src/apply_template_crfsuite.cc b/maca_crf_tagger/src/apply_template_crfsuite.cc new file mode 100644 index 0000000000000000000000000000000000000000..eba51ba44b7e638b6bcc9b88ac41fc86b7078111 --- /dev/null +++ b/maca_crf_tagger/src/apply_template_crfsuite.cc @@ -0,0 +1,72 @@ +#include <string> +#include <vector> +#include <iostream> +#include <fstream> +#include "crf_template.hh" + +// http://www.oopweb.com/CPP/Documents/CPPHOWTO/Volume/C++Programming-HOWTO-7.html +static void tokenize(const std::string& str, std::vector<std::string>& tokens, const std::string& delimiters = " ") +{ + std::string::size_type lastPos = str.find_first_not_of(delimiters, 0); + std::string::size_type pos = str.find_first_of(delimiters, lastPos); + while (std::string::npos != pos || std::string::npos != lastPos) + { + tokens.push_back(str.substr(lastPos, pos - lastPos)); + lastPos = str.find_first_not_of(delimiters, pos); + pos = str.find_first_of(delimiters, lastPos); + } +} + +static void replace(std::string& str, const std::string &search, const std::string &replacement) { + std::string::size_type pos = 0; + while ((pos = str.find(search, pos)) != std::string::npos) { + str.replace(pos, search.size(), replacement); + pos += replacement.size(); + } +} + +int main(int argc, char** argv) { + if(argc != 2) { + std::cerr << "usage: cat <input> | " << argv[0] << " <template>\n"; + return 1; + } + std::vector<macaon::CRFPPTemplate> templates; + std::ifstream templateFile(argv[1]); + while(!templateFile.eof()) { + std::string line; + std::getline(templateFile, line); + if(templateFile.eof()) break; + macaon::CRFPPTemplate current(line.c_str()); + if(current.type != macaon::CRFPPTemplate::BIGRAM) templates.push_back(current); + //std::cerr << templates.back() << std::endl; 
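+        // note: bigram ("B") templates are skipped above: crfsuite models label
+        // transitions itself, so only unigram templates are expanded into attributes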
+ } + std::vector<std::vector<std::string> > lines; + while(!std::cin.eof()) { + std::string line; + std::getline(std::cin, line); + if(std::cin.eof()) break; + std::vector<std::string> tokens; + tokenize(line, tokens, " \t"); + if(tokens.size() == 0) { + for(int position = 0; position < (int) lines.size(); position++) { + std::string label = lines[position][lines[position].size() - 1]; + replace(label, "\\", "\\\\"); + replace(label, ":", "\\:"); + std::cout << label; + for(std::vector<macaon::CRFPPTemplate>::const_iterator i = templates.begin(); i != templates.end(); i++) { + std::string feature = i->apply(lines, position); + replace(feature, "\\", "\\\\"); + replace(feature, ":", "\\:"); + std::cout << "\t" << feature; + } + /*if(position == 0) std::cout << "\t__BOS__"; + if(position == (int) lines.size() - 1) std::cout << "\t__EOS__";*/ + std::cout << std::endl; + } + std::cout << std::endl; + lines.clear(); + } else { + lines.push_back(tokens); + } + } +} diff --git a/maca_crf_tagger/src/crf_barebones_decoder.cc b/maca_crf_tagger/src/crf_barebones_decoder.cc new file mode 100644 index 0000000000000000000000000000000000000000..229868f4d0effabbe0e390f88be3cf8dc44b5cb3 --- /dev/null +++ b/maca_crf_tagger/src/crf_barebones_decoder.cc @@ -0,0 +1,90 @@ +#include <vector> +#include "crf_decoder.hh" +#include "crf_binlexicon.hh" +#include "crf_features.hh" + +/* This is a sample decoder for the crf tagger. + compile with: + g++ -O3 -Wall -o barebones_decoder barebones_decoder.cc + + example usage: + echo -e "I\nam\nyour\nfather\n\njhon\neats\npotatoes\n" | ./barebones_decoder en/bin/crf_tagger.model.bin en/bin/crf_tagger.wordtag.lexicon + */ + +void tag_sentence(macaon::Decoder& decoder, macaon::BinaryLexicon* lexicon, const std::vector<std::vector<std::string> >& lines, int wordField, bool isConll07) { + + std::vector<std::vector<std::string> > features; + for(size_t i = 0; i < lines.size(); i++) { + std::vector<std::string> word_features; + macaon::FeatureGenerator::get_pos_features(lines[i][wordField], word_features); + features.push_back(word_features); + //for(size_t j = 0; j < word_features.size(); j++) std::cout << word_features[j] << " "; + //std::cout << "\n"; + } + std::vector<std::string> tagged; + decoder.decodeString(features, tagged, lexicon); + for(size_t i = 0; i < tagged.size(); i++) { + if(isConll07) { + for(size_t j = 0; j < lines[i].size(); j++) { + if(j != 0) std::cout << "\t"; + if(j == 3 || j == 4) std::cout << tagged[i]; + else std::cout << lines[i][j]; + } + std::cout << "\n"; + } else { + std::cout << lines[i][wordField] << "\t" << tagged[i] << "\n"; + } + } + std::cout << "\n"; +} + +void usage(const char* argv0) { + std::cerr << "usage: " << argv0 << " [--conll07] <model> [lexicon]\n"; + exit(1); +} + +int main(int argc, char** argv) { + bool isConll07 = false; // warning: no verification of conll07 format + int word_offset = 0; + std::string modelName = ""; + std::string lexiconName = ""; + + for(int i = 1; i < argc; i++) { + std::string arg = argv[i]; + if(arg == "-h" || arg == "--help") { + usage(argv[0]); + } else if(arg == "--conll07") { + isConll07 = true; + word_offset = 1; + } else if(modelName == "") { + modelName = arg; + } else if(lexiconName =="") { + lexiconName = arg; + } else { + usage(argv[0]); + } + } + if(modelName == "") usage(argv[0]); + + macaon::Decoder decoder(modelName); + macaon::BinaryLexicon *lexicon = NULL; + if(lexiconName != "") lexicon = new macaon::BinaryLexicon(lexiconName, decoder.getTagset()); + + std::string line; + 
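+    // read one token per line (tab-separated columns); an empty line marks the
+    // end of a sentence and triggers tagging of the accumulated tokens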
std::vector<std::vector<std::string> > lines; + while(std::getline(std::cin, line)) { + if(line == "") { + tag_sentence(decoder, lexicon, lines, word_offset, isConll07); + lines.clear(); + } else { + std::vector<std::string> tokens; + macaon::Tokenize(line, tokens, "\t"); + lines.push_back(tokens); + } + } + if(!lines.empty()) { + tag_sentence(decoder, lexicon, lines, word_offset, isConll07); + } + if(lexicon) delete lexicon; + return 0; +} diff --git a/maca_crf_tagger/src/crf_binlexicon.hh b/maca_crf_tagger/src/crf_binlexicon.hh new file mode 100644 index 0000000000000000000000000000000000000000..82948603fd9a9d18f4bbf096131368425b49950b --- /dev/null +++ b/maca_crf_tagger/src/crf_binlexicon.hh @@ -0,0 +1,365 @@ +#pragma once + +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include "crf_model.hh" +#include "crf_template.hh" +#include "crf_lexicon.hh" + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> +#include <errno.h> + +#include <limits.h> +#ifdef CHAR_BIT +#if CHAR_BIT != 8 +#error CHAR_BIT != 8 not supported +#endif +#endif + +namespace macaon { + const uint32_t lexiconMagic = 0xbffe1253; + + class BinaryLexicon : public Lexicon { + // disable alignment in MSVC++ +#pragma pack(push, 1) + struct ModelInfo { + uint32_t magic; + uint32_t dataLocation; + uint32_t tableSize; + uint32_t numLabels; + } __attribute__((packed)); + + struct TableElement { + uint32_t hashValue; + uint8_t keySize; + uint8_t dataSize; + uint32_t location; + } __attribute__((packed)); // disable alignment in g++ +#define lexicon_tag_t uint8_t +#define sizeof_LexiconTableElement (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint8_t) + sizeof(uint32_t)) + +#pragma pack(pop) + + private: + bool isBinary; + int fd; + const char* data; + size_t dataLength; + const ModelInfo* info; + const TableElement* table; + + // copied from https://smhasher.googlecode.com/svn-history/r136/trunk/MurmurHash3.cpp (MIT license) + + static inline uint32_t rotl32 ( uint32_t x, int8_t r ) + { + return (x << r) | (x >> (32 - r)); + } + + static inline uint64_t rotl64 ( uint64_t x, int8_t r ) + { + return (x << r) | (x >> (64 - r)); + } + +#define ROTL32(x,y) rotl32(x,y) +#define ROTL64(x,y) rotl64(x,y) + +#define BIG_CONSTANT(x) (x##LLU) +#define FORCE_INLINE inline + + static FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i ) + { + return p[i]; + } + + static FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i ) + { + return p[i]; + } + + static FORCE_INLINE uint32_t fmix ( uint32_t h ) + { + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; + } + + //---------- + + static FORCE_INLINE uint64_t fmix ( uint64_t k ) + { + k ^= k >> 33; + k *= BIG_CONSTANT(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return k; + } + + static void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ) + { + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + uint32_t c1 = 0xcc9e2d51; + uint32_t c2 = 0x1b873593; + + //---------- + // body + + const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); + + for(int i = -nblocks; i; i++) + { + uint32_t k1 = getblock(blocks,i); + + k1 *= c1; + k1 = ROTL32(k1,15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1,13); + h1 = h1*5+0xe6546b64; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*4); 
+ + uint32_t k1 = 0; + + switch(len & 3) + { + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0]; + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + //std::cerr << k1 << " " << h1 << " " << seed << "\n"; + }; + + //---------- + // finalization + + h1 ^= len; + + h1 = fmix(h1); + + *(uint32_t*)out = h1; + } + + static uint32_t Hash(const char *k, size_t length) { + uint32_t output = 0; + MurmurHash3_x86_32(k, length, BinaryModelConstants::magic, &output); + + // java hash function + /*uint32_t output = 0; + for(size_t i = 0; i < length; i++) { + output = 31 * output + k[i]; + }*/ + return output; + } + + public: + BinaryLexicon() : Lexicon(), isBinary(false) {} + BinaryLexicon(const std::string &filename, Symbols* _tagSymbols = NULL) : Lexicon(), isBinary(false), fd(-1), data((const char*) MAP_FAILED) { + tagSymbols = _tagSymbols; + Load(filename); + } + + ~BinaryLexicon() { + if(data != MAP_FAILED) munmap((void*) data, dataLength); + if(fd != -1) close(fd); + } + + bool Convert(const std::string& from, const std::string& to) { + std::cerr << "loading\n"; + Lexicon::Load(from); + std::cerr << "writing\n"; + return Write(to); + } + + bool Write(const std::string & filename) { + FILE* output = fopen(filename.c_str(), "w"); + // magic + fwrite(&lexiconMagic, sizeof(lexiconMagic), 1, output); // magic + + // features + uint32_t dataLocation = 0; + uint32_t dataLocationOffset = (uint32_t) ftell(output); + fwrite(&dataLocation, sizeof(dataLocation), 1, output); + uint32_t tableSize = (uint32_t) wordSymbols.NumSymbols() * 2; + fwrite(&tableSize, sizeof(tableSize), 1, output); + uint32_t numLabels = tagsForWord[kUnknownWordTags].size(); + fwrite(&numLabels, sizeof(numLabels), 1, output); + + // create table + TableElement* table = (TableElement*) malloc(sizeof(TableElement) * tableSize); + memset(table, 0, sizeof(TableElement) * tableSize); + + // write entries + int num = 0; + int totalNumCollisions = 0; + int numTags = 0; + int sizeOfKeys = 0; + for(SymbolsIterator siter(wordSymbols); !siter.Done(); siter.Next()) { + std::string word = siter.Symbol(); + if(tagsForWordEntry.find(siter.Value()) == tagsForWordEntry.end()) { + continue; + } + int64 id = tagsForWordEntry[siter.Value()]; + + num++; + TableElement element; + element.hashValue = Hash(word.c_str(), word.length()) % tableSize; + element.keySize = (uint8_t) word.length(); + element.dataSize = tagsForWord[id].size(); + numTags += element.dataSize; + element.location = (uint32_t) ftell(output); + fwrite(word.c_str(), element.keySize, 1, output); + sizeOfKeys += element.keySize; + for(size_t tag = 0; tag < tagsForWord[id].size(); tag++) { + lexicon_tag_t packed = (lexicon_tag_t) tagsForWord[id][tag]; + fwrite(&packed, sizeof(packed), 1, output); + } + if(element.dataSize > 0) { + uint32_t hash = element.hashValue % tableSize; + int numCollisions = 0; + while(table[hash].location != 0) { + numCollisions++; + hash = (hash + 1) % tableSize; + } + totalNumCollisions += numCollisions; + table[hash] = element; + } + } + std::cerr << "avg collisions: " << 1.0 * totalNumCollisions / (double) wordSymbols.NumSymbols() << "\n"; + std::cerr << "sizeof (keys) = " << sizeOfKeys << "\n"; + std::cerr << "sizeof (tags) = " << sizeof(lexicon_tag_t) << " * " << numTags << "\n"; + std::cerr << "sizeof (entry in table) = " << sizeof_LexiconTableElement << " * " << tableSize << "\n"; + + // write table + dataLocation = (uint32_t) ftell(output); + for(uint32_t i = 0; i < tableSize; i++) { + fwrite(&table[i], sizeof(table[i]), 1, 
output); + } + free(table); + + // set feature locations + fseek(output, dataLocationOffset, SEEK_SET); + fwrite(&dataLocation, sizeof(dataLocation), 1, output); + + fclose(output); + return true; + } + + bool Load(const std::string& filename) { + isBinary = false; + + struct stat sb; + fd = open(filename.c_str(), O_RDONLY); + if(fd == -1) { + std::cerr << "ERROR: could not open crf lexicon \"" << filename << "\"\n"; + return false; + } + if (fstat(fd, &sb) == -1) { + std::cerr << "ERROR: could not fstat crf lexicon \"" << filename << "\"\n"; + return false; + } + dataLength = sb.st_size; + data = (const char*) mmap(NULL, dataLength, PROT_READ, MAP_PRIVATE, fd, 0); + if(data == MAP_FAILED) { + perror("mmap"); + std::cerr << "ERROR: could mmap() crf lexicon \"" << filename << "\"\n"; + return false; + } + + info = (const ModelInfo*) data; + + // read magic + if(info->magic != lexiconMagic) { + bool result = Lexicon::Load(filename); + if(result == false) { + std::cerr << "ERROR: invalid magic or unsupported version in binary crf lexicon. Please reconvert it from text model.\n"; + } + return result; + } + + // read table + table = (const TableElement*) &data[info->dataLocation]; + + loaded = true; + isBinary = true; + return true; + } + + bool GetTagsForWord(int64 word, std::vector<int64>& output) const { + if(!isBinary) { + return Lexicon::GetTagsForWord(word, output); + } + std::cerr << "ERROR: GetTagsForWord() not supported on binary models\n"; + abort(); + return false; + } + + int NumLabels() const { + if(!isBinary) return Lexicon::NumLabels(); + return info->numLabels; + } + + bool GetTagsForWord(const std::string& word, std::vector<int64>& output) const { + if(!isBinary) { + return Lexicon::GetTagsForWord(word, output); + //std::cerr << "ERROR: called GetTagsForWord() on a non binary model\n"; + //return false; + } + if(word == "<eps>") { + output.clear(); + output.push_back(0); + } + size_t keySize = word.length(); + uint32_t hashValue = Hash(word.c_str(), keySize); // % info->tableSize; + uint32_t offset = 0; + while(offset < info->tableSize) { + uint32_t location = (hashValue + offset) % info->tableSize; + const TableElement& element = table[location]; + /*std::cerr << word << " " << location << " " << hashValue << " " << offset << " " << info->tableSize << + "|" << element.hashValue << " " << (int) element.keySize << " " << (int) element.dataSize << " " << element.location << + "\n";*/ + if(element.location == 0) break; + if(element.keySize == keySize) { + char key[keySize + 1]; + strncpy(key, &data[element.location], keySize); + key[keySize] = '\0'; + if(std::string(key) == word) { + //std::cerr << "h:" << element.hashValue << " k:" << element.keySize << " d:" << element.dataSize << "\n"; + output.clear(); + const lexicon_tag_t* tags = (const lexicon_tag_t*) &data[element.location + keySize]; + for(int i = 0; i < element.dataSize; i++) { + output.push_back(tags[i]); + } + return true; + } + } + offset++; + } + // unknown word + output.clear(); + for(int i = 1; i < (int) info->numLabels + 1; i++) output.push_back(i); + return false; + } + + }; +} diff --git a/maca_crf_tagger/src/crf_binmodel.hh b/maca_crf_tagger/src/crf_binmodel.hh new file mode 100644 index 0000000000000000000000000000000000000000..33deb53834e55bcb0007882f28fbe5573a89d60b --- /dev/null +++ b/maca_crf_tagger/src/crf_binmodel.hh @@ -0,0 +1,406 @@ +#pragma once + +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include "crf_model.hh" +#include "crf_template.hh" + +#include <sys/types.h> +#include 
<sys/stat.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> +#include <errno.h> + +#include <limits.h> +#ifdef CHAR_BIT +#if CHAR_BIT != 8 +#error CHAR_BIT != 8 not supported +#endif +#endif + +namespace macaon { +// disable alignment in MSVC++ +#pragma pack(push, 1) + struct ModelInfo { + uint32_t magic; + uint32_t templateLocation; + uint32_t numTemplates; + uint32_t labelLocation; + uint32_t numLabels; + uint32_t featureLocation; + uint32_t tableSize; + } __attribute__((packed)); + + struct TableElement { + uint32_t hashValue; + uint16_t keySize; + uint16_t dataSize; + uint32_t location; + } __attribute__((packed)); // disable alignment in g++ +#define sizeof_TableElement (sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t)) + + struct LabelWeight { + uint16_t label; + float weight; + } __attribute__((packed)); +#define sizeof_LabelWeight (sizeof(uint16_t) + sizeof(float)) + + struct LabelPairWeight { + uint16_t previous; + uint16_t label; + float weight; + } __attribute__((packed)); +#define sizeof_LabelPairWeight (sizeof(uint16_t) + sizeof(uint16_t) + sizeof(float)) +#pragma pack(pop) + + namespace BinaryModelConstants { + const uint32_t magic = 0x132a0ab5; + } + + class BinaryModel : public CRFModel { + private: + bool isBinary; + int fd; + const char* data; + size_t dataLength; + const ModelInfo* info; + const TableElement* table; + std::vector<double> B_weights; // cache for B template + + // java hash function + uint32_t Hash(const char *k, size_t length) { + uint32_t output = 0; + for(size_t i = 0; i < length; i++) { + output = 31 * output + k[i]; + } + return output; + } + + public: + BinaryModel() : CRFModel(), isBinary(false) {} + BinaryModel(const std::string &filename) : CRFModel(), isBinary(false), fd(-1), data((const char*) MAP_FAILED) { + Load(filename); + } + + ~BinaryModel() { + if(data != MAP_FAILED) munmap((void*) data, dataLength); + if(fd != -1) close(fd); + } + + bool Convert(const std::string& from, const std::string& to) { + std::cerr << "loading\n"; + CRFModel::Load(from); + std::cerr << "writing\n"; + return Write(to); + } + + // trimming is already performed when writing the bin model + void TrimModel() { + std::unordered_map<std::string, int> newFeatures; + for(std::unordered_map<std::string, int>::const_iterator feature = features.begin(); feature != features.end(); feature++) { + if(feature->first[0] != 'B') { + int numNonNull = 0; + for(size_t i = 0; i < labels.size(); i++) { + float weight = weights[feature->second + i]; + if(weight != 0) numNonNull++; + } + if(numNonNull > 0) { + newFeatures[feature->first] = feature->second; + } + } else { + newFeatures[feature->first] = feature->second; + } + } + std::cerr << "trim: " << features.size() << " -> " << newFeatures.size() << "\n"; + features = newFeatures; + } + + bool Write(const std::string & filename) { + FILE* output = fopen(filename.c_str(), "w"); + // magic + fwrite(&BinaryModelConstants::magic, sizeof(BinaryModelConstants::magic), 1, output); // magic + + // templates + uint32_t templateLocation = 0; + uint32_t templateLocationOffset = (uint32_t) ftell(output); + fwrite(&templateLocation, sizeof(templateLocation), 1, output); + uint32_t numTemplates = (uint32_t) templates.size(); + fwrite(&numTemplates, sizeof(numTemplates), 1, output); + + // labels + uint32_t labelLocation = 0; + uint32_t labelLocationOffset = (uint32_t) ftell(output); + fwrite(&labelLocation, sizeof(labelLocation), 1, output); + uint32_t numLabels = (uint32_t) 
labels.size(); + fwrite(&numLabels, sizeof(numLabels), 1, output); + + // features + uint32_t featureLocation = 0; + uint32_t featureLocationOffset = (uint32_t) ftell(output); + fwrite(&featureLocation, sizeof(featureLocation), 1, output); + uint32_t tableSize = (uint32_t) features.size() * 3; + fwrite(&tableSize, sizeof(tableSize), 1, output); + + // create table + TableElement* table = (TableElement*) malloc(sizeof(TableElement) * tableSize); + memset(table, 0, sizeof(TableElement) * tableSize); + + // write templates + templateLocation = (uint32_t) ftell(output); + for(std::vector<CRFPPTemplate>::const_iterator i = templates.begin(); i != templates.end(); i++) { + fprintf(output, "%s\n", i->text.c_str()); + } + + // write labels + std::vector<std::string> labelVector(labels.size()); + for(std::unordered_map<std::string, int>::const_iterator label = labels.begin(); label != labels.end(); label++) { + labelVector[label->second] = label->first; + } + labelLocation = (uint32_t) ftell(output); + for(size_t i = 0; i < labelVector.size(); i++) { + fprintf(output, "%s\n", labelVector[i].c_str()); + } + + // write weights + int num = 0; + int totalNumCollisions = 0; + int numUnigram = 0; + int numBigram = 0; + for(std::unordered_map<std::string, int>::const_iterator feature = features.begin(); feature != features.end(); feature++) { + num++; + TableElement element; + element.hashValue = Hash(feature->first.c_str(), feature->first.length()) % tableSize; + element.keySize = (uint16_t) feature->first.length(); + element.dataSize = 0; + element.location = (uint32_t) ftell(output); + fwrite(feature->first.c_str(), element.keySize, 1, output); + if(feature->first[0] == 'B') { + for(uint16_t label = 0; label < numLabels; label++) { + for(uint16_t previous = 0; previous < numLabels; previous++) { + float weight = weights[feature->second + label + numLabels * previous]; + if(weight != 0) { + LabelPairWeight item; + item.previous = previous; + item.label = label; + item.weight = weight; + fwrite(&item, sizeof(item), 1, output); + element.dataSize ++; + } + } + } + numBigram++; + } else { + for(uint16_t label = 0; label < numLabels; label++) { + float weight = weights[feature->second + label]; + if(weight != 0) { + LabelWeight item; + item.label = label; + item.weight = weight; + fwrite(&item, sizeof(item), 1, output); + element.dataSize ++; + } + } + numUnigram++; + } + if(element.dataSize > 0) { + uint32_t hash = element.hashValue % tableSize; + int numCollisions = 0; + while(table[hash].location != 0) { + numCollisions++; + hash = (hash + 1) % tableSize; + } + totalNumCollisions += numCollisions; + //std::cout << element.hashValue << " " << feature->first << "\n"; + table[hash] = element; + } + } + std::cerr << "avg collisions: " << 1.0 * totalNumCollisions / (double) features.size() << "\n"; + std::cerr << "sizeof (label+weight) = " << sizeof_LabelWeight << " * " << numUnigram << "\n"; + std::cerr << "sizeof (label+label+weight) = " << sizeof_LabelPairWeight << " * " << numBigram << "\n"; + std::cerr << "sizeof (entry in table) = " << sizeof_TableElement << " * " << tableSize << "\n"; + + // write table + featureLocation = (uint32_t) ftell(output); + for(uint32_t i = 0; i < tableSize; i++) { + fwrite(&table[i], sizeof(table[i]), 1, output); + } + free(table); + + // set section locations + fseek(output, templateLocationOffset, SEEK_SET); + fwrite(&templateLocation, sizeof(templateLocation), 1, output); + + // set label locations + fseek(output, labelLocationOffset, SEEK_SET); + fwrite(&labelLocation, 
sizeof(labelLocation), 1, output); + + // set feature locations + fseek(output, featureLocationOffset, SEEK_SET); + fwrite(&featureLocation, sizeof(featureLocation), 1, output); + + fclose(output); + return true; + } + + bool Load(const std::string& filename) { + isBinary = false; + + struct stat sb; + fd = open(filename.c_str(), O_RDONLY); + if(fd == -1) { + std::cerr << "ERROR: could not open crf model \"" << filename << "\"\n"; + return false; + } + if (fstat(fd, &sb) == -1) { + std::cerr << "ERROR: could not fstat crf model \"" << filename << "\"\n"; + return false; + } + dataLength = sb.st_size; + data = (const char*) mmap(NULL, dataLength, PROT_READ, MAP_PRIVATE, fd, 0); + if(data == MAP_FAILED) { + perror("mmap"); + std::cerr << "ERROR: could mmap() crf model \"" << filename << "\"\n"; + return false; + } + name = filename; + + info = (const ModelInfo*) data; + + // read magic + if(info->magic != BinaryModelConstants::magic) { + //std::cerr << "WARNING: binary crf model format not recognized, trying text model\n"; + return CRFModel::Load(filename); + } + + size_t lineSize = 0; + + // read templates + templates.clear(); + const char* line = (const char*) &data[info->templateLocation]; + for(size_t i = 0; i < info->numTemplates; i++) { + lineSize = strchr(line, '\n') - line; + char content[lineSize + 1]; + strncpy(content, line, lineSize); + content[lineSize] = '\0'; + //std::cerr << "TEMPLATE[" << content << "]\n"; + templates.push_back(CRFPPTemplate(content)); + line += lineSize + 1; + } + + // read labels + labels.clear(); + reverseLabels.clear(); + line = (const char*) &data[info->labelLocation]; + for(uint32_t i = 0; i < info->numLabels; i++) { + lineSize = strchr(line, '\n') - line; + char content[lineSize + 1]; + strncpy(content, line, lineSize); + content[lineSize] = '\0'; + //std::cerr << "LABEL[" << content << "]\n"; + labels[std::string(content)] = (int) i; + reverseLabels.push_back(std::string(content)); + line += lineSize + 1; + } + + // read table + table = (const TableElement*) &data[info->featureLocation]; + + ComputeWindowOffset(); + loaded = true; + isBinary = true; + GetWeights("B", B_weights); + return true; + } + + bool GetWeights(const std::string& feature, std::vector<double>& output) { + if(!isBinary) { + std::cerr << "ERROR: called GetWeights() on a non binary model\n"; + return false; + } + size_t keySize = feature.length(); + uint32_t hashValue = Hash(feature.c_str(), keySize) % info->tableSize; + uint32_t offset = 0; + size_t numLabels = labels.size(); + while(offset < info->tableSize) { + uint32_t location = (hashValue + offset) % info->tableSize; + const TableElement& element = table[location]; + if(element.location == 0) return false; + if(element.keySize == keySize) { + char key[keySize + 1]; + strncpy(key, &data[element.location], keySize); + key[keySize] = '\0'; + if(std::string(key) == feature) { + //std::cerr << "h:" << element.hashValue << " k:" << element.keySize << " d:" << element.dataSize << "\n"; + if(feature[0] == 'B') { + output.assign(numLabels * numLabels, 0); + const LabelPairWeight* items = (const LabelPairWeight*) &data[element.location + keySize]; + for(int i = 0; i < element.dataSize; i++) { + output[items[i].label + numLabels * items[i].previous] = items[i].weight; + } + } else { + output.assign(numLabels, 0); + const LabelWeight* items = (const LabelWeight*) &data[element.location + keySize]; + for(int i = 0; i < element.dataSize; i++) { + output[items[i].label] = items[i].weight; + } + } + return true; + } + } + offset++; + } + 
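+            // the whole table was probed without a match: the feature has no
+            // non-zero weights in this model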
return false; + } + + /* note: this function can use bigram templates conditionned on observations */ + double rescore(const std::vector<std::vector<std::string> > &input, const std::vector<int> &context, const std::vector<int> &context_tags) { + if(!isBinary) { + return CRFModel::rescore(input, context, context_tags); + } + double output = 0; + if((int) context.size() != window_length) return 0; + if(context[window_offset] < 0) return 0; + const int label = context_tags[window_offset]; //ilabels[input[context[window_offset]][input[context[window_offset]].size() - 1]]; + int previous = -1; + if(window_length > 1 && context[window_offset - 1] >=0) previous = context_tags[window_offset - 1]; + for(std::vector<CRFPPTemplate>::const_iterator i = templates.begin(); i != templates.end(); i++) { + std::string feature = i->applyToClique(input, context, window_offset); + std::vector<double> feature_weights; + if(GetWeights(feature, feature_weights)) { + if(i->type == CRFPPTemplate::UNIGRAM) output += feature_weights[label]; + else if(previous != -1) output += feature_weights[label + labels.size() * previous]; + } + } + return output; + } + + /* note: this function CANNOT use bigram templates conditionned on observations */ + double transition(int previous, int label) { + if(!isBinary) return CRFModel::transition(previous, label); + return B_weights[label + info->numLabels * previous]; + } + + void emissions(const std::vector<std::vector<std::string> > &input, const std::vector<int> &context, std::vector<double>& output) { + if(!isBinary) { + CRFModel::emissions(input, context, output); + return; + } + output.assign(labels.size(), 0); + if((int) context.size() != window_length) return; + if(context[window_offset] == -1) return; + for(std::vector<CRFPPTemplate>::const_iterator i = templates.begin(); i != templates.end(); i++) { + if(i->type == CRFPPTemplate::UNIGRAM) { + std::string feature = i->applyToClique(input, context, window_offset); + std::vector<double> feature_weights; + if(GetWeights(feature, feature_weights)) { + for(size_t label = 0; label < labels.size(); label++) + output[label] += feature_weights[label]; + } + } + } + } + }; +} diff --git a/maca_crf_tagger/src/crf_decoder.hh b/maca_crf_tagger/src/crf_decoder.hh new file mode 100644 index 0000000000000000000000000000000000000000..0c89aebddbefe82f06cdf5a3412c15dfd695a867 --- /dev/null +++ b/maca_crf_tagger/src/crf_decoder.hh @@ -0,0 +1,147 @@ +#pragma once +#include <list> +#ifdef __APPLE__ +#include "../../../third_party/unordered_map/unordered_map.hpp" +#else +#include <unordered_map> +#endif +#include "crf_binmodel.hh" +#include "crf_utils.hh" +#include "crf_binlexicon.hh" + +namespace macaon { + + struct Decoder { + + //CRFModel model; + BinaryModel model; + Symbols tagSet; + + Decoder() : tagSet("tagset") { } + Decoder(const std::string &filename) : tagSet("tagset") { + model.Load(filename); + tagSet.AddSymbol("<eps>", 0); + for(std::unordered_map<std::string, int>::const_iterator label = model.labels.begin(); label != model.labels.end(); label++) { + tagSet.AddSymbol(label->first, label->second + 1); + } + } + + Symbols* getTagset() { + return &tagSet; + } + + bool IsLoaded() const { + return model.IsLoaded(); + } + + /* Faster decoder for simple sequences. + * This function supports an optional lexicon to specify allowed word/tags. And attional option sets the location of the word in the feature vector. 
+ * */ + void decodeString(const std::vector<std::vector<std::string> > &features, std::vector<std::string> &predictions, const BinaryLexicon* lexicon=NULL, int wordFeatureLocation=0) { + int length = features.size(); + int numLabels = model.labels.size(); + + /*if(lexicon->NumLabels() != numLabels) { + std::cerr << "ERROR: num label mismatch between model and lexicon\n"; + return; + }*/ + + // store score and backtrack matrices (TODO: size matrices according to possible word/tag assoc) + std::vector<std::vector<double> > scores(length, std::vector<double>(numLabels, 0.0)); + std::vector<std::vector<int> > backtrack(length, std::vector<int>(numLabels, 0)); + /*double** scores = new double*[length]; + int** backtrack = new int*[length]; + + for(int i = 0; i < length; i++) { + scores[i] = new double[numLabels]; + backtrack[i] = new int[numLabels]; + for(int j = 0; j < numLabels; j++) { + backtrack[i][j] = -1; + scores[i][j] = 0.0; + } + }*/ + + // possible tags for each word: use lexicon if provided + std::vector<std::vector<int64> > wordTags(length); + std::vector<int64> allTags(numLabels); + for(int label = 0; label < numLabels; label++) allTags[label] = label + 1; // warning: there is an offset of one for epsilon transitions + + // perform viterbi search for the maximum scoring labeling + for(int current = 0; current < length; current++) { + // honor lexicon or allow all tags + if(lexicon != NULL) lexicon->GetTagsForWord(features[current][wordFeatureLocation], wordTags[current]); + else wordTags[current] = allTags; + + // create context vector (offset of features for current word) + std::vector<int> context(model.window_length); + for(int i = 0; i < model.window_length; i++) + if(current + i - model.window_offset >= 0 && current + i - model.window_offset < length) context[i] = (current + i - model.window_offset); + else context[i] = -1; + + // compute emissions and find highest scoring transition pair + if(current == 0) { + // TODO: compute emissions only for valid word/tags pairs + std::vector<double> emissions; + model.emissions(features, context, emissions); + for(int e = 0; e < numLabels; e++) scores[current][e] = emissions[e]; + } else { + std::vector<double> emissions; + model.emissions(features, context, emissions); + for(int e = 0; e < numLabels; e++) scores[current][e] = emissions[e]; + for(size_t i = 0; i < wordTags[current].size(); i++) { + int label = wordTags[current][i] - 1; + if(label < 0 || label >= numLabels) { + std::cerr << "ERROR: unexpected label (" << label << ") from lexicon, please check that it is compatible with model.\n"; + return; + } + double max = 0; + int argmax = -1; + for(size_t j = 0; j < wordTags[current - 1].size(); j++) { + int previous = wordTags[current - 1][j] - 1; + if(previous < 0 || previous >= numLabels) { + std::cerr << "ERROR: unexpected label (" << previous << ") from lexicon, please check that it is compatible with model.\n"; + return; + } + double score = scores[current][label] + scores[current - 1][previous] + model.transition(previous, label); + if(argmax == -1 || max < score) { + max = score; + argmax = previous; + } + } + scores[current][label] = max; + backtrack[current][label] = argmax; + } + } + } + // find last label + double max = 0; + int argmax = -1; + if(length > 0) { + for(size_t i = 0; i < wordTags[length - 1].size(); i++) { + int label = wordTags[length - 1][i] - 1; + if(argmax == -1 || scores[length - 1][label] > max) { + max = scores[length - 1][label]; + argmax = label; + } + } + } + + // backtrack solution + int current = 
length - 1; + predictions.clear(); + predictions.resize(length); + while(current >= 0) { + predictions[current] = model.reverseLabels[argmax]; + argmax = backtrack[current][argmax]; + current --; + } + + /*for(int i = 0; i < length; i++) { + delete scores[i]; + delete backtrack[i]; + } + delete scores; + delete backtrack;*/ + } + }; +} diff --git a/maca_crf_tagger/src/crf_features.hh b/maca_crf_tagger/src/crf_features.hh new file mode 100644 index 0000000000000000000000000000000000000000..917a395b7a5ba9d31bf3578c93bcfe1e98eedfe9 --- /dev/null +++ b/maca_crf_tagger/src/crf_features.hh @@ -0,0 +1,100 @@ +#pragma once +#include <vector> +#include <string> + +namespace macaon { + class FeatureGenerator { + static void prefixesUtf8(const std::string &word, int n, std::vector<std::string> &output) { + size_t offset = 0; + while(offset < word.length() && n > 0) { + if((unsigned char)word[offset] >> 7 == 1) { // 1xxxxxxx (length of utf8 character) + offset++; + while(offset < word.length() && (unsigned char)word[offset] >> 6 == 2) { // 10xxxxxx (continuation of character) + offset++; + } + } else { + offset++; + } + + output.push_back(word.substr(0, offset)); + n--; + } + while(n > 0) { + output.push_back("__nil__"); + n--; + } + } + + static void suffixesUtf8(const std::string &word, int n, std::vector<std::string> &output) { + std::vector<int> char_starts; + size_t offset = 0; + while(offset < word.length()) { + char_starts.push_back(offset); + if((unsigned char)word[offset] >> 7 == 1) { // 1xxxxxxx (length of utf8 character) + offset++; + while(offset < word.length() && (unsigned char)word[offset] >> 6 == 2) { // 10xxxxxx (continuation of character) + offset++; + } + } else { + offset++; + } + } + for(int i = char_starts.size() - 1; i > 0 && n > 0; i--) { + //std::cerr << "s=[" << word.substr(offsets[i]) << "]\n"; + output.push_back(word.substr(char_starts[i])); + n--; + } + while(n > 0) { + output.push_back("__nil__"); + n--; + } + } + + + static void prefixes(const std::string &word, int n, std::vector<std::string> &output) { + int length = word.length(); + for(int i = 1; i <= n; i++) { + if(length >= i) output.push_back(word.substr(0, i)); + else output.push_back("__nil__"); + } + } + static void suffixes(const std::string &word, int n, std::vector<std::string> &output) { + int length = word.length(); + for(int i = 1; i <= n; i++) { + if(length >= i) output.push_back(word.substr(length - i, i)); + else output.push_back("__nil__"); + } + } + static void wordClasses(const std::string &word, std::vector<std::string> &output) { + bool containsNumber = false; + bool containsSymbol = false; + for(int i = 0; i < (int) word.length(); i++) { + if(!containsNumber && word.at(i) >= '0' && word.at(i) <= '9') containsNumber = true; + if(!containsSymbol && !((word.at(i) >= '0' && word.at(i) <= '9') || (word.at(i) >= 'a' && word.at(i) <= 'z') || (word.at(i) >= 'A' && word.at(i) <= 'Z'))) containsSymbol = true; + } + if(containsNumber) output.push_back("Y"); + else output.push_back("N"); + if(word.length() >= 2 && word.at(0) >= 'A' && word.at(0) <= 'Z' && word.at(1) >= 'a' && word.at(1) <= 'z') output.push_back("Y"); + else output.push_back("N"); + if(containsSymbol) output.push_back("Y"); + else output.push_back("N"); + } + public: + static void get_pos_features(const std::string &word, std::vector<std::string> &output, bool utf8=true) { + output.push_back(word); + wordClasses(word, output); + if(utf8) { + prefixesUtf8(word, 4, output); + suffixesUtf8(word, 4, output); + } else { + prefixes(word, 4, 
output);
+                suffixes(word, 4, output);
+            }
+        }
+        static std::vector<std::string> get_pos_features(const std::string &word, bool utf8=true) {
+            std::vector<std::string> output;
+            get_pos_features(word, output, utf8);
+            return output;
+        }
+    };
+}
diff --git a/maca_crf_tagger/src/crf_lexicon.hh b/maca_crf_tagger/src/crf_lexicon.hh
new file mode 100644
index 0000000000000000000000000000000000000000..5182c1fbb0142053a761b9f77029227b68bb70be
--- /dev/null
+++ b/maca_crf_tagger/src/crf_lexicon.hh
@@ -0,0 +1,128 @@
+#pragma once
+
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <stdint.h>
+#ifdef __APPLE__
+#include "../../../third_party/unordered_map/unordered_map.hpp"
+#else
+#include <unordered_map>
+#endif
+#include "crf_utils.hh"
+
+namespace macaon {
+    const int kEpsilonTags = 0;
+    const int kUnknownWordTags = 1;
+
+    class Lexicon {
+    protected:
+        bool loaded;
+
+        Symbols wordSymbols;
+        Symbols* tagSymbols;
+
+        std::vector<std::vector<int64> > tagsForWord;
+        std::unordered_map<int64, int> tagsForWordEntry;
+
+
+    public:
+        Lexicon() : loaded(false), wordSymbols("words"), tagSymbols(NULL) {
+            wordSymbols.AddSymbol("<eps>", 0);
+        }
+
+        Lexicon(const std::string& filename, Symbols* _tagSymbols) : loaded(false), wordSymbols("words"), tagSymbols(_tagSymbols) {
+            wordSymbols.AddSymbol("<eps>", 0);
+            Load(filename);
+        }
+
+        virtual ~Lexicon() {
+        }
+
+        int NumLabels() const {
+            return tagSymbols->NumSymbols() - 1; // account for epsilon
+        }
+
+        bool Load(const std::string &filename) {
+            tagsForWord.push_back(std::vector<int64>()); // keep space for epsilon tags
+            tagsForWord.push_back(std::vector<int64>()); // keep space for unk word tags
+            loaded = false;
+            std::unordered_map<std::string, int> known;
+            std::ifstream input(filename.c_str());
+            if(!input.is_open()) {
+                std::cerr << "ERROR: could not open " << filename << " in Lexicon::Load()" << std::endl;
+                return false;
+            }
+            while(!input.eof()) {
+                std::string line;
+                std::getline(input, line);
+                if(input.eof()) break;
+                std::string word;
+                std::string::size_type end_of_word = line.find('\t');
+                if(end_of_word == std::string::npos) {
+                    return false;
+                }
+                word = line.substr(0, end_of_word);
+                int64 wordId = wordSymbols.AddSymbol(word);
+                std::string signature = line.substr(end_of_word + 1);
+                std::unordered_map<std::string, int>::const_iterator found = known.find(signature);
+                if(found == known.end()) {
+                    int id = tagsForWord.size();
+                    known[signature] = id;
+                    tagsForWordEntry[wordId] = id;
+                    std::vector<std::string> tokens;
+                    Tokenize(signature, tokens, "\t");
+                    std::vector<int64> tagset;
+                    for(std::vector<std::string>::const_iterator i = tokens.begin(); i != tokens.end(); i++) {
+                        int64 tagId = tagSymbols->Find(*i);
+                        if(tagId != -1) tagset.push_back(tagId);
+                    }
+                    tagsForWord.push_back(tagset);
+                } else {
+                    tagsForWordEntry[wordId] = found->second;
+                }
+            }
+            tagsForWord[kEpsilonTags].push_back(0); // epsilon
+            for(SymbolsIterator siter(*tagSymbols); !siter.Done(); siter.Next()) { // unknown word
+                if(siter.Value() != 0) tagsForWord[kUnknownWordTags].push_back(siter.Value());
+            }
+            loaded = true;
+            return loaded;
+        }
+
+        virtual bool GetTagsForWord(const std::string& word, std::vector<int64>& output) const {
+            return GetTagsForWord(wordSymbols.Find(word), output);
+        }
+
+        virtual bool GetTagsForWord(int64 word, std::vector<int64>& output) const {
+            if(!IsLoaded()) {
+                std::cerr << "ERROR: Lexicon::GetTagsForWord(" << wordSymbols.Find(word) << ") called on empty lexicon" << std::endl;
+                return false;
+            }
+            if(word == -1) {
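+                // a word id of -1 means the word is not in the lexicon's symbol
+                // table: fall back to the unknown-word tag set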
+                output = tagsForWord[kUnknownWordTags];
+                return true;
+            }
+            if(word == 0) {
+                output = tagsForWord[kEpsilonTags];
+                return true;
+            }
+            std::unordered_map<int64, int>::const_iterator found = tagsForWordEntry.find(word);
+            if(found == tagsForWordEntry.end()) {
+                output = tagsForWord[kUnknownWordTags];
+            } else {
+                if(tagsForWord[found->second].size() == 0) {
+                    std::cerr << "WARNING: inconsistency between word/tag lexicon and model, word no " << word << " has no tags => treat as unknown word\n";
+                    output = tagsForWord[kUnknownWordTags];
+                } else {
+                    output = tagsForWord[found->second];
+                }
+            }
+            return true;
+        }
+
+        bool IsLoaded() const {
+            return loaded;
+        }
+
+    };
+}
diff --git a/maca_crf_tagger/src/crf_model.hh b/maca_crf_tagger/src/crf_model.hh
new file mode 100644
index 0000000000000000000000000000000000000000..6547460fb5beff1d79c2ff1b7189169c602c2154
--- /dev/null
+++ b/maca_crf_tagger/src/crf_model.hh
@@ -0,0 +1,170 @@
+#pragma once
+#include <string>
+#include <vector>
+#ifdef __APPLE__
+#include "../../../third_party/unordered_map/unordered_map.hpp"
+#else
+#include <unordered_map>
+#endif
+#include <stdio.h>
+#include <errno.h>
+#include "crf_template.hh"
+
+namespace macaon {
+    class CRFModel {
+    protected:
+        std::string name;
+        std::vector<CRFPPTemplate> templates;
+        int version;
+        double cost_factor;
+        int maxid;
+        int xsize;
+        std::unordered_map<std::string, int> features;
+        std::vector<float> weights;
+        bool loaded;
+        int bigramWeightLocation;
+    public:
+        std::unordered_map<std::string, int> labels;
+        std::vector<std::string> reverseLabels;
+        int window_offset;
+        int window_length;
+        CRFModel() : loaded(false), bigramWeightLocation(-1) {}
+        CRFModel(const std::string &filename) : loaded(false), bigramWeightLocation(-1) { Load(filename); }
+
+        bool Load(const std::string &filename) {
+            name = filename;
+            FILE* fp = fopen(filename.c_str(), "r");
+            if(!fp) {
+                fprintf(stderr, "ERROR: %s, %s\n", filename.c_str(), strerror(errno));
+                return false;
+            }
+            char line[1024];
+            int section = 0;
+            int header_num = 0;
+            int line_num = 0;
+            int num_non_null = 0;
+            while(NULL != fgets(line, 1024, fp)) {
+                line_num ++;
+                if(line[0] == '\n') {
+                    section ++;
+                } else {
+                    line[1023] = '\0';
+                    line[strlen(line) - 1] = '\0'; // chomp
+                    if(section == 0) { // header
+                        char* space = line;
+                        while(*space != ' ' && *space != '\0') space ++;
+                        if(header_num == 0) version = strtol(space + 1, NULL, 10);
+                        else if(header_num == 1) cost_factor = strtod(space + 1, NULL);
+                        else if(header_num == 2) maxid = strtol(space + 1, NULL, 10);
+                        else if(header_num == 3) xsize = strtol(space + 1, NULL, 10);
+                        else {
+                            fprintf(stderr, "ERROR: unexpected header line %d in %s\n", line_num, filename.c_str());
+                            fclose(fp);
+                            return false;
+                        }
+                        header_num ++;
+                    } else if (section == 1) { // labels
+                        int next_id = labels.size();
+                        labels[std::string(line)] = next_id;
+                        reverseLabels.push_back(std::string(line));
+                    } else if (section == 2) { // templates
+                        templates.push_back(CRFPPTemplate(line));
+                    } else if (section == 3) { // feature indexes
+                        char* space = line;
+                        while(*space != ' ' && *space != '\0') space ++;
+                        *space = '\0';
+                        int index = strtol(line, NULL, 10);
+                        features[std::string(space + 1)] = index;
+                    } else if (section == 4) { // weights
+                        float weight = (float) strtod(line, NULL);
+                        if(weight != 0) num_non_null++;
+                        weights.push_back(weight);
+                    } else {
+                        fprintf(stderr, "ERROR: too many sections in %s\n", filename.c_str());
+                        fclose(fp);
+                        return false;
+                    }
+                }
+            }
+            //std::cerr << "weights: " << num_non_null << "/" << weights.size() << "\n";
+            fclose(fp);
+
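+            // derive the feature window (window_offset / window_length) needed by
+            // the decoder from the line offsets used in the templates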
ComputeWindowOffset(); + + std::unordered_map<std::string, int>::const_iterator found = features.find("B"); + if(found != features.end()) { + bigramWeightLocation = found->second; + } + loaded = true; + return true; + } + + void ComputeWindowOffset() { + int max_template_offset = 0; + int min_template_offset = 9; + for(std::vector<CRFPPTemplate>::const_iterator i = templates.begin(); i != templates.end(); i++) { + if(i->type == CRFPPTemplate::BIGRAM && min_template_offset > -1) min_template_offset = -1; // account for label bigram + for(std::vector<TemplateItem>::const_iterator j = i->items.begin(); j != i->items.end(); j++) { + if(j->line < min_template_offset) min_template_offset = j->line; + if(j->line > max_template_offset) max_template_offset = j->line; + } + } + window_offset = - min_template_offset; + window_length = max_template_offset - min_template_offset + 1; + } + + bool IsLoaded() const { + return loaded; + } + + /* note: this function can use bigram templates conditionned on observations */ + virtual double rescore(const std::vector<std::vector<std::string> > &input, const std::vector<int> &context, const std::vector<int> &context_tags) { + double output = 0; + if((int) context.size() != window_length) return 0; + //std::cerr << context[window_offset] << std::endl; + if(context[window_offset] < 0) return 0; + const int label = context_tags[window_offset]; //ilabels[input[context[window_offset]][input[context[window_offset]].size() - 1]]; + int previous = -1; + if(window_length > 1 && context[window_offset - 1] >=0) previous = context_tags[window_offset - 1]; //labels[input[context[window_offset - 1]][input[context[window_offset - 1]].size() - 1]]; + for(std::vector<CRFPPTemplate>::const_iterator i = templates.begin(); i != templates.end(); i++) { + std::string feature = i->applyToClique(input, context, window_offset); + //std::cerr << "feature: " << feature << std::endl; + std::unordered_map<std::string, int>::const_iterator found = features.find(feature); + if(found != features.end()) { + if(found->second >= 0 && found->second < (int) weights.size()) { + if(i->type == CRFPPTemplate::UNIGRAM) output += weights[found->second + label]; + else if(previous != -1) output += weights[found->second + label + labels.size() * previous]; + } + } + } + return output; + } + + /* note: this function CANNOT use bigram templates conditionned on observations */ + virtual double transition(int previous, int label) { + if(bigramWeightLocation < 0) return 0; + return weights[bigramWeightLocation + label + labels.size() * previous]; + } + + virtual void emissions(const std::vector<std::vector<std::string> > &input, const std::vector<int> &context, std::vector<double>& output) { + output.clear(); + output.resize(labels.size()); + if((int) context.size() != window_length) return; + if(context[window_offset] == -1) return; + for(std::vector<CRFPPTemplate>::const_iterator i = templates.begin(); i != templates.end(); i++) { + std::string feature = i->applyToClique(input, context, window_offset); + //std::cerr << " " << feature; + std::unordered_map<std::string, int>::const_iterator found = features.find(feature); + if(found != features.end()) { + if(found->second >= 0 && found->second < (int) weights.size()) { + if(i->type == CRFPPTemplate::UNIGRAM) + for(size_t label = 0; label < labels.size(); label++) + output[label] += weights[found->second + label]; + } + } + //else std::cerr << "*"; + } + //std::cerr << "\n"; + } + }; +} diff --git a/maca_crf_tagger/src/crf_tagger 
b/maca_crf_tagger/src/crf_tagger new file mode 100755 index 0000000000000000000000000000000000000000..48867b39b0ef4bffed7927ba5b51a0cbe81cb1e2 Binary files /dev/null and b/maca_crf_tagger/src/crf_tagger differ diff --git a/maca_crf_tagger/src/crf_tagger.cc b/maca_crf_tagger/src/crf_tagger.cc new file mode 100644 index 0000000000000000000000000000000000000000..a9fba5a923897863404933d34a377e2249b24a0d --- /dev/null +++ b/maca_crf_tagger/src/crf_tagger.cc @@ -0,0 +1,60 @@ +#include <vector> +#include "crf_decoder.hh" +#include "crf_binlexicon.hh" +#include "crf_features.hh" + +void tag_sentence(macaon::Decoder& decoder, macaon::BinaryLexicon* lexicon, const std::vector<std::string>& words) { + + std::vector<std::vector<std::string> > features; + for(size_t i = 0; i < words.size(); i++) { + std::vector<std::string> word_features; + macaon::FeatureGenerator::get_pos_features(words[i], word_features); + features.push_back(word_features); + /*for(size_t j = 0; j < word_features.size(); j++) std::cout << word_features[j] << " "; + std::cout << "\n";*/ + } + std::vector<std::string> tagged; + decoder.decodeString(features, tagged, lexicon); + for(size_t i = 0; i < tagged.size(); i++) { + if(i > 0) std::cout << " "; + std::cout << words[i] << "/" << tagged[i]; + } + std::cout << "\n"; +} + +void usage(const char* argv0) { + std::cerr << "usage: " << argv0 << " <model> [lexicon]\n"; + exit(1); +} + +int main(int argc, char** argv) { + std::string modelName = ""; + std::string lexiconName = ""; + + for(int i = 1; i < argc; i++) { + std::string arg = argv[i]; + if(arg == "-h" || arg == "--help") { + usage(argv[0]); + } else if(modelName == "") { + modelName = arg; + } else if(lexiconName =="") { + lexiconName = arg; + } else { + usage(argv[0]); + } + } + if(modelName == "") usage(argv[0]); + + macaon::Decoder decoder(modelName); + macaon::BinaryLexicon *lexicon = NULL; + if(lexiconName != "") lexicon = new macaon::BinaryLexicon(lexiconName, decoder.getTagset()); + + std::string line; + while(std::getline(std::cin, line)) { + std::vector<std::string> words; + macaon::Tokenize(line, words, " "); + tag_sentence(decoder, lexicon, words); + } + if(lexicon) delete lexicon; + return 0; +} diff --git a/maca_crf_tagger/src/crf_template.hh b/maca_crf_tagger/src/crf_template.hh new file mode 100644 index 0000000000000000000000000000000000000000..7307f14832592d0f06390677500dc1d351401a7e --- /dev/null +++ b/maca_crf_tagger/src/crf_template.hh @@ -0,0 +1,146 @@ +#pragma once +#include <vector> +#include <string> +#include <iostream> +#include <sstream> +#include <string.h> +#include <stdlib.h> +#include <algorithm> + +namespace macaon { + // from http://www.jb.man.ac.uk/~slowe/cpp/itoa.html + static std::string number_to_string(const int value) { + const int base = 10; + std::string buf; + buf.reserve(35); + int quotient = value; + do { + buf += "0123456789abcdef"[ abs( quotient % base ) ]; + quotient /= base; + } while (quotient); + if (value < 0) buf += '-'; + reverse( buf.begin(), buf.end() ); + return buf; + } + + struct TemplateItem { + int line; + int column; + std::string prefix; + TemplateItem(const int _line, const int _column, const std::string &_prefix) : line(_line), column(_column), prefix(_prefix) { } + //friend std::ostream &operator<<(std::ostream &, const TemplateItem & ); + }; + + struct CRFPPTemplate { + enum TemplateType { + UNIGRAM, + BIGRAM, + }; + std::string text; + TemplateType type; + int size; + std::string suffix; + std::vector<TemplateItem> items; + CRFPPTemplate() {} + 
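+        // build a template directly from a CRF++ template line; see read() below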
CRFPPTemplate(const char* input) { read(input); } + friend std::ostream &operator<<(std::ostream &, const CRFPPTemplate & ); + + std::string apply(const std::vector<std::vector<std::string> > &clique, int offset) const { + std::ostringstream output; + for(std::vector<TemplateItem>::const_iterator i = items.begin(); i != items.end(); i++) { + output << i->prefix; + int column = i->column; + int line = i->line + offset; + if(line >= 0 && line < (int) clique.size()) { + if(column >= 0 && column < (int) clique[line].size()) { + output << clique[line][column]; + } else { + std::cerr << "ERROR: invalid column " << column << " in template \"" << text << "\"\n"; + return ""; + } + } else { + output << "_B"; + output << number_to_string(line); + } + } + output << suffix; + return output.str(); + } + + std::string applyToClique(const std::vector<std::vector<std::string> > &features, const std::vector<int> &clique, int offset) const { + std::string output; + for(std::vector<TemplateItem>::const_iterator i = items.begin(); i != items.end(); i++) { + output += i->prefix; + int column = i->column; + int line = i->line; + if(line + offset >= 0 && line + offset < (int) clique.size() && clique[line + offset] >=0) { + if(column >= 0 && column < (int) features[clique[line + offset]].size()) { + output += features[clique[line + offset]][column]; + } else { + std::cerr << "ERROR: invalid column " << column << " in template \"" << text << "\"\n"; + return ""; + } + } else { + output += "_B"; + output += number_to_string(line); + } + } + output += suffix; + return output; + } + + void read(const char* input) { + text = input; + size = 0; + const char* current = input; + const char* gap_start = NULL, *gap_end = NULL, *line_start = NULL, *column_start = NULL; + int state = 0; + gap_start = current; + /* template is a succession of %x[-?\d+,\d+] which must be replaced by corresponding + * features at the given line, column relative to the current example. + * They are parsed with a rudimentary state machine, and stored in the template. 
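+             * e.g. applied at position 1 of the two-token sequence {"the"}, {"cat"},
+             * the template "U02:%x[-1,0]/%x[0,0]" expands to the feature "U02:the/cat";
+             * positions outside the sequence are rendered as "_B<line>" markers.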
+ */ + if(*current == 'U') type = UNIGRAM; + else if(*current == 'B') type = BIGRAM; + else { + std::cerr << "ERROR: unexpected template type \"" << input << "\"\n"; + return; + } + while(*current != '\0') { + if(state == 0 && *current == '%') { state ++; gap_end = current; } + else if(state == 1 && *current == 'x') { state ++; } + else if(state == 2 && *current == '[') state ++; + else if(state == 3 && (*current == '-' || (*current >= '0' && *current <= '9'))) { state ++; line_start = current; } + else if(state == 4 && (*current >= '0' && *current <= '9')); + else if(state == 4 && *current == ',') { state ++; } + else if(state == 5 && (*current >= '0' && *current <= '9')) { state ++; column_start = current; } + else if(state == 6 && (*current >= '0' && *current <= '9')); + else if(state == 6 && *current == ']') { + state = 0; + std::string gap = std::string(gap_start, gap_end - gap_start); + int column = strtol(column_start, NULL, 10); + int line = strtol(line_start, NULL, 10); + items.push_back(TemplateItem(line, column, gap)); + size++; + gap_start = current + 1; + } else state = 0; + current ++; + } + suffix = gap_start; // add trailing text + } + }; + + /*std::ostream &operator<<(std::ostream &output, const macaon::TemplateItem &item) { + output << item.prefix << "%x[" << item.line << "," << item.column << "]"; + return output; + } + + std::ostream &operator<<(std::ostream &output, const macaon::CRFPPTemplate &featureTemplate) { + for(std::vector<macaon::TemplateItem>::const_iterator i = featureTemplate.items.begin(); i != featureTemplate.items.end(); i++) { + output << (*i); + } + output << featureTemplate.suffix; + return output; + }*/ + +} diff --git a/maca_crf_tagger/src/crf_utils.hh b/maca_crf_tagger/src/crf_utils.hh new file mode 100644 index 0000000000000000000000000000000000000000..8a33ab7b3839e88e02b4af96ea94f6c593195d34 --- /dev/null +++ b/maca_crf_tagger/src/crf_utils.hh @@ -0,0 +1,75 @@ +#pragma once + +#include <string> +#include <vector> + +#define int64 int + +namespace macaon { + class Symbols { + protected: + std::string name; + std::unordered_map<std::string, int> word2int; + std::unordered_map<int, std::string> int2word; + public: + Symbols(std::string _name) : name(_name) {} + int AddSymbol(const std::string& symbol, int value = -1) { + if(value == -1) value = word2int.size(); + word2int[symbol] = value; + int2word[value] = symbol; + return value; + } + int Find(const std::string& word) const { + std::unordered_map<std::string, int>::const_iterator found = word2int.find(word); + if(found != word2int.end()) return found->second; + return -1; + } + const std::string Find(const int64 id) const { + std::unordered_map<int, std::string>::const_iterator found = int2word.find(id); + if(found != int2word.end()) return found->second; + return ""; + } + int NumSymbols() const { + return word2int.size(); + } + friend class SymbolsIterator; + }; + class SymbolsIterator { + const Symbols& symbols; + std::unordered_map<std::string, int>::const_iterator iter; + public: + SymbolsIterator(const Symbols& _symbols) : symbols(_symbols) { + iter = symbols.word2int.begin(); + } + bool Done() { + return iter == symbols.word2int.end(); + } + void Next() { + iter++; + } + const std::string Symbol() { + return iter->first; + } + int Value() { + return iter->second; + } + }; + + // http://www.oopweb.com/CPP/Documents/CPPHOWTO/Volume/C++Programming-HOWTO-7.html + static void Tokenize(const std::string& str, std::vector<std::string>& tokens, const std::string& delimiters = " ", bool strict = 
false) + { + std::string::size_type lastPos = str.find_first_not_of(delimiters, 0); + std::string::size_type pos = str.find_first_of(delimiters, lastPos); + tokens.clear(); + while (std::string::npos != pos || std::string::npos != lastPos) + { + tokens.push_back(str.substr(lastPos, pos - lastPos)); + if(strict) { + if(pos == std::string::npos) break; + lastPos = pos + 1; + } else lastPos = str.find_first_not_of(delimiters, pos); + pos = str.find_first_of(delimiters, lastPos); + } + } + +} diff --git a/maca_crf_tagger/src/lemmatizer.cc b/maca_crf_tagger/src/lemmatizer.cc new file mode 100644 index 0000000000000000000000000000000000000000..70c019f7de0a9774c531af10f8f680069f053a4d --- /dev/null +++ b/maca_crf_tagger/src/lemmatizer.cc @@ -0,0 +1,19 @@ +#include "lemmatizer.h" + +int main(int argc, char** argv) { + if(argc != 2) { + std::cerr << "usage: " << argv[0] << " <fplm-dictionary>\n"; + return 1; + } + macaon::Lemmatizer lemmatizer(argv[1]); + std::string line; + while(std::getline(std::cin, line)) { + std::vector<std::string> tokens; + macaon::Tokenize(line, tokens, " "); + for(size_t i = 0; i < tokens.size(); i++) { + if(i > 0) std::cout << " "; + std::cout << lemmatizer.lemmatize(tokens[i]); + } + std::cout << "\n"; + } +} diff --git a/maca_crf_tagger/src/lemmatizer.h b/maca_crf_tagger/src/lemmatizer.h new file mode 100644 index 0000000000000000000000000000000000000000..5966c7e666037cfe677c1660c803fcb3ebb24911 --- /dev/null +++ b/maca_crf_tagger/src/lemmatizer.h @@ -0,0 +1,51 @@ +#pragma once + +#include <string> +#include <unordered_map> +#include <vector> +#include <fstream> +#include <iostream> + +#include "crf_utils.hh" + +namespace macaon { + class Lemmatizer { + std::unordered_map<std::string, std::string> dictionary; + public: + Lemmatizer(const std::string& filename) { + std::ifstream input(filename); + if(input) { + std::string line; + int line_num = 1; + while(std::getline(input, line)) { + std::vector<std::string> tokens; + macaon::Tokenize(line, tokens, "\t", true); + if(tokens.size() != 4) { + std::cerr << "ERROR: unexpected input in " << filename << ", line " << line_num << ": \"" << line << "\"\n"; + break; + } + std::string word = tokens[0]; + std::string tag = tokens[1]; + std::string lemma = tokens[2]; + std::string morpho = tokens[3]; + dictionary[word + "/" + tag] = lemma; + line_num ++; + } + } else { + std::cerr << "ERROR: loading " << filename << "\n"; + } + } + std::string lemmatize(const std::string& word, const std::string& tag) const { + std::string key = word + "/" + tag; + return lemmatize(key); + } + std::string lemmatize(const std::string& word_tag) const { + std::unordered_map<std::string, std::string>::const_iterator found = dictionary.find(word_tag); + if(found != dictionary.end()) { + return found->second; + } + return word_tag.substr(0, word_tag.rfind('/')); + } + }; +} + diff --git a/maca_crf_tagger/src/maca_crf_convert_binlexicon.cc b/maca_crf_tagger/src/maca_crf_convert_binlexicon.cc new file mode 100644 index 0000000000000000000000000000000000000000..4704208f252dd40a627d9103c63018c07ae17153 --- /dev/null +++ b/maca_crf_tagger/src/maca_crf_convert_binlexicon.cc @@ -0,0 +1,46 @@ +#include "crf_decoder.hh" +#include "crf_binlexicon.hh" + +int main(int argc, char** argv) { + if(argc != 4 && argc != 3) { + std::cerr << "convert: " << argv[0] << " <crf-model> <lexicon.in> <lexicon.out>\n"; + std::cerr << "test: cat <text-lexicon> | " << argv[0] << " <crf-model> <bin-lexicon>\n"; + return 1; + } + if(argc == 4) { + macaon::Decoder decoder(argv[1]); + 
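// conversion mode: read the text lexicon with the model's tagset and write it back out in binary form
+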
macaon::BinaryLexicon lexicon(argv[2], decoder.getTagset()); + lexicon.Write(argv[3]); + } else if(argc == 3) { + macaon::Decoder decoder(argv[1]); + macaon::BinaryLexicon lexicon(argv[2], decoder.getTagset()); + std::string line; + int line_num = 0; + while(std::getline(std::cin, line)) { + line_num ++; + std::vector<int64> tags; + std::vector<std::string> tokens; + macaon::Tokenize(line, tokens, "\t "); + if(lexicon.GetTagsForWord(tokens[0], tags) == false) { + std::cerr << "WARNING: word not found \"" << tokens[0] << "\", using all tags\n"; + } + if(tags.size() != tokens.size() - 1) { + std::cerr << "ERROR: wrong number of tags for entry " << line_num << "\n"; + std::cerr << " TXT: " << line << "\n"; + std::cerr << " BIN: " << tokens[0]; + for(size_t i = 0; i < tags.size(); i++) { + std::cerr << " " << decoder.getTagset()->Find(tags[i]); + } + std::cerr << "\n"; + } else { + for(size_t i = 0; i < tags.size(); i++) { + if(decoder.getTagset()->Find(tags[i]) != tokens[i + 1]) { + std::cerr << "ERROR: wrong tag \"" << tokens[i + 1] << "\" => \"" << decoder.getTagset()->Find(tags[i]) << "\", entry " << line_num << "\n"; + } + } + } + } + } + return 0; +} + diff --git a/maca_crf_tagger/src/maca_crf_convert_binmodel.cc b/maca_crf_tagger/src/maca_crf_convert_binmodel.cc new file mode 100644 index 0000000000000000000000000000000000000000..4c6cc55a71ae7584863200b6f33ed06518f50809 --- /dev/null +++ b/maca_crf_tagger/src/maca_crf_convert_binmodel.cc @@ -0,0 +1,23 @@ +#include "crf_binmodel.hh" + +int main(int argc, char** argv) { + if(argc != 3 && argc != 2) { + std::cerr << "usage: " << argv[0] << " <from> <to> or <binmodel>\n"; + return 1; + } + macaon::BinaryModel model; + if(argc == 3) { + model.Convert(argv[1], argv[2]); + } else { + model.Load(argv[1]); + std::vector<double> weights; + model.GetWeights("U18=a/jamais", weights); + for(size_t i = 0; i < weights.size(); i++) { + std::cout << weights[i] << " "; + } + std::cout << "\n"; + //model.Dump(); + } + return 0; +} + diff --git a/maca_crf_tagger/src/maca_crf_tagger_main.cc b/maca_crf_tagger/src/maca_crf_tagger_main.cc new file mode 100644 index 0000000000000000000000000000000000000000..0371581044ba350f38c6e3fcd20bfb1090b55b38 --- /dev/null +++ b/maca_crf_tagger/src/maca_crf_tagger_main.cc @@ -0,0 +1,259 @@ +/*************************************************************************** + Copyright (C) 2011 by xxx <xxx@lif.univ-mrs.fr> + This file is part of maca_crf_tagger. + + Maca_crf_tagger is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Maca_crf_tagger is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with maca_crf_tagger. If not, see <http://www.gnu.org/licenses/>. 
+**************************************************************************/ + +#include "maca_crf_tagger.hh" +#include "crf_decoder.hh" +#include "crf_features.hh" +#include "crf_lexicon.hh" +#include "crf_tclexdet.hh" + +void crf_tagger(fst::StdVectorFst &input, maca_crf_tagger_ctx *ctx, bool debug=false) +{ + if(debug) input.Write("debug.crf_tagger.input"); + gfsmStateId start; + maca_ht_structure * ht = ctx->ms->xml_nodes_ht; + xmlNodePtr seg; + char *tokens = NULL; + std::vector<std::vector<std::string> >features; + std::vector<int>ilabels; + fst::StdVectorFst output; + if(ctx->model_filename == NULL) { + std::cerr << "ERROR: crf_tagger model file not specified, exiting\n"; + exit(1); // ERROR + } + if(ctx->lexicon_filename == NULL) { + std::cerr << "ERROR: crf_tagger lexicon file not specified, exiting\n"; + exit(1); // ERROR + } + fst::SymbolTable inputSymbols("words"); + inputSymbols.AddSymbol("<eps>", 0); + + // extract features + for(start=0; start < input.NumStates(); start++){ + for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&input, start); !aiter.Done(); aiter.Next()) { + const fst::StdArc &arc = aiter.Value(); + seg = (xmlNodePtr)maca_ht_index2adr(ht, arc.ilabel); + tokens = maca_sentence_get_segment_tokens_value(ctx->ms, seg); + ilabels.push_back(arc.ilabel); + inputSymbols.AddSymbol(tokens, ilabels.size()); + aiter.SetValue(fst::StdArc(ilabels.size(), arc.olabel, arc.weight, arc.nextstate)); + std::vector<std::string>word_features; + macaon::FeatureGenerator::get_pos_features(tokens, word_features); + features.push_back(word_features); + free(tokens); + } + } + if(debug) input.Write("debug.crf_tagger.features"); + + int64 isString = input.Properties(fst::kString, true); + if(isString & fst::kString && ctx->n == 1) { + if(ctx->verbose_flag > 0) std::cerr << "INFO: using linear tagger\n"; + // faster pipeline for linear automata + std::vector<std::string> tags; + ctx->decoder->decodeString(features, tags, ctx->lexicon); + output.AddState(); + output.SetStart(0); + for(int64 state = 0; state < input.NumStates() - 1; state++){ + const fst::StdArc &arc = fst::ArcIterator<fst::StdVectorFst>(input, state).Value(); + output.AddState(); + output.AddArc(state, fst::StdArc(ilabels[arc.ilabel-1], ctx->tag_mapping[ctx->decoder->getTagset()->Find(tags[state])], arc.weight, state + 1)); + } + output.SetFinal(output.NumStates() - 1, 0); + input = output; + + } else { + // add possible tag labels + input.SetInputSymbols(&inputSymbols); + ctx->lexicon->AddTags(input); + if(debug) input.Write("debug.crf_tagger.tags"); + + // rescore with CRF + ctx->decoder->decode(features, input, output, true); + if(debug) output.Write("debug.crf_tagger.decoded"); + + // convert to macaon + fst::RmEpsilon(&output); + + input = output; + for(start=0; start < input.NumStates(); start++){ + for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&input, start); !aiter.Done(); aiter.Next()) { + const fst::StdArc &arc = aiter.Value(); + aiter.SetValue(fst::StdArc(ilabels[arc.ilabel-1], ctx->tag_mapping[arc.olabel], arc.weight, arc.nextstate)); + } + } + } + if(debug) input.Write("debug.crf_tagger.output"); +} + +void traverse_segments(maca_section *section, maca_crf_tagger_ctx *ctx) +{ + xmlNodePtr segs = section->xml_node_segs; + xmlNodePtr seg; + xmlChar *ulex_id; + maca_ht_structure * ht = ctx->ms->xml_nodes_ht; + int n; + int index; + char *tokens = NULL; + + + for(seg=segs->children, n=0; seg ; seg=seg->next, n++) + { + ulex_id = xmlGetProp(seg, BAD_CAST "id"); + index = maca_ht_adr2index(ht, seg); + 
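// debugging helper: print each segment's hash-table index, position, id and token string
+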
tokens = maca_sentence_get_segment_tokens_value(ctx->ms, seg); + fprintf(stderr, "index = %d n = %d id = %s tokens = %s\n", index, n, ulex_id, tokens); + } +} + +void traverse_automaton(gfsmAutomaton *a, maca_crf_tagger_ctx *ctx) +{ + gfsmStateId i; + gfsmArcIter ai; + gfsmArc *t; + xmlNodePtr n; + maca_ht_xmlnode *ht = ctx->ms->xml_nodes_ht; + xmlChar *ulex_id; + xmlChar *ulex_lex_id; + + for(i=0; i<gfsm_automaton_n_states(a); i++){ + for (gfsm_arciter_open_ptr(&ai,a,gfsm_automaton_find_state(a, i)); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)){ + t = gfsm_arciter_arc(&ai); + n = (xmlNodePtr)maca_ht_index2adr(ht, gfsm_arc_lower(t)); + ulex_id = xmlGetProp(n, BAD_CAST "id"); + ulex_lex_id = xmlGetProp(n, BAD_CAST "lex_id"); + fprintf(stderr,"index = %d ulex id = %s lex_id = %s\n",gfsm_arc_lower(t), ulex_id, ulex_lex_id); + } + } + /* creation de segment et d'une section */ +} + + +maca_section *create_morpho_section(gfsmAutomaton *a, maca_crf_tagger_ctx *ctx) +{ + gfsmStateId i; + gfsmArc *t; + gfsmArcIter ai; + maca_ht_structure * ht = ctx->ms->xml_nodes_ht; + char id_pos[500]; + xmlNodePtr posNode = NULL; + xmlNodePtr lexNode = NULL; + maca_section * section = NULL; + GHashTable* segments_created = g_hash_table_new_full(g_str_hash, g_str_equal,free, NULL); + char * prefix_id; + char * temp; + + //section = maca_section_create_section(MACA_POSS_SECTION); + //maca_sentence_add_section(ctx->ms, section); + section = maca_sentence_new_section(ctx->ms,MACA_MORPHO_SECTION); + prefix_id = (char*)malloc(sizeof(char)*(strlen(ctx->ms->id_sentence) +3)); + sprintf(prefix_id,"%s_M",ctx->ms->id_sentence); + + for(i=0; i<gfsm_automaton_n_states(a); i++){ + for (gfsm_arciter_open(&ai,a,i); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)){ + t = gfsm_arciter_arc(&ai); + if(gfsm_arc_lower(t) != gfsmEpsilon){ + lexNode = (xmlNodePtr)maca_ht_index2adr(ht,gfsm_arc_lower(t)); + if(lexNode){ + temp = (char*)xmlGetProp(lexNode, BAD_CAST "id"); + sprintf(id_pos, "%s_%s",temp, maca_tags_get_str(ctx->cfg, "morpho", "stype", gfsm_arc_upper(t))); + free(temp); + // printf("key = %s\n", id_pos); + if(posNode = (xmlNodePtr)g_hash_table_lookup(segments_created, id_pos)){ + t->lower = maca_ht_adr2index(ht,posNode); + // printf("segment %s already created\n", id_pos); + } + else{ + // printf("add segment %s %s (%s)\n", xmlGetProp(lexNode, BAD_CAST "id"), maca_tags_get_str(ctx->cfg, "morpho", "stype", gfsm_arc_upper(t)), id_pos); + posNode = maca_sentence_add_segment(ctx->ms, MACA_MORPHO_SECTION, MACA_CAT_TYPE, prefix_id); + t->lower = maca_ht_adr2index(ht,posNode); + xmlNewProp(posNode, BAD_CAST "stype", BAD_CAST maca_tags_get_str(ctx->cfg, "morpho", "stype", gfsm_arc_upper(t))); + maca_segment_add_elt_from_node(posNode, lexNode, 0); + g_hash_table_insert(segments_created, strdup(id_pos), posNode); + } + } + } + } + } + free(prefix_id); + // maca_section_add_automaton(section, a); + maca_sentence_update_xml_automaton(ctx->ms, MACA_MORPHO_SECTION,a); + // section->xml_node_fsm = xmlAddChild(section->xml_node, fsm2xml(a, ht)); + + g_hash_table_destroy(segments_created); + // fsm_affiche(a, ht); + //maca_section_update_xml_automaton(section, ht, a); + return section; +} + + + +int maca_crf_tagger_ProcessSentence(maca_sentence * ms, maca_crf_tagger_ctx * ctx) +{ + maca_section * prelex_section; + maca_section * lex_section; + gfsmAutomaton *lex_automaton; + fst::StdVectorFst automaton; + + ctx->ms = ms; + + if(!maca_sentence_is_section_loaded(ctx->ms,MACA_PRELEX_SECTION)) + { + prelex_section = 
maca_sentence_load_section_by_type(ctx->ms,MACA_PRELEX_SECTION); + } + else prelex_section = maca_sentence_get_section(ctx->ms, MACA_PRELEX_SECTION); + if(prelex_section == NULL){ + maca_msg(ctx->module, MACA_ERROR); + fprintf(stderr,"sentence : %s no prelex section\n", ctx->ms->id_sentence); + return -1; + } + + if(!maca_sentence_is_section_loaded(ctx->ms,MACA_LEX_SECTION)) + { + lex_section = maca_sentence_load_section_by_type(ctx->ms,MACA_LEX_SECTION); + } + else lex_section = maca_sentence_get_section(ctx->ms, MACA_LEX_SECTION); + if(lex_section == NULL){ + maca_msg(ctx->module, MACA_ERROR); + fprintf(stderr,"sentence : %s no lex section\n", ctx->ms->id_sentence); + return -1; + } + lex_automaton = maca_sentence_get_section_automaton(ctx->ms, MACA_LEX_SECTION); + if(lex_automaton == NULL){ + maca_msg(ctx->module, MACA_ERROR); + fprintf(stderr,"sentence : %s no lex automaton\n", ctx->ms->id_sentence); + return -1; + } + gfsm2fst(lex_automaton, automaton); + crf_tagger(automaton, ctx, ctx->verbose_flag > 4); + if(ctx->n > 0){ + fst::StdVectorFst nbest; + fst::ShortestPath(automaton, &nbest, ctx->n); + create_morpho_section(fst2gfsm(nbest), ctx); + } else if(ctx->n == -1) { + create_morpho_section(fst2gfsm(automaton), ctx); + } else if(ctx->n == -2) { + macaon::DeterminizeTCLex(&automaton); + create_morpho_section(fst2gfsm(automaton), ctx); + } else { + fprintf(stderr, "error: unknown -n value (%d)\n", ctx->n); + return -1; + } + return 1; +} + + diff --git a/maca_crf_tagger/src/maca_crf_tagger_utils.cc b/maca_crf_tagger/src/maca_crf_tagger_utils.cc new file mode 100644 index 0000000000000000000000000000000000000000..93ed95d3c540cb7fa977dc8ae02362a97579c44b --- /dev/null +++ b/maca_crf_tagger/src/maca_crf_tagger_utils.cc @@ -0,0 +1,31 @@ +/*************************************************************************** + Copyright (C) 2011 by xxx <xxx@lif.univ-mrs.fr> + This file is part of maca_crf_tagger. + + Maca_crf_tagger is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Maca_crf_tagger is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with maca_crf_tagger. If not, see <http://www.gnu.org/licenses/>. +**************************************************************************/ + +#include "maca_crf_tagger.hh" + +char * maca_crf_tagger_GetVersion() +{ + return MACA_CRF_TAGGER_VERSION; +} + +void maca_crf_tagger_add_stamp(xmlNodePtr node) +{ + add_maca_stamp(node,MACA_CRF_TAGGER_NAME,MACA_CRF_TAGGER_VERSION); +} + + diff --git a/maca_crf_tagger/src/simple_tagger.cc b/maca_crf_tagger/src/simple_tagger.cc new file mode 100644 index 0000000000000000000000000000000000000000..14468294e06b7a5d90206334cb5b0d9db906f311 --- /dev/null +++ b/maca_crf_tagger/src/simple_tagger.cc @@ -0,0 +1,21 @@ +#include "simple_tagger.hh" + +macaon::Tagger* Tagger_new(const char* modelName, const char* lexiconName) { + return new macaon::Tagger(modelName, lexiconName ? 
lexiconName : ""); +} + +void Tagger_free(macaon::Tagger* tagger) { + delete tagger; +} + +bool Tagger_ProcessSentence(macaon::Tagger* tagger, int num_words, const char** words, const char** tags) { + std::vector<std::string> word_vector, tag_vector; + for(int i = 0; i < num_words; i++) { + word_vector.push_back(words[i]); + } + bool result = tagger->ProcessSentence(word_vector, tag_vector); + for(int i = 0; i < num_words; i++) { + tags[i] = strdup(tag_vector[i].c_str()); + } + return result; +} diff --git a/maca_crf_tagger/src/simple_tagger.hh b/maca_crf_tagger/src/simple_tagger.hh new file mode 100644 index 0000000000000000000000000000000000000000..21853382ec8c2f76a35cef8747ad1083bc3c40d9 --- /dev/null +++ b/maca_crf_tagger/src/simple_tagger.hh @@ -0,0 +1,40 @@ +#include <vector> +#include "crf_decoder.hh" +#include "crf_binlexicon.hh" +#include "crf_features.hh" + +namespace macaon { + class Tagger { + private: + macaon::Decoder decoder; + macaon::BinaryLexicon *lexicon; + public: + Tagger(const std::string modelName, const std::string lexiconName = "") : decoder(modelName), lexicon(NULL) { + if(lexiconName != "") lexicon = new macaon::BinaryLexicon(lexiconName, decoder.getTagset()); + } + + ~Tagger() { + if(lexicon != NULL) delete lexicon; + } + + bool ProcessSentence(const std::vector<std::string>& words, std::vector<std::string>& tags) { + std::vector<std::vector<std::string> > features; + for(size_t i = 0; i < words.size(); i++) { + std::vector<std::string> word_features; + macaon::FeatureGenerator::get_pos_features(words[i], word_features); + features.push_back(word_features); + } + tags.clear(); + decoder.decodeString(features, tags, lexicon); + return true; + } + }; +} + +extern "C" { + macaon::Tagger* Tagger_new(const char* modelName, const char* lexiconName); + + void Tagger_free(macaon::Tagger* tagger); + + bool Tagger_ProcessSentence(macaon::Tagger* tagger, int num_words, const char** words, const char** tags); +} diff --git a/maca_crf_tagger/src/test_simple_tagger.cc b/maca_crf_tagger/src/test_simple_tagger.cc new file mode 100644 index 0000000000000000000000000000000000000000..ac3f021df4cd6479e649c23fb9549604ce1a7b46 --- /dev/null +++ b/maca_crf_tagger/src/test_simple_tagger.cc @@ -0,0 +1,27 @@ +#include <stdio.h> + +#include "simple_tagger.hh" + +int main(int argc, char** argv) { + int num_words = 6; + const char* words[] = {"le", "petit", "chat", "boit", "du", "lait"}; + const char* tags[6]; + int i; + + if(argc != 3) { + fprintf(stderr, "usage: %s <tagger-model> <tagger-lexicon>\n", argv[0]); + return 1; + } + + macaon::Tagger* tagger = Tagger_new(argv[1], argv[2]); + + Tagger_ProcessSentence(tagger, num_words, words, tags); + + for(i = 0; i < num_words; i++) { + printf("%s %s\n", words[i], tags[i]); + } + + Tagger_free(tagger); + + return 0; +} diff --git a/maca_crf_tagger/src/utf8 b/maca_crf_tagger/src/utf8 new file mode 100755 index 0000000000000000000000000000000000000000..4dee4508ec27a0923a86e9384cf41e7758a38d30 Binary files /dev/null and b/maca_crf_tagger/src/utf8 differ diff --git a/maca_crf_tagger/src/utf8.c b/maca_crf_tagger/src/utf8.c new file mode 100644 index 0000000000000000000000000000000000000000..4e1ea27f17001a8cad27e2bb61f93d28a856e358 --- /dev/null +++ b/maca_crf_tagger/src/utf8.c @@ -0,0 +1,33 @@ +#include <stdio.h> +#include <stdlib.h> + +const char *byte_to_binary(int x) +{ + static char b[9]; + b[0] = '\0'; + + int z; + for (z = 128; z > 0; z >>= 1) + { + strcat(b, ((x & z) == z) ? 
"1" : "0"); + } + + return b; +} + +int main() { + char word[1024]; + fgets(word, 1024, stdin); + printf("%s\n", word); + int offset = 0; + while(word[offset] != 0) { + printf("%3d %s [%s]\n", offset, byte_to_binary(word[offset]), &word[offset]); + if((unsigned char)word[offset] >> 7 == 1) { + offset++; + while((unsigned char)word[offset] >> 6 == 2) offset++; + } else { + offset++; + } + } + return 0; +}