#ifndef DICT__H
#define DICT__H

#include <cstddef>
#include <cstdio>
#include <filesystem>
#include <mutex>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

/// Bidirectional association between string elements and integer indexes,
/// with optional per-index occurrence counts.
///
/// Only declarations live in this header; the behaviour of every member
/// function is defined in the implementation file. Comments below that
/// describe behaviour are inferred from names/signatures and are marked as
/// such — confirm against the implementation before relying on them.
///
/// The class holds a std::mutex member, so it is neither copyable nor movable.
/// Which operations actually take that mutex is not visible from here.
class Dict
{
  public :

  /// State of the dictionary. NOTE(review): presumably Open allows new
  /// insertions while Closed does not — confirm in the implementation.
  enum class State {Open, Closed};

  /// On-disk representation selector used by save/readEntry/printEntry.
  enum class Encoding {Binary, Ascii};

  public :

  // Reserved string values. NOTE(review): presumably these are the entries
  // reported by isSpecialValue/getSpecialIndexes — confirm.
  static constexpr char const * unknownValueStr = "__unknownValue__";
  static constexpr char const * nullValueStr = "__nullValue__";
  static constexpr char const * oobValueStr = "__oobValue__";
  static constexpr char const * noChildValueStr = "__noChildValue__";
  static constexpr char const * emptyValueStr = "__emptyValue__";
  static constexpr char const * separatorValueStr = "__separatorValue__";
  static constexpr char const * numberValueStr = "__numberValue__";
  static constexpr char const * urlValueStr = "__urlValue__";

  /// Size limit for a single entry. NOTE(review): likely the capacity callers
  /// must give the `entry` buffer passed to readEntry — confirm.
  static constexpr std::size_t maxEntrySize = 5000;

  private :

  /// Element string -> index.
  std::unordered_map<std::string, int> elementsToIndexes;
  /// Index -> element string (reverse of elementsToIndexes).
  std::unordered_map<int, std::string> indexesToElements;
  /// Occurrence counters. NOTE(review): presumably indexed by element index.
  std::vector<int> nbOccs;
  /// Mutex associated with the element containers; locking policy is defined
  /// in the implementation file.
  std::mutex elementsMutex;
  State state;
  /// Occurrence counting is off by default; see countOcc.
  bool isCountingOccs{false};
  /// Known prefixes; always starts out containing the empty prefix.
  std::set<std::string> prefixes{""};
  /// Set by lock() (presumably). Initialised to false so that the flag never
  /// holds an indeterminate value, whichever constructor is used.
  bool locked{false};

  public :

  /// Build a dictionary in the given state.
  Dict(State state);
  /// Build a dictionary from `filename` in the given state.
  /// NOTE(review): presumably delegates to readFromFile — confirm.
  Dict(const char * filename, State state);

  private :

  void readFromFile(const char * filename);
  void insert(const std::string & element);
  void reset();
  /// Internal counterpart of getIndexOrInsert.
  /// NOTE(review): presumably the variant that does not take the mutex.
  int _getIndexOrInsert(const std::string & element, const std::string & prefix);

  public :

  void lock();
  /// Enable or disable occurrence counting.
  void countOcc(bool isCountingOccs);
  /// Indexes of the special values. NOTE(review): presumably those of the
  /// reserved `*ValueStr` constants — confirm.
  std::set<std::size_t> getSpecialIndexes();
  /// Look up `element` under `prefix`; as the name says, may insert it.
  /// NOTE(review): behaviour when the dictionary is Closed is defined in the
  /// implementation file.
  int getIndexOrInsert(const std::string & element, const std::string & prefix);
  /// Element string associated with `index`.
  std::string getElement(std::size_t index);
  void setState(State state);
  State getState() const;
  /// Write the dictionary to `path` using `encoding`.
  void save(std::filesystem::path path, Encoding encoding) const;
  /// Read one entry from `file` into the output parameters.
  /// NOTE(review): the meaning of the bool result (success vs. end of input)
  /// and the required size of `entry` are not visible here — confirm.
  bool readEntry(std::FILE * file, int * index, int * nbOccsEntry, char * entry, Encoding encoding);
  /// Write one entry (`index`, `entry`) to `file` using `encoding`.
  void printEntry(std::FILE * file, int index, const std::string & entry, Encoding encoding) const;
  std::size_t size() const;
  /// Occurrence count recorded for `index`.
  int getNbOccs(int index) const;
  /// NOTE(review): the rarity threshold is defined in the implementation file.
  void removeRareElements();
  /// Load word2vec data from `path`, using `prefix`.
  /// NOTE(review): meaning of the bool result is not visible here — confirm.
  bool loadWord2Vec(std::filesystem::path path, std::string prefix);
  bool isSpecialValue(const std::string & value);
};

#endif