// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C++; the guard name is kept unchanged here so that
// any external code testing this macro keeps working. Consider renaming it
// in a coordinated change.
#ifndef DICT__H
#define DICT__H

#include <cstddef>        // std::size_t
#include <cstdio>         // std::FILE (used by readEntry / printEntry)
#include <filesystem>
#include <string>
#include <unordered_map>
#include <vector>

/// Dictionary associating string elements with integer indexes, with an
/// occurrence counter stored per index.
///
/// Only the declarations are visible in this header; behavioural notes below
/// that go beyond the signatures are marked as assumptions to confirm against
/// the implementation file.
class Dict
{
  public :

  /// State of the dictionary.
  /// NOTE(review): presumably Open allows new elements to be inserted and
  /// Closed does not -- confirm in the implementation.
  enum State {Open, Closed};

  /// On-disk representation selector used by save / readEntry / printEntry.
  enum Encoding {Binary, Ascii};

  public :

  // Reserved string constants.
  // NOTE(review): these look like placeholder elements standing for special
  // categories of values (unknown, null, empty, ...) -- confirm how the
  // implementation uses them.
  static constexpr char const * unknownValueStr = "__unknownValue__";
  static constexpr char const * nullValueStr = "__nullValue__";
  static constexpr char const * emptyValueStr = "__emptyValue__";
  static constexpr char const * separatorValueStr = "__separatorValue__";
  static constexpr char const * numberValueStr = "__numberValue__";
  static constexpr char const * urlValueStr = "__urlValue__";

  /// NOTE(review): presumably the maximum size of a single entry (e.g. the
  /// capacity of the `entry` buffer handed to readEntry) -- confirm.
  static constexpr std::size_t maxEntrySize = 5000;

  private :

  /// Maps each known element to its integer index.
  std::unordered_map<std::string, int> elementsToIndexes;
  /// Occurrence counters.
  /// NOTE(review): presumably indexed by element index (see getNbOccs) -- confirm.
  std::vector<int> nbOccs;
  /// Current state; has no default initializer, so every constructor is
  /// expected to set it.
  State state;
  /// Flag toggled through countOcc(); defaults to false.
  bool isCountingOccs{false};

  public :

  /// Builds a dictionary with the given initial state.
  Dict(State state);
  /// Builds a dictionary from `filename` with the given state.
  /// NOTE(review): presumably loads the content through readFromFile -- confirm.
  Dict(const char * filename, State state);

  private :

  /// Loads the dictionary content from the file named `filename`.
  void readFromFile(const char * filename);
  /// Adds `element` to the dictionary.
  void insert(const std::string & element);

  public :

  /// Sets the occurrence-counting flag.
  void countOcc(bool isCountingOccs);
  /// Returns the index associated with `element`.
  /// NOTE(review): by its name, inserts the element when it is absent; the
  /// behaviour when the dictionary is Closed is not visible here -- confirm.
  int getIndexOrInsert(const std::string & element);
  /// Changes the current state.
  void setState(State state);
  /// Returns the current state.
  State getState() const;
  /// Writes the dictionary to `path` using `encoding`.
  void save(std::filesystem::path path, Encoding encoding) const;
  /// Reads one entry from `file`, filling `index`, `nbOccsEntry` and `entry`.
  /// NOTE(review): the meaning of the returned bool and the required capacity
  /// of `entry` (likely maxEntrySize) are not visible here -- confirm.
  bool readEntry(std::FILE * file, int * index, int * nbOccsEntry, char * entry, Encoding encoding);
  /// Writes the entry (`index`, `entry`) to `file` using `encoding`.
  void printEntry(std::FILE * file, int index, const std::string & entry, Encoding encoding) const;
  /// Returns the size of the dictionary.
  /// NOTE(review): presumably the number of stored elements -- confirm.
  std::size_t size() const;
  /// Returns the occurrence count recorded for `index`.
  int getNbOccs(int index) const;
  /// Removes rare elements.
  /// NOTE(review): the rarity threshold is defined in the implementation -- confirm.
  void removeRareElements();
  /// Loads data from a word2vec file located at `path`.
  /// NOTE(review): expected file format is defined by the implementation -- confirm.
  void loadWord2Vec(std::filesystem::path path);
};

#endif