// Dict.hpp (2.01 KiB), authored by Franck Dary.
#ifndef DICT__H
#define DICT__H
#include <string>
#include <unordered_map>
#include <vector>
#include <filesystem>
#include <mutex>
class Dict
{
public :
enum class State {Open, Closed};
enum class Encoding {Binary, Ascii};
public :
static constexpr char const * unknownValueStr = "__unknownValue__";
static constexpr char const * nullValueStr = "__nullValue__";
static constexpr char const * oobValueStr = "__oobValue__";
static constexpr char const * noChildValueStr = "__noChildValue__";
static constexpr char const * emptyValueStr = "__emptyValue__";
static constexpr char const * separatorValueStr = "__separatorValue__";
static constexpr char const * numberValueStr = "__numberValue__";
static constexpr char const * urlValueStr = "__urlValue__";
static constexpr std::size_t maxEntrySize = 5000;
private :
std::unordered_map<std::string, int> elementsToIndexes;
std::unordered_map<int, std::string> indexesToElements;
std::vector<int> nbOccs;
std::mutex elementsMutex;
State state;
bool isCountingOccs{false};
public :
Dict(State state);
Dict(const char * filename, State state);
private :
void readFromFile(const char * filename);
void insert(const std::string & element);
void reset();
int _getIndexOrInsert(const std::string & element, const std::string & prefix);
public :
void countOcc(bool isCountingOccs);
int getIndexOrInsert(const std::string & element, const std::string & prefix);
std::string getElement(std::size_t index);
void setState(State state);
State getState() const;
void save(std::filesystem::path path, Encoding encoding) const;
bool readEntry(std::FILE * file, int * index, int * nbOccsEntry, char * entry, Encoding encoding);
void printEntry(std::FILE * file, int index, const std::string & entry, Encoding encoding) const;
std::size_t size() const;
int getNbOccs(int index) const;
void removeRareElements();
bool loadWord2Vec(std::filesystem::path path, std::string prefix);
bool isSpecialValue(const std::string & value);
};
#endif