Skip to content
Snippets Groups Projects
Select Git revision
  • master default protected
  • loss
  • producer
3 results

Dict.hpp

Blame
  • Dict.hpp 2.01 KiB
    #ifndef DICT__H
    #define DICT__H
    
    #include <cstddef>
    #include <cstdio>
    #include <filesystem>
    #include <mutex>
    #include <string>
    #include <unordered_map>
    #include <vector>
    
    /// Dictionary holding a string -> index map and the matching index -> string map.
    ///
    /// Only declarations are visible in this header. Behavioral notes below that are
    /// marked NOTE(review) are inferred from names and signatures and must be
    /// confirmed against the implementation file.
    class Dict
    {
      public :

      /// State of the dictionary.
      /// NOTE(review): presumably Open permits inserting new elements and Closed
      /// does not -- confirm in the implementation.
      enum class State {Open, Closed};
      /// Encoding selector accepted by save, readEntry and printEntry.
      enum class Encoding {Binary, Ascii};

      /// Reserved placeholder strings.
      /// NOTE(review): presumably stored as ordinary entries standing for
      /// special cases (unknown, null, out-of-bounds, ...) -- confirm usage.
      static constexpr char const * unknownValueStr = "__unknownValue__";
      static constexpr char const * nullValueStr = "__nullValue__";
      static constexpr char const * oobValueStr = "__oobValue__";
      static constexpr char const * noChildValueStr = "__noChildValue__";
      static constexpr char const * emptyValueStr = "__emptyValue__";
      static constexpr char const * separatorValueStr = "__separatorValue__";
      static constexpr char const * numberValueStr = "__numberValue__";
      static constexpr char const * urlValueStr = "__urlValue__";
      /// NOTE(review): presumably the size limit of a single entry when reading
      /// from a file (see the `entry` buffer of readEntry) -- confirm.
      static constexpr std::size_t maxEntrySize = 5000;

      private :

      /// Forward mapping: element string -> integer index.
      std::unordered_map<std::string, int> elementsToIndexes;
      /// Reverse mapping: integer index -> element string.
      std::unordered_map<int, std::string> indexesToElements;
      /// NOTE(review): presumably occurrence counts addressed by element index
      /// (see getNbOccs) -- confirm.
      std::vector<int> nbOccs;
      /// NOTE(review): presumably guards the element maps; confirm which members
      /// it protects and which methods take it.
      std::mutex elementsMutex;
      /// Current state; supplied through the constructors and setState.
      State state;
      /// Occurrence-counting flag, off by default (see countOcc).
      bool isCountingOccs{false};

      public :

      /// Builds a dictionary in the given state.
      /// Marked explicit so that a bare State value never converts silently
      /// into a Dict.
      explicit Dict(State state);
      /// Builds a dictionary from `filename` in the given state.
      /// NOTE(review): presumably loads the content via readFromFile -- confirm.
      Dict(const char * filename, State state);

      private :

      /// NOTE(review): presumably fills the dictionary from the file `filename`.
      void readFromFile(const char * filename);
      /// NOTE(review): presumably adds `element` to both maps.
      void insert(const std::string & element);
      /// NOTE(review): presumably clears the stored content.
      void reset();
      /// Private counterpart of getIndexOrInsert with the same parameters.
      int _getIndexOrInsert(const std::string & element, const std::string & prefix);

      public :

      /// NOTE(review): presumably enables/disables occurrence counting.
      void countOcc(bool isCountingOccs);
      /// Returns an integer index for `element`.
      /// NOTE(review): presumably inserts the element when it is absent and the
      /// state allows it; the role of `prefix` is not visible here -- confirm.
      int getIndexOrInsert(const std::string & element, const std::string & prefix);
      /// Returns the element string associated with `index`.
      /// NOTE(review): behavior for an unknown index is not visible here.
      std::string getElement(std::size_t index);
      /// Replaces the current state.
      void setState(State state);
      /// Returns the current state.
      State getState() const;
      /// Writes the dictionary to `path` using `encoding`.
      void save(std::filesystem::path path, Encoding encoding) const;
      /// Reads one entry from `file` using `encoding`.
      /// NOTE(review): `index`, `nbOccsEntry` and `entry` look like output
      /// parameters, and `entry` presumably must be able to hold maxEntrySize
      /// characters; the meaning of the bool result is not visible -- confirm.
      bool readEntry(std::FILE * file, int * index, int * nbOccsEntry, char * entry, Encoding encoding);
      /// Writes one (`index`, `entry`) pair to `file` using `encoding`.
      void printEntry(std::FILE * file, int index, const std::string & entry, Encoding encoding) const;
      /// Returns the number of elements.
      /// NOTE(review): which container's size is reported is not visible here.
      std::size_t size() const;
      /// Returns the occurrence count recorded for `index`.
      int getNbOccs(int index) const;
      /// NOTE(review): presumably drops elements with a low occurrence count;
      /// the threshold is not visible here -- confirm.
      void removeRareElements();
      /// NOTE(review): presumably loads entries from a word2vec file at `path`,
      /// using `prefix` for the inserted elements; bool result meaning unknown.
      bool loadWord2Vec(std::filesystem::path path, std::string prefix);
      /// NOTE(review): presumably tells whether `value` is one of the reserved
      /// *ValueStr placeholders -- confirm.
      bool isSpecialValue(const std::string & value);
    };
    
    #endif