Skip to content
Snippets Groups Projects
Commit dfa2a3a3 authored by Franck Dary's avatar Franck Dary
Browse files

Changed the way unknown values are handled by dicts. Now we separate unknown...

Changed the way unknown values are handled by dicts. Unknown values are now kept separate from nonexistent values.
parent 22ec0b22
No related branches found
No related tags found
No related merge requests found
...@@ -32,6 +32,7 @@ class Dict ...@@ -32,6 +32,7 @@ class Dict
public : public :
static std::string nullValueStr; static std::string nullValueStr;
static std::string unknownValueStr;
static Mode str2mode(const std::string & s); static Mode str2mode(const std::string & s);
static const char * mode2str(Mode mode); static const char * mode2str(Mode mode);
...@@ -69,6 +70,7 @@ class Dict ...@@ -69,6 +70,7 @@ class Dict
std::vector<float> * getValue(const std::string & s); std::vector<float> * getValue(const std::string & s);
const std::string * getStr(const std::string & s); const std::string * getStr(const std::string & s);
const std::string * getStrFasttext(const std::string & s); const std::string * getStrFasttext(const std::string & s);
std::vector<float> * getUnknownValue();
std::vector<float> * getNullValue(); std::vector<float> * getNullValue();
int getDimension(); int getDimension();
void printForDebug(FILE * output); void printForDebug(FILE * output);
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include "util.hpp" #include "util.hpp"
std::string Dict::nullValueStr = "_nullVALUEstr_"; std::string Dict::nullValueStr = "_nullVALUEstr_";
std::string Dict::unknownValueStr = "_unknownVALUEstr_";
std::map< std::string, std::unique_ptr<Dict> > Dict::str2dict; std::map< std::string, std::unique_ptr<Dict> > Dict::str2dict;
Dict::Mode Dict::str2mode(const std::string & s) Dict::Mode Dict::str2mode(const std::string & s)
...@@ -66,6 +67,7 @@ Dict::Dict(Policy policy, const std::string & filename) ...@@ -66,6 +67,7 @@ Dict::Dict(Policy policy, const std::string & filename)
mode = str2mode(b2); mode = str2mode(b2);
addEntry(nullValueStr); addEntry(nullValueStr);
addEntry(unknownValueStr);
// If a fasttext pretrained embedding file is specified // If a fasttext pretrained embedding file is specified
if(fscanf(fd, "Fasttext : %s\n", b1) == 1) if(fscanf(fd, "Fasttext : %s\n", b1) == 1)
...@@ -164,7 +166,7 @@ std::vector<float> * Dict::getValue(const std::string & s) ...@@ -164,7 +166,7 @@ std::vector<float> * Dict::getValue(const std::string & s)
if(ftEmbeddings.get()) if(ftEmbeddings.get())
return getValueFasttext(s); return getValueFasttext(s);
return getNullValue(); return getUnknownValue();
} }
return addEntry(s); return addEntry(s);
...@@ -200,7 +202,7 @@ const std::string * Dict::getStr(const std::string & s) ...@@ -200,7 +202,7 @@ const std::string * Dict::getStr(const std::string & s)
{ {
if(ftEmbeddings.get()) if(ftEmbeddings.get())
return getStrFasttext(s); return getStrFasttext(s);
return &nullValueStr; return &unknownValueStr;
} }
addEntry(s); addEntry(s);
...@@ -265,6 +267,11 @@ Dict::~Dict() ...@@ -265,6 +267,11 @@ Dict::~Dict()
save(); save();
} }
// Returns a pointer to the vector stored in str2vec under the
// Dict::unknownValueStr key, i.e. the value used for unknown entries.
// NOTE(review): if str2vec is a std::map-like container, operator[] creates
// an empty entry when the key is missing -- confirm the key is always
// registered before this is called.
std::vector<float> * Dict::getUnknownValue()
{
auto & unknownVec = str2vec[unknownValueStr];
return &unknownVec;
}
std::vector<float> * Dict::getNullValue() std::vector<float> * Dict::getNullValue()
{ {
return &str2vec[nullValueStr]; return &str2vec[nullValueStr];
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment