Skip to content
Snippets Groups Projects
Commit 374bc8d7 authored by Franck Dary's avatar Franck Dary
Browse files

Added uppercase and prefix/suffix features

parent 25fc89f8
No related branches found
No related tags found
No related merge requests found
......@@ -57,6 +57,7 @@ class Dict
~Dict();
void save();
std::vector<float> * getValue(const std::string & s);
// Returns a pointer to the dictionary's own stored key equal to s.
// If s is not stored and the policy is Final, returns the address of nullValueStr;
// otherwise s is added through addEntry and the pointer to the stored key is returned.
const std::string * getStr(const std::string & s);
std::vector<float> * getNullValue();
int getDimension();
void printForDebug(FILE * output);
......
......@@ -15,6 +15,7 @@ bool endSentence(char c);
bool endLine(char c);
void toLowerCase(std::string & s, unsigned int i);
void toUpperCase(std::string & s, unsigned int i);
bool isUpper(char c);
std::vector<std::string> split(const std::string & s);
// Expands to a C string built as getFilenameFromPath(__FILE__) + ":l." + __LINE__ for the
// expansion site (presumably the bare file name; getFilenameFromPath is defined elsewhere).
// The pointer comes from c_str() on a temporary std::string, so it is only valid until the
// end of the enclosing full expression: pass it directly as an argument, never store it.
#define ERRINFO (getFilenameFromPath(std::string(__FILE__))+ ":l." + std::to_string(__LINE__)).c_str()
......
......@@ -144,6 +144,22 @@ std::vector<float> * Dict::getValue(const std::string & s)
return addEntry(s);
}
// Returns a pointer to the key stored in str2vec that equals s.
// - s already stored            : pointer to the stored key.
// - s unknown, policy is Final  : address of nullValueStr (nothing is added).
// - s unknown, any other policy : addEntry(s) is called, then the key is looked up again.
//   (assumes addEntry inserts s into str2vec - TODO confirm against addEntry)
const std::string * Dict::getStr(const std::string & s)
{
  const auto known = str2vec.find(s);

  if(known == str2vec.end())
  {
    if(policy == Policy::Final)
      return &nullValueStr;

    addEntry(s);
    return &(str2vec.find(s)->first);
  }

  return &(known->first);
}
void Dict::initEmbedding(std::vector<float> & vec)
{
vec[0] = 0.0; // just to shut warning up
......
......@@ -15,6 +15,9 @@ class FeatureBank
static FeatureModel::Policy dictPolicy2FeaturePolicy(Dict::Policy policy);
static FeatureModel::FeatureValue simpleBufferAccess(Config & config, int relativeIndex, const std::string & tapeName, const std::string & featName);
static FeatureModel::FeatureValue simpleStackAccess(Config & config, int relativeIndex, const std::string & tapeName, const std::string & featName);
// Derives a feature from fv using the "boolean" dict: "true" when isUpper() accepts the first
// character of fv's string value, "false" otherwise. Yields the dict's null value when fv's
// string value equals Dict::nullValueStr.
static FeatureModel::FeatureValue getUppercase(Config & config, const FeatureModel::FeatureValue & fv);
// Derives a feature from fv using the "integer" dict: the decimal text of lengthPrinted() applied
// to fv's string value. Yields the dict's null value when fv's string value equals Dict::nullValueStr.
static FeatureModel::FeatureValue getLength(Config & config, const FeatureModel::FeatureValue & fv);
// Derives a feature from fv using the "letters" dict: the characters of fv's string value at
// positions from..to (both inclusive). A negative position has the string's size added to it,
// so it counts from the end. Yields the dict's null value when fv's string value equals
// Dict::nullValueStr or when the resolved range is not usable (e.g. still negative, or to < from).
static FeatureModel::FeatureValue getLetters(Config & config, const FeatureModel::FeatureValue & fv, int from, int to);
};
#endif
......@@ -19,7 +19,7 @@ class FeatureModel
{
Dict * dict;
std::string name;
std::string * value;
const std::string * value;
std::vector<float> * vec;
Policy policy;
};
......
......@@ -5,19 +5,77 @@ std::function<FeatureModel::FeatureValue(Config &)> FeatureBank::str2func(const
{
static char b1[1024];
static char b2[1024];
static char b3[1024];
int index;
int from;
int to;
if(sscanf(s.c_str(), "%[^.].%d.%s", b1, &index, b2) == 3)
if(sscanf(s.c_str(), "%[^.].%d.%[^.].%[^.].%d.%d", b1, &index, b2, b3, &from, &to) == 6)
{
std::string object(b1);
std::string tapeName(b2);
std::string infos(b3);
if(object == "b")
return [s, tapeName, index](Config & c)
{return simpleBufferAccess(c, index, tapeName, s);};
else if(object == "s")
return [s, tapeName, index](Config & c)
{return simpleStackAccess(c, index, tapeName, s);};
auto access = object == "b" ? simpleBufferAccess : simpleStackAccess;
if(infos == "PART")
return [access, s, tapeName, index, from, to](Config & c)
{return getLetters(c, access(c, index, tapeName, s), from, to);};
else
{
fprintf(stderr, "ERROR (%s) : unknown feature \'%s\' Aborting.\n", ERRINFO, s.c_str());
exit(1);
}
}
if(sscanf(s.c_str(), "%[^.].%d.%[^.].%[^.].%d", b1, &index, b2, b3, &from) == 5)
{
std::string object(b1);
std::string tapeName(b2);
std::string infos(b3);
auto access = object == "b" ? simpleBufferAccess : simpleStackAccess;
if(infos == "PART")
return [access, s, tapeName, index, from](Config & c)
{return getLetters(c, access(c, index, tapeName, s), from, from);};
else
{
fprintf(stderr, "ERROR (%s) : unknown feature \'%s\' Aborting.\n", ERRINFO, s.c_str());
exit(1);
}
}
if(sscanf(s.c_str(), "%[^.].%d.%[^.].%s", b1, &index, b2, b3) == 4)
{
std::string object(b1);
std::string tapeName(b2);
std::string infos(b3);
auto access = object == "b" ? simpleBufferAccess : simpleStackAccess;
if(infos == "U")
return [access, s, tapeName, index](Config & c)
{return getUppercase(c, access(c, index, tapeName, s));};
else if(infos == "LEN")
return [access, s, tapeName, index](Config & c)
{return getLength(c, access(c, index, tapeName, s));};
else
{
fprintf(stderr, "ERROR (%s) : unknown feature \'%s\' Aborting.\n", ERRINFO, s.c_str());
exit(1);
}
}
else if(sscanf(s.c_str(), "%[^.].%d.%s", b1, &index, b2) == 3)
{
std::string object(b1);
std::string tapeName(b2);
auto access = object == "b" ? simpleBufferAccess : simpleStackAccess;
return [access, s, tapeName, index](Config & c)
{return access(c, index, tapeName, s);};
}
fprintf(stderr, "ERROR (%s) : unknown feature \'%s\' Aborting.\n", ERRINFO, s.c_str());
......@@ -58,6 +116,59 @@ FeatureModel::FeatureValue FeatureBank::simpleStackAccess(Config & config, int r
return {dict, featName, &tape[index], dict->getValue(tape[index]), policy};
}
// Builds a feature value from the "boolean" dict telling whether the first character of
// fv's string value satisfies isUpper().
// config : gives access to the dict registered for the "boolean" line.
// fv     : the feature value whose string is inspected; its name is reused for the result.
// Returns the dict's null value when fv's string equals Dict::nullValueStr, otherwise the
// entry for "true" or "false".
FeatureModel::FeatureValue FeatureBank::getUppercase(Config & config, const FeatureModel::FeatureValue & fv)
{
  Dict * booleanDict = config.mcd.getDictOfLine("boolean");
  auto featurePolicy = dictPolicy2FeaturePolicy(booleanDict->policy);

  const std::string & word = *fv.value;
  // Evaluated before the null check; on an empty string word[0] is the terminating '\0'.
  bool startsUpper = isUpper(word[0]);

  if(word == Dict::nullValueStr)
    return {booleanDict, fv.name, &Dict::nullValueStr, booleanDict->getNullValue(), featurePolicy};

  std::string label;
  if(startsUpper)
    label = "true";
  else
    label = "false";

  const std::string * stored = booleanDict->getStr(label);

  return {booleanDict, fv.name, stored, booleanDict->getValue(*stored), featurePolicy};
}
// Builds a feature value from the "integer" dict holding the length of fv's string value,
// as measured by lengthPrinted() and rendered with std::to_string.
// config : gives access to the dict registered for the "integer" line.
// fv     : the feature value whose string is measured; its name is reused for the result.
// Returns the dict's null value when fv's string equals Dict::nullValueStr.
FeatureModel::FeatureValue FeatureBank::getLength(Config & config, const FeatureModel::FeatureValue & fv)
{
  Dict * integerDict = config.mcd.getDictOfLine("integer");
  auto featurePolicy = dictPolicy2FeaturePolicy(integerDict->policy);

  const std::string & word = *fv.value;
  int printedLength = lengthPrinted(word);

  if(!(word == Dict::nullValueStr))
  {
    const std::string * stored = integerDict->getStr(std::to_string(printedLength));
    return {integerDict, fv.name, stored, integerDict->getValue(*stored), featurePolicy};
  }

  return {integerDict, fv.name, &Dict::nullValueStr, integerDict->getNullValue(), featurePolicy};
}
// Builds a feature value from the "letters" dict holding the characters of fv's string value
// located at positions from..to, both inclusive (used for prefix/suffix features).
// config   : gives access to the dict registered for the "letters" line.
// fv       : the feature value whose string is sliced; its name is reused for the result.
// from, to : positions in the string; a negative position counts from the end
//            (-1 is the last character).
// Returns the dict's null value when fv's string equals Dict::nullValueStr, or when the
// requested range does not lie entirely inside the string (position still negative after
// adjustment, to < from, or to past the last character).
// NOTE(review): positions index bytes of the std::string, not printed characters - confirm
// this is intended for multi-byte encoded input.
FeatureModel::FeatureValue FeatureBank::getLetters(Config & config, const FeatureModel::FeatureValue & fv, int from, int to)
{
  Dict * dict = config.mcd.getDictOfLine("letters");
  auto policy = dictPolicy2FeaturePolicy(dict->policy);

  if(*fv.value == Dict::nullValueStr)
    return {dict, fv.name, &Dict::nullValueStr, dict->getNullValue(), policy};

  // Work in signed arithmetic so that negative positions are not mixed with size_t.
  const int size = static_cast<int>(fv.value->size());

  if(from < 0)
    from += size;
  if(to < 0)
    to += size;

  // from >= 0 together with from <= to < size guarantees every position is inside the string;
  // reading at or past size() would yield '\0' or be undefined behaviour.
  if(from < 0 || to < from || to >= size)
    return {dict, fv.name, &Dict::nullValueStr, dict->getNullValue(), policy};

  const std::string letters = fv.value->substr(from, to - from + 1);
  const std::string * str = dict->getStr(letters);

  return {dict, fv.name, str, dict->getValue(*str), policy};
}
FeatureModel::Policy FeatureBank::dictPolicy2FeaturePolicy(Dict::Policy policy)
{
return policy == Dict::Policy::Final ? FeatureModel::Policy::Final : FeatureModel::Policy::Modifiable;
......
......@@ -24,8 +24,15 @@ FeatureModel::FeatureModel(const std::string & filename)
FILE * fd = file.getDescriptor();
char buffer[1024];
while (fscanf(fd, "%s\n", buffer) == 1)
features.emplace_back(buffer);
while (fscanf(fd, "%[^\n]\n", buffer) == 1)
{
std::string featureName(buffer);
if(featureName.empty() || featureName[0] == '#' || featureName[0] == '/' ||
featureName[0] == '%')
continue;
features.emplace_back(featureName);
}
}
void FeatureModel::FeatureDescription::printForDebug(FILE * output)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment