Skip to content
Snippets Groups Projects
Commit ff44d493 authored by Franck Dary
Browse files

Added several needed classes

parent 90af1014
Branches
No related tags found
No related merge requests found
Showing
with 674 additions and 20 deletions
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <map> #include <map>
#include <memory>
class Dict class Dict
{ {
...@@ -28,6 +29,8 @@ class Dict ...@@ -28,6 +29,8 @@ class Dict
public : public :
static std::string nullValueStr;
static Mode str2mode(const std::string & s); static Mode str2mode(const std::string & s);
static const char * mode2str(Mode mode); static const char * mode2str(Mode mode);
static Policy str2policy(const std::string & s); static Policy str2policy(const std::string & s);
...@@ -39,16 +42,22 @@ class Dict ...@@ -39,16 +42,22 @@ class Dict
std::string filename; std::string filename;
int oneHotIndex; int oneHotIndex;
static std::map< std::string, std::unique_ptr<Dict> > str2dict;
private : private :
void initEmbedding(std::vector<float> & vec); void initEmbedding(std::vector<float> & vec);
std::vector<float> * addEntry(const std::string & s);
Dict(Policy policy, const std::string & filename);
public : public :
Dict(Policy policy, std::string filename); static Dict * getDict(Policy policy, const std::string & filename);
~Dict(); ~Dict();
void save(); void save();
std::vector<float> * getValue(const std::string & s); std::vector<float> * getValue(const std::string & s);
std::vector<float> * getNullValue();
}; };
#endif #endif
...@@ -2,6 +2,9 @@ ...@@ -2,6 +2,9 @@
#include "File.hpp" #include "File.hpp"
#include "util.hpp" #include "util.hpp"
std::string Dict::nullValueStr = "_nullVALUEstr_";
std::map< std::string, std::unique_ptr<Dict> > Dict::str2dict;
Dict::Mode Dict::str2mode(const std::string & s) Dict::Mode Dict::str2mode(const std::string & s)
{ {
if(s == "OneHot") if(s == "OneHot")
...@@ -38,7 +41,7 @@ const char * Dict::mode2str(Mode mode) ...@@ -38,7 +41,7 @@ const char * Dict::mode2str(Mode mode)
return "Embeddings"; return "Embeddings";
} }
Dict::Dict(Policy policy, std::string filename) Dict::Dict(Policy policy, const std::string & filename)
{ {
auto badFormatAndAbort = [&](std::string errInfo) auto badFormatAndAbort = [&](std::string errInfo)
{ {
...@@ -62,6 +65,8 @@ Dict::Dict(Policy policy, std::string filename) ...@@ -62,6 +65,8 @@ Dict::Dict(Policy policy, std::string filename)
name = b1; name = b1;
mode = str2mode(b2); mode = str2mode(b2);
addEntry(nullValueStr);
// If policy is FromZero, we don't need to read the current entries // If policy is FromZero, we don't need to read the current entries
if(this->policy == Policy::FromZero) if(this->policy == Policy::FromZero)
return; return;
...@@ -132,6 +137,30 @@ std::vector<float> * Dict::getValue(const std::string & s) ...@@ -132,6 +137,30 @@ std::vector<float> * Dict::getValue(const std::string & s)
if(it != str2vec.end()) if(it != str2vec.end())
return &(it->second); return &(it->second);
if(policy == Policy::Final)
return getNullValue();
return addEntry(s);
}
// Initializes a newly created embedding vector.
// No real initialization scheme is implemented yet: the vector is left as the
// caller built it, apart from its first component being reset to zero.
void Dict::initEmbedding(std::vector<float> & vec)
{
  // The write only exists to silence the unused-parameter warning; it is
  // guarded so that an empty vector is never indexed out of bounds.
  if(!vec.empty())
    vec[0] = 0.0;
  // Here initialize a new embedding, doing nothing = all zeroes
}
// Destructor: calls save() before the dictionary is destroyed.
Dict::~Dict()
{
  this->save();
}
// Returns a pointer to the vector stored under the reserved key nullValueStr.
std::vector<float> * Dict::getNullValue()
{
  auto & nullEntry = str2vec[nullValueStr];

  return &nullEntry;
}
std::vector<float> * Dict::addEntry(const std::string & s)
{
str2vec.emplace(s, std::vector<float>(dimension, 0.0)); str2vec.emplace(s, std::vector<float>(dimension, 0.0));
auto & vec = str2vec[s]; auto & vec = str2vec[s];
...@@ -150,13 +179,14 @@ std::vector<float> * Dict::getValue(const std::string & s) ...@@ -150,13 +179,14 @@ std::vector<float> * Dict::getValue(const std::string & s)
return &vec; return &vec;
} }
void Dict::initEmbedding(std::vector<float> & vec) Dict * Dict::getDict(Policy policy, const std::string & filename)
{ {
vec[0] = 0.0; // just to shut warning up auto it = str2dict.find(filename);
// Here initialize a new embedding, doing nothing = all zeroes if(it != str2dict.end())
} return it->second.get();
Dict::~Dict() str2dict.insert(std::make_pair(filename, std::unique_ptr<Dict>(new Dict(policy, filename))));
{
save(); return str2dict[filename].get();
} }
#ifndef ACTION__H
#define ACTION__H
#include <functional>
#include "Config.hpp"
// An Action is a named sequence of BasicAction steps that can be applied to,
// or undone on, a Config.
class Action
{
public :
// One elementary step of an Action.
// NOTE: instances are brace-initialized elsewhere in member order
// (type, data, apply, undo, appliable), so the order of the fields matters.
struct BasicAction
{
// Kind of elementary step.
enum Type
{
Push,
Pop,
Write
};
// Kind of this step.
Type type;
// Textual payload, appended to the type name by to_string().
std::string data;
// Callable that performs the step on a configuration.
std::function<void(Config & config)> apply;
// Callable that reverts the step on a configuration.
std::function<void(Config & config)> undo;
// Callable that tells whether the step can be performed on a configuration.
std::function<bool(Config & config)> appliable;
// Returns "push ", "pop " or "write " followed by data, or "null" for any other type.
std::string to_string();
};
private :
// Steps making up this action, built from the name by ActionBank::str2sequence.
std::vector<BasicAction> sequence;
// Name the action was constructed from.
std::string name;
public :
// Builds the action whose sequence is described by name.
Action(const std::string & name);
// Calls apply on every step of the sequence, first to last.
void apply(Config & config);
// Calls undo on the steps of the sequence, walking from the last one.
void undo(Config & config);
// Same as undo, but steps of type Write are skipped.
void undoOnlyStack(Config & config);
// Returns true only if every step of the sequence is appliable to config.
bool appliable(Config & config);
// Prints the textual form of every step on a single line of output.
void printForDebug(FILE * output);
};
#endif
#ifndef ACTIONBANK__H
#define ACTIONBANK__H
#include "Action.hpp"
// Static helpers that turn an action name into the BasicAction sequence
// implementing it.
class ActionBank
{
public :
// Parses name and returns the matching sequence of basic actions.
// The only recognized form is "TAG <relativeIndex> <tapeName> <value>";
// any other name prints an error and exits the program.
static std::vector<Action::BasicAction> str2sequence(const std::string & name);
private :
// Writes value into the tape called tapeName at position head + relativeIndex.
// Prints an error and exits when that position is outside the tape.
static void simpleBufferWrite(Config & config, const std::string & tapeName,
const std::string & value, int relativeIndex);
// Returns true when head + relativeIndex is a valid index of the tape called tapeName.
static bool simpleBufferWriteAppliable(Config & config,
const std::string & tapeName, int relativeIndex);
};
#endif
#ifndef ACTIONSET__H
#define ACTIONSET__H
#include <vector>
#include "Action.hpp"
// A named collection of Action objects loaded from a file.
class ActionSet
{
public :
// Name of the set, derived from the file path by getFilenameFromPath.
std::string name;
// Actions of the set, one per line read from the file.
std::vector<Action> actions;
public :
// Reads filename line by line, building one Action from each line.
ActionSet(const std::string & filename);
// Prints the name of the set followed by every action it contains.
void printForDebug(FILE * output);
};
#endif
#ifndef CLASSIFIER__H
#define CLASSIFIER__H
#include <string>
#include <memory>
#include "FeatureModel.hpp"
#include "ActionSet.hpp"
// A classifier description: a name, a type, a feature model and an action set,
// all read from a description file.
class Classifier
{
public :
// Kind of classifier, as spelled in the description file.
enum Type
{
Prediction,
Information,
Forced
};
// Name read from the first token of the description file.
std::string name;
// Type read from the second token of the description file.
Type type;
// Feature model built from the file named by the third token.
std::unique_ptr<FeatureModel> fm;
// Action set built from the file named by the fourth token.
std::unique_ptr<ActionSet> as;
public :
// Converts "Prediction", "Information" or "Forced" into the matching Type.
// Any other string prints an error and exits the program.
// NOTE(review): the parameter is named filename here but holds the type string.
static Type str2type(const std::string & filename);
// Loads the classifier description stored in filename; exits on a malformed file.
Classifier(const std::string & filename);
};
#endif
#ifndef CONFIG__H #ifndef CONFIG__H
#define CONFIG__H #define CONFIG__H
#include <vector>
#include "MCD.hpp" #include "MCD.hpp"
class Config class Config
...@@ -9,6 +10,8 @@ class Config ...@@ -9,6 +10,8 @@ class Config
MCD & mcd; MCD & mcd;
std::vector< std::vector< std::string> > tapes; std::vector< std::vector< std::string> > tapes;
std::vector<int> stack;
int head;
public : public :
......
#ifndef FEATUREBANK__H
#define FEATUREBANK__H
#include <functional>
#include "FeatureModel.hpp"
// Static helpers that turn a feature name into a function computing that
// feature on a Config.
class FeatureBank
{
public :
// Parses a name of the form "<object>.<index>.<tapeName>" and returns the
// function computing it: object "b" reads relative to the head, object "s"
// reads through the stack. Any other name prints an error and exits.
static std::function<FeatureModel::FeatureValue(Config &)> str2func(const std::string & s);
private :
// Maps Dict::Policy::Final to FeatureModel::Policy::Final and every other
// dictionary policy to FeatureModel::Policy::Modifiable.
static FeatureModel::Policy dictPolicy2FeaturePolicy(Dict::Policy policy);
// Reads the tape called tapeName at position head + relativeIndex; yields the
// dictionary's null value when that position is outside the tape.
static FeatureModel::FeatureValue simpleBufferAccess(Config & config, int relativeIndex, const std::string & tapeName, const std::string & featName);
// Reads the tape called tapeName at the position stored relativeIndex entries
// below the top of the stack; yields the dictionary's null value when the
// stack entry or the tape position does not exist.
static FeatureModel::FeatureValue simpleStackAccess(Config & config, int relativeIndex, const std::string & tapeName, const std::string & featName);
};
#endif
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
class FeatureModel class FeatureModel
{ {
private : public :
enum Policy enum Policy
{ {
...@@ -17,10 +17,21 @@ class FeatureModel ...@@ -17,10 +17,21 @@ class FeatureModel
struct FeatureValue struct FeatureValue
{ {
std::string name;
std::string * value;
std::vector<float> * vec; std::vector<float> * vec;
Policy policy; Policy policy;
}; };
struct FeatureDescription
{
std::vector<FeatureValue> values;
void printForDebug(FILE * output);
};
private :
struct Feature struct Feature
{ {
std::string name; std::string name;
...@@ -33,7 +44,10 @@ class FeatureModel ...@@ -33,7 +44,10 @@ class FeatureModel
public : public :
std::vector<FeatureValue> getFeatureDescription(Config & config); static const char * policy2str(Policy policy);
FeatureModel(const std::string & filename);
FeatureDescription getFeatureDescription(Config & config);
}; };
#endif #endif
...@@ -11,7 +11,7 @@ class MCD ...@@ -11,7 +11,7 @@ class MCD
{ {
int num; int num;
std::string name; std::string name;
Dict dict; Dict * dict;
int inputColumn; int inputColumn;
Line(int num, std::string name, std::string dictFilename, std::string dictPolicy, int inputColumn); Line(int num, std::string name, std::string dictFilename, std::string dictPolicy, int inputColumn);
......
#ifndef TAPEMACHINE__H
#define TAPEMACHINE__H
#include "Classifier.hpp"
// A machine made of named states linked by transitions, each state being
// associated with a classifier. The whole description is read from a file.
class TapeMachine
{
  public :

  // Forward declaration; uses the same class-key as the definition below to
  // avoid a mismatched-tags declaration.
  struct State;

  // An outgoing edge of a State.
  struct Transition
  {
    // State reached through this transition.
    State * dest;
    // Prefix string read from the transition line of the description file.
    std::string actionPrefix;
    // Integer movement read from the transition line of the description file.
    int headMvt;

    Transition(State * dest, const std::string & prefix, int mvt);
  };

  // A state of the machine.
  struct State
  {
    // Name of the state.
    std::string name;
    // Classifier attached to this state (not owned by the state).
    Classifier * classifier;
    // Transitions leaving this state.
    std::vector<Transition> transitions;

    State(const std::string & name, Classifier * classifier);
  };

  private :

  // Name read from the first line of the description file.
  std::string name;
  // Owns every classifier of the machine, indexed by name.
  std::map< std::string, std::unique_ptr<Classifier> > str2classifier;
  // Owns every state of the machine, indexed by name.
  std::map< std::string, std::unique_ptr<State> > str2state;

  public :

  // Loads the machine described in filename; exits on a malformed file.
  TapeMachine(const std::string & filename);
};
#endif
#include "Action.hpp"
#include "ActionBank.hpp"
// Performs every step of the sequence on config, from first to last.
void Action::apply(Config & config)
{
  for(unsigned int step = 0; step < sequence.size(); step++)
    sequence[step].apply(config);
}
// Tells whether the whole action can be performed on config: true only when
// every step is appliable. Evaluation stops at the first step that is not.
bool Action::appliable(Config & config)
{
  bool everyStepOk = true;

  for(unsigned int step = 0; everyStepOk && step < sequence.size(); step++)
    everyStepOk = sequence[step].appliable(config);

  return everyStepOk;
}
// Reverts the action on config by undoing its steps in reverse order
// (last applied step first).
void Action::undo(Config & config)
{
  // Reverse iterators replace the former index loop, which incremented its
  // index instead of decrementing it and so ran past the end of the sequence.
  for(auto step = sequence.rbegin(); step != sequence.rend(); ++step)
    step->undo(config);
}
// Reverts the action on config in reverse order, like undo, but leaves the
// steps of type Write untouched.
void Action::undoOnlyStack(Config & config)
{
  // Reverse iterators replace the former index loop, which incremented its
  // index instead of decrementing it and so ran past the end of the sequence.
  for(auto step = sequence.rbegin(); step != sequence.rend(); ++step)
  {
    if(step->type == BasicAction::Type::Write)
      continue;

    step->undo(config);
  }
}
// Builds the action called name; its steps are obtained from ActionBank.
// Initializers are listed in member declaration order (sequence, then name).
Action::Action(const std::string & name)
  : sequence(ActionBank::str2sequence(name)), name(name)
{
}
// Returns the textual form of the step: its type name in lower case, a space,
// then its data. A type outside the known ones gives "null".
std::string Action::BasicAction::to_string()
{
  switch(type)
  {
    case Type::Push :
      return "push " + data;
    case Type::Pop :
      return "pop " + data;
    case Type::Write :
      return "write " + data;
  }

  return "null";
}
// Writes every step of the action on one line of output, each followed by a
// space, then ends the line.
void Action::printForDebug(FILE * output)
{
  for(unsigned int step = 0; step < sequence.size(); step++)
  {
    const std::string description = sequence[step].to_string();
    fprintf(output, "%s ", description.c_str());
  }

  fprintf(output, "\n");
}
#include "ActionBank.hpp"
#include "util.hpp"
// Translates an action name into the sequence of basic actions implementing it.
// The only supported form is "TAG <relativeIndex> <tapeName> <value>", which
// yields a single Write step on the tape called tapeName.
// Any other name prints an error on stderr and exits the program.
std::vector<Action::BasicAction> ActionBank::str2sequence(const std::string & name)
{
  auto invalidNameAndAbort = [&](const char * errInfo)
  {
    fprintf(stderr, "ERROR (%s) : unknown action name \'%s\' Aborting.\n", errInfo, name.c_str());

    exit(1);
  };

  std::vector<Action::BasicAction> sequence;

  // Every %s conversion below is limited to 1023 characters so that an overly
  // long token cannot overflow these buffers.
  char b1[1024];
  char b2[1024];
  char b3[1024];

  if (sscanf(name.c_str(), "%1023s", b1) != 1)
    invalidNameAndAbort(ERRINFO);

  if(std::string(b1) == "TAG")
  {
    int relativeIndex;

    if (sscanf(name.c_str(), "%1023s %d %1023s %1023s", b1, &relativeIndex, b2, b3) != 4)
      invalidNameAndAbort(ERRINFO);

    std::string tapeName(b2);
    std::string value(b3);

    // Writing the value, and undoing it by writing the empty string back.
    auto apply = [tapeName, value, relativeIndex](Config & c)
      {return simpleBufferWrite(c, tapeName, value, relativeIndex);};
    auto undo = [tapeName, relativeIndex](Config & c)
      {return simpleBufferWrite(c, tapeName, "", relativeIndex);};
    auto appliable = [tapeName, relativeIndex](Config & c)
      {return simpleBufferWriteAppliable(c, tapeName, relativeIndex);};

    Action::BasicAction basicAction =
      {Action::BasicAction::Type::Write, value, apply, undo, appliable};

    sequence.emplace_back(basicAction);
  }
  else
    invalidNameAndAbort(ERRINFO);

  return sequence;
}
// Stores value in the tape called tapeName, at the cell located relativeIndex
// positions away from the head. Prints an error and exits when that cell is
// outside the tape.
void ActionBank::simpleBufferWrite(Config & config, const std::string & tapeName,
  const std::string & value, int relativeIndex)
{
  auto & targetTape = config.getTape(tapeName);
  const int cell = config.head + relativeIndex;

  const bool inBounds = simpleBufferWriteAppliable(config, tapeName, relativeIndex);

  if (!inBounds)
  {
    fprintf(stderr, "ERROR (%s) : Write action not appliable (out of tape %s bounds). Aborting.\n", ERRINFO, tapeName.c_str());
    exit(1);
  }

  targetTape[cell] = value;
}
// Tells whether the cell located relativeIndex positions away from the head
// exists in the tape called tapeName.
bool ActionBank::simpleBufferWriteAppliable(Config & config,
  const std::string & tapeName, int relativeIndex)
{
  const int tapeLength = (int)config.getTape(tapeName).size();
  const int cell = config.head + relativeIndex;

  return cell >= 0 && cell < tapeLength;
}
#include "ActionSet.hpp"
#include "File.hpp"
#include "util.hpp"
// Loads an action set from filename: every line of the file is the name of
// one action. The set is named after the file, through getFilenameFromPath.
ActionSet::ActionSet(const std::string & filename)
{
  File file(filename, "r");
  FILE * fd = file.getDescriptor();

  char buffer[1024];

  // The conversion is limited to 1023 characters so that an overly long line
  // cannot overflow the buffer.
  while(fscanf(fd, "%1023[^\n]\n", buffer) == 1)
    actions.emplace_back(buffer);

  this->name = getFilenameFromPath(filename);
}
void ActionSet::printForDebug(FILE * output)
{
fprintf(output, "ActionSet %s :\n", name.c_str());
for(auto & action : actions)
action.printForDebug(output);
}
#include "Classifier.hpp"
#include "File.hpp"
#include "util.hpp"
// Loads a classifier description from filename. The file must contain, in
// order, four whitespace-separated tokens: the classifier name, its type, the
// feature model filename and the action set filename.
// A missing token prints an error on stderr and exits the program.
Classifier::Classifier(const std::string & filename)
{
  auto badFormatAndAbort = [&filename](const char * errInfo)
  {
    fprintf(stderr, "ERROR (%s) : file %s bad format. Aborting.\n", errInfo, filename.c_str());

    exit(1);
  };

  File file(filename, "r");
  FILE * fd = file.getDescriptor();

  // Every read below is limited to 1023 characters so that an overly long
  // token cannot overflow the buffer.
  char buffer[1024];

  if(fscanf(fd, "%1023s\n", buffer) != 1)
    badFormatAndAbort(ERRINFO);

  name = buffer;

  if(fscanf(fd, "%1023s\n", buffer) != 1)
    badFormatAndAbort(ERRINFO);

  type = str2type(buffer);

  if(fscanf(fd, "%1023s\n", buffer) != 1)
    badFormatAndAbort(ERRINFO);

  fm.reset(new FeatureModel(buffer));

  if(fscanf(fd, "%1023s\n", buffer) != 1)
    badFormatAndAbort(ERRINFO);

  as.reset(new ActionSet(buffer));
}
// Converts the textual name of a classifier type into the matching Type.
// An unknown name prints an error on stderr and exits the program.
Classifier::Type Classifier::str2type(const std::string & s)
{
  if(s == "Prediction")
    return Type::Prediction;

  if(s == "Information")
    return Type::Information;

  if(s == "Forced")
    return Type::Forced;

  fprintf(stderr, "ERROR (%s) : invalid type \'%s\'. Aborting.\n", ERRINFO, s.c_str());
  exit(1);

  // Never reached; keeps every control path returning a value.
  return Type::Prediction;
}
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
Config::Config(MCD & mcd) : mcd(mcd), tapes(mcd.getNbLines()) Config::Config(MCD & mcd) : mcd(mcd), tapes(mcd.getNbLines())
{ {
head = 0;
} }
std::vector<std::string> & Config::getTape(const std::string & name) std::vector<std::string> & Config::getTape(const std::string & name)
......
#include "FeatureBank.hpp"
#include "util.hpp"
// Translates a feature name into the function computing that feature.
// The name must have the form "<object>.<index>.<tapeName>", where object is
// "b" (access relative to the head) or "s" (access through the stack).
// Any other name prints an error on stderr and exits the program.
std::function<FeatureModel::FeatureValue(Config &)> FeatureBank::str2func(const std::string & s)
{
  // Plain local buffers: nothing needs to survive the call, so they are no
  // longer static. Conversions are limited to 1023 characters so that an
  // overly long name cannot overflow them.
  char b1[1024];
  char b2[1024];
  int index;

  if(sscanf(s.c_str(), "%1023[^.].%d.%1023s", b1, &index, b2) == 3)
  {
    std::string object(b1);
    std::string tapeName(b2);

    if(object == "b")
      return [s, tapeName, index](Config & c)
        {return simpleBufferAccess(c, index, tapeName, s);};
    else if(object == "s")
      return [s, tapeName, index](Config & c)
        {return simpleStackAccess(c, index, tapeName, s);};
  }

  fprintf(stderr, "ERROR (%s) : unknown feature \'%s\' Aborting.\n", ERRINFO, s.c_str());

  exit(1);

  // Never reached; keeps every control path returning a value.
  return nullptr;
}
// Computes the feature reading the tape called tapeName at the cell located
// relativeIndex positions away from the head. When that cell does not exist,
// the result carries the dictionary's null value and "(null)" is appended to
// the feature name.
FeatureModel::FeatureValue FeatureBank::simpleBufferAccess(Config & config, int relativeIndex, const std::string & tapeName, const std::string & featName)
{
  auto & tape = config.getTape(tapeName);
  Dict * dict = config.mcd.getDictOfLine(tapeName);
  auto policy = dictPolicy2FeaturePolicy(dict->policy);

  const int cell = config.head + relativeIndex;
  const bool insideTape = cell >= 0 && cell < (int)tape.size();

  if(insideTape)
    return {featName, &tape[cell], dict->getValue(tape[cell]), policy};

  return {featName+"(null)", &Dict::nullValueStr, dict->getNullValue(), policy};
}
// Computes the feature reading the tape called tapeName at the position stored
// relativeIndex entries below the top of the stack. When the stack entry or
// the tape cell does not exist, the result carries the dictionary's null value
// and "(null)" is appended to the feature name.
FeatureModel::FeatureValue FeatureBank::simpleStackAccess(Config & config, int relativeIndex, const std::string & tapeName, const std::string & featName)
{
  auto & tape = config.getTape(tapeName);
  Dict * dict = config.mcd.getDictOfLine(tapeName);
  auto policy = dictPolicy2FeaturePolicy(dict->policy);

  // Shared fallback used by both out-of-range cases.
  auto nullFeature = [&]() -> FeatureModel::FeatureValue
  {
    return {featName+"(null)", &Dict::nullValueStr, dict->getNullValue(), policy};
  };

  const bool insideStack = relativeIndex >= 0 && relativeIndex < (int)config.stack.size();

  if(!insideStack)
    return nullFeature();

  const int cell = config.stack[config.stack.size()-1-relativeIndex];
  const bool insideTape = cell >= 0 && cell < (int)tape.size();

  if(!insideTape)
    return nullFeature();

  return {featName, &tape[cell], dict->getValue(tape[cell]), policy};
}
// Converts a dictionary policy into a feature policy: Final stays Final, every
// other dictionary policy becomes Modifiable.
FeatureModel::Policy FeatureBank::dictPolicy2FeaturePolicy(Dict::Policy policy)
{
  if(policy == Dict::Policy::Final)
    return FeatureModel::Policy::Final;

  return FeatureModel::Policy::Modifiable;
}
#include "FeatureModel.hpp" #include "FeatureModel.hpp"
#include "FeatureBank.hpp"
#include "File.hpp"
FeatureModel::Feature::Feature(const std::string & name) FeatureModel::Feature::Feature(const std::string & name)
{ {
this->name = name; this->name = name;
//TODO finir this->func = FeatureBank::str2func(name);
} }
std::vector<FeatureModel::FeatureValue> FeatureModel::getFeatureDescription(Config & config) FeatureModel::FeatureDescription FeatureModel::getFeatureDescription(Config & config)
{ {
std::vector<FeatureValue> featureDescription; std::vector<FeatureValue> featureDescription;
for(auto & feature : features) for(auto & feature : features)
featureDescription.emplace_back(feature.func(config)); featureDescription.emplace_back(feature.func(config));
return featureDescription; return {featureDescription};
}
// Loads a feature model from filename: every whitespace-separated token of the
// file is the name of one feature.
FeatureModel::FeatureModel(const std::string & filename)
{
  File file(filename, "r");
  FILE * fd = file.getDescriptor();

  char buffer[1024];

  // The conversion is limited to 1023 characters so that an overly long token
  // cannot overflow the buffer.
  while (fscanf(fd, "%1023s\n", buffer) == 1)
    features.emplace_back(buffer);
}
// Prints the description on output, framed by two lines of 80 dashes. Each
// feature value gives one line with its name, policy and string value, then
// one line with the components of its vector.
void FeatureModel::FeatureDescription::printForDebug(FILE * output)
{
  int nbCol = 80;

  auto printSeparator = [&]()
  {
    for(int i = 0; i < nbCol; i++)
      fprintf(output, "-");
    fprintf(output, "\n");
  };

  printSeparator();

  // Iterate by const reference: copying a FeatureValue would copy its name
  // string on every iteration for no benefit.
  for(const auto & featValue : values)
  {
    fprintf(output, "Feature=%s, Policy=%s, Value=%s\n", featValue.name.c_str(), policy2str(featValue.policy), featValue.value->c_str());
    for(float val : *featValue.vec)
      fprintf(output, "%.2f ", val);
    fprintf(output, "\n");
  }

  printSeparator();
}
// Returns the textual name of a feature policy, or "null" for a policy that is
// neither Final nor Modifiable.
const char * FeatureModel::policy2str(Policy policy)
{
  switch(policy)
  {
    case Policy::Final :
      return "Final";
    case Policy::Modifiable :
      return "Modifiable";
    default :
      break;
  }

  return "null";
}
...@@ -2,9 +2,10 @@ ...@@ -2,9 +2,10 @@
#include "File.hpp" #include "File.hpp"
#include "util.hpp" #include "util.hpp"
MCD::Line::Line(int num, std::string name, std::string dictFilename, std::string dictPolicy, int inputColumn) MCD::Line::Line(int num, std::string name, std::string dictFilename,
: dict(Dict::str2policy(dictPolicy), dictFilename) std::string dictPolicy, int inputColumn)
{ {
this->dict = Dict::getDict(Dict::str2policy(dictPolicy), dictFilename);
this->num = num; this->num = num;
this->name = name; this->name = name;
this->inputColumn = inputColumn; this->inputColumn = inputColumn;
...@@ -38,17 +39,17 @@ MCD::MCD(const std::string & filename) ...@@ -38,17 +39,17 @@ MCD::MCD(const std::string & filename)
Dict * MCD::getDictOfLine(int num) Dict * MCD::getDictOfLine(int num)
{ {
return &num2line[num]->dict; return num2line[num]->dict;
} }
Dict * MCD::getDictOfLine(const std::string & name) Dict * MCD::getDictOfLine(const std::string & name)
{ {
return &name2line[name]->dict; return name2line[name]->dict;
} }
Dict * MCD::getDictOfInputCol(int col) Dict * MCD::getDictOfInputCol(int col)
{ {
return &col2line[col]->dict; return col2line[col]->dict;
} }
int MCD::getLineOfName(const std::string & name) int MCD::getLineOfName(const std::string & name)
......
#include "TapeMachine.hpp"
#include "File.hpp"
#include "util.hpp"
// Loads a tape machine from filename. Expected layout of the file:
//   <machine name>
//   %CLASSIFIERS
//   <classifier name> <classifier filename>      (any number of lines)
//   %STATES
//   <state name> <classifier name>               (any number of lines)
//   %TRANSITIONS
//   <source state> <dest state> <prefix> <mvt>   (any number of lines)
// A malformed file, an unknown classifier or an unknown state prints an error
// on stderr and exits the program.
TapeMachine::TapeMachine(const std::string & filename)
{
  auto badFormatAndAbort = [&filename](const std::string & errInfo)
  {
    fprintf(stderr, "ERROR (%s) : file %s bad format. Aborting.\n", errInfo.c_str(), filename.c_str());

    exit(1);
  };

  File file(filename, "r");
  FILE * fd = file.getDescriptor();

  // Every read below is limited to 1023 characters so that an overly long
  // token or line cannot overflow these buffers.
  char buffer[1024];
  char buffer2[1024];
  char buffer3[1024];

  // Reading the name
  if(fscanf(fd, "%1023[^\n]\n", buffer) != 1)
    badFormatAndAbort(ERRINFO);

  name = buffer;

  // Reading %CLASSIFIERS
  if(fscanf(fd, "%%%1023s\n", buffer) != 1 || buffer != std::string("CLASSIFIERS"))
    badFormatAndAbort(ERRINFO);

  // The loop stops on the next line starting with '%', whose keyword is then
  // left in buffer for the section check that follows.
  while(fscanf(fd, "%%%1023s\n", buffer) != 1)
  {
    // Reading a classifier
    if(fscanf(fd, "%1023s %1023s\n", buffer, buffer2) != 2)
      badFormatAndAbort(ERRINFO);

    str2classifier.emplace(buffer, std::unique_ptr<Classifier>(new Classifier(buffer2)));
  }

  // Reading %STATES
  if(buffer != std::string("STATES"))
    badFormatAndAbort(ERRINFO);

  while(fscanf(fd, "%%%1023s\n", buffer) != 1)
  {
    // Reading a state
    if(fscanf(fd, "%1023s %1023s\n", buffer, buffer2) != 2)
      badFormatAndAbort(ERRINFO);

    if(str2classifier.count(buffer2) == 0)
      badFormatAndAbort(ERRINFO + std::string(" unknown classifier \'") + buffer2 + std::string("\'"));

    Classifier * classifier = str2classifier[buffer2].get();

    str2state.emplace(buffer, std::unique_ptr<State>(new State(buffer, classifier)));
  }

  // Reading %TRANSITIONS
  if(buffer != std::string("TRANSITIONS"))
    badFormatAndAbort(ERRINFO);

  // Reading all transitions
  int mvt;
  while(fscanf(fd, "%1023s %1023s %1023s %d\n", buffer, buffer2, buffer3, &mvt) == 4)
  {
    std::string src(buffer);
    std::string dest(buffer2);
    std::string prefix(buffer3);

    if(str2state.count(src) == 0)
      badFormatAndAbort(ERRINFO + std::string(" unknown state \'") + src + std::string("\'"));

    if(str2state.count(dest) == 0)
      badFormatAndAbort(ERRINFO + std::string(" unknown state \'") + dest + std::string("\'"));

    State * srcState = str2state[src].get();
    State * destState = str2state[dest].get();

    srcState->transitions.emplace_back(destState, prefix, mvt);
  }
}
// Builds a state called name, attached to classifier and without transitions.
TapeMachine::State::State(const std::string & name, Classifier * classifier)
  : name(name), classifier(classifier)
{
}
// Builds a transition leading to dest, carrying the given prefix and movement.
TapeMachine::Transition::Transition(State * dest, const std::string & prefix, int mvt)
  : dest(dest), actionPrefix(prefix), headMvt(mvt)
{
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment