Commit 32459c5a authored by Franck Dary

Added command line arguments for train and many options

parent b3f744f5
@@ -12,6 +12,8 @@ class MLP
 {
   public :

+  using Examples = std::pair< std::vector<int>, std::vector<std::pair<int, FeatureModel::FeatureDescription> > >;
+
   enum Activation
   {
     SIGMOID,
@@ -72,7 +74,8 @@ class MLP
   MLP(const std::string & filename);
   std::vector<float> predict(FeatureModel::FeatureDescription & fd, int goldClass);
-  int trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & start, std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & end);
+  int trainOnBatch(Examples & examples, int start, int end);
+  int getScoreOnBatch(Examples & examples, int start, int end);
   void save(const std::string & filename);
 };
...
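Note: the new MLP::Examples type pairs a vector of indices with the vector of (goldClass, FeatureDescription) examples. Batches walk the index vector, so shuffling between epochs only permutes cheap ints while the feature payloads stay in place. A minimal self-contained sketch of that idea (the Payload stand-in and all names below are illustrative, not from the repository):

#include <algorithm>
#include <random>
#include <string>
#include <utility>
#include <vector>

// Stand-in for std::pair<int, FeatureModel::FeatureDescription>
using Payload = std::pair<int, std::string>;
// Same shape as MLP::Examples: (index order, payloads)
using Examples = std::pair<std::vector<int>, std::vector<Payload>>;

int main()
{
  Examples examples;
  examples.second = {{0, "featsA"}, {1, "featsB"}, {2, "featsC"}};
  for (int i = 0; i < (int)examples.second.size(); ++i)
    examples.first.push_back(i);                       // identity order

  std::mt19937 rng(42);
  std::shuffle(examples.first.begin(), examples.first.end(), rng);

  // A "batch" [start, end) reads payloads through the shuffled order,
  // exactly as trainOnBatch / getScoreOnBatch do below.
  for (int i = 0; i < (int)examples.first.size(); ++i)
  {
    const Payload & example = examples.second[examples.first[i]];
    (void)example; // batch code reads example.first (gold) and example.second (features)
  }
  return 0;
}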
@@ -255,7 +255,7 @@ void MLP::printParameters(FILE * output)
   fprintf(output, "Parameters : NOT IMPLEMENTED\n");
 }

-int MLP::trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & start, std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & end)
+int MLP::trainOnBatch(Examples & examples, int start, int end)
 {
   dynet::ComputationGraph cg;
   std::vector<dynet::Expression> inputs;
@@ -263,17 +263,21 @@ int MLP::trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescriptio
   int inputDim = 0;
   int outputDim = layers.back().output_dim;

-  for(auto it = start; it != end; it++)
+  for(int i = start; i < end; i++)
   {
+    auto & order = examples.first;
+    int exampleIndex = order[i];
+    auto & example = examples.second[exampleIndex];
+
     std::vector<dynet::Expression> expressions;

     expressions.clear();

-    for (auto & featValue : it->second.values)
+    for (auto & featValue : example.second.values)
       expressions.emplace_back(featValue2Expression(cg, featValue));

     inputs.emplace_back(dynet::concatenate(expressions));
     inputDim = inputs.back().dim().rows();
-    goldClasses.emplace_back((unsigned)it->first);
+    goldClasses.emplace_back((unsigned)example.first);
   }

   dynet::Expression concatenation = dynet::concatenate(inputs);
@@ -309,6 +313,56 @@ int MLP::trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescriptio
   return nbCorrect;
 }

+int MLP::getScoreOnBatch(Examples & examples, int start, int end)
+{
+  dynet::ComputationGraph cg;
+  std::vector<dynet::Expression> inputs;
+  std::vector<unsigned int> goldClasses;
+  int inputDim = 0;
+  int outputDim = layers.back().output_dim;
+
+  for(int i = start; i < end; i++)
+  {
+    auto & order = examples.first;
+    int exampleIndex = order[i];
+    auto & example = examples.second[exampleIndex];
+
+    std::vector<dynet::Expression> expressions;
+    expressions.clear();
+
+    for (auto & featValue : example.second.values)
+      expressions.emplace_back(featValue2Expression(cg, featValue));
+
+    inputs.emplace_back(dynet::concatenate(expressions));
+    inputDim = inputs.back().dim().rows();
+    goldClasses.emplace_back((unsigned)example.first);
+  }
+
+  dynet::Expression concatenation = dynet::concatenate(inputs);
+  int batchSize = end - start;
+
+  dynet::Expression batchedInput = reshape((concatenation),
+    dynet::Dim({(unsigned)inputDim}, batchSize));
+
+  dynet::Expression output = run(cg, batchedInput);
+
+  int nbCorrect = 0;
+  std::vector<float> predictions = as_vector(output.value());
+
+  for (unsigned int i = 0; (int)i < batchSize; i++)
+  {
+    int prediction = 0;
+
+    for (unsigned int j = 0; (int)j < outputDim; j++)
+      if(predictions[i*outputDim+j] > predictions[i*outputDim+prediction])
+        prediction = (int)j;
+
+    if(prediction == (int)goldClasses[i])
+      nbCorrect++;
+  }
+
+  return nbCorrect;
+}
+
 void MLP::save(const std::string & filename)
 {
   saveStruct(filename);
...
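Note: the scoring loop at the end of getScoreOnBatch is a per-row argmax over the flattened row-major (batchSize x outputDim) score matrix that as_vector returns here. The same logic, isolated as a standalone helper for clarity (a sketch; the function and variable names are illustrative):

#include <algorithm>
#include <vector>

// Count rows whose argmax matches the gold class, given scores laid out
// row-major as batchSize rows of outputDim floats.
int countCorrect(const std::vector<float> & predictions,
                 const std::vector<unsigned> & goldClasses,
                 int batchSize, int outputDim)
{
  int nbCorrect = 0;
  for (int i = 0; i < batchSize; i++)
  {
    auto rowBegin = predictions.begin() + i * outputDim;
    int prediction = std::max_element(rowBegin, rowBegin + outputDim) - rowBegin;
    if (prediction == (int)goldClasses[i])
      nbCorrect++;
  }
  return nbCorrect;
}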
@@ -31,6 +31,11 @@ class Classifier
   std::unique_ptr<ActionSet> as;
   std::unique_ptr<MLP> mlp;
   Oracle * oracle;
+  std::string modelFilename;
+
+  private :
+
+  void save(const std::string & filename);

   public :
@@ -42,10 +47,11 @@ class Classifier
   FeatureModel::FeatureDescription getFeatureDescription(Config & config);
   std::string getOracleAction(Config & config);
   int getOracleActionIndex(Config & config);
-  int trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & start, std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & end);
+  int getScoreOnBatch(MLP::Examples & examples, int start, int end);
+  int trainOnBatch(MLP::Examples & examples, int start, int end);
   std::string getActionName(int actionIndex);
   void initClassifier(Config & config);
-  void save(const std::string & filename);
+  void save();
 };

 #endif
@@ -42,6 +42,11 @@ Classifier::Classifier(const std::string & filename, bool trainMode)
     badFormatAndAbort(ERRINFO);

   oracle = Oracle::getOracle(buffer);
+
+  if(fscanf(fd, "Model : %s\n", buffer) != 1)
+    badFormatAndAbort(ERRINFO);
+
+  modelFilename = buffer;
 }

 Classifier::Type Classifier::str2type(const std::string & s)
@@ -121,9 +126,14 @@ int Classifier::getOracleActionIndex(Config & config)
   return as->getActionIndex(oracle->getAction(config));
 }

-int Classifier::trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & start, std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & end)
+int Classifier::trainOnBatch(MLP::Examples & examples, int start, int end)
 {
-  return mlp->trainOnBatch(start, end);
+  return mlp->trainOnBatch(examples, start, end);
+}
+
+int Classifier::getScoreOnBatch(MLP::Examples & examples, int start, int end)
+{
+  return mlp->getScoreOnBatch(examples, start, end);
 }

 std::string Classifier::getActionName(int actionIndex)
@@ -151,3 +161,8 @@ void Classifier::save(const std::string & filename)
   mlp->save(filename);
 }
+
+void Classifier::save()
+{
+  mlp->save(modelFilename);
+}
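One caveat in the new "Model :" parse above: fscanf with a bare %s writes past the end of buffer on oversized input. A width-limited sketch, assuming a 1024-byte buffer (its real size is not visible in this diff):

#include <cstdio>
#include <string>

// Width-limited variant of the new "Model :" parse. %1023s caps the write
// and leaves room for the terminating NUL; the 1024 size is an assumption.
std::string readModelName(FILE * fd)
{
  char buffer[1024];
  if (fscanf(fd, "Model : %1023s\n", buffer) != 1)
    return ""; // the real code calls badFormatAndAbort(ERRINFO) here
  return buffer;
}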
@@ -6,27 +6,108 @@
 #include "TapeMachine.hpp"
 #include "Trainer.hpp"

-void printUsageAndExit(char * argv[])
-{
-  fprintf(stderr, "USAGE : %s mcd inputFile tm\n", *argv);
-  exit(1);
-}
+namespace po = boost::program_options;
+
+po::options_description getOptionsDescription()
+{
+  po::options_description desc("Command-Line Arguments");
+
+  po::options_description req("Required");
+  req.add_options()
+    ("tm", po::value<std::string>()->required(),
+      "File describing the Tape Machine we will train")
+    ("mcd", po::value<std::string>()->required(),
+      "MCD file that describes the input")
+    ("train,T", po::value<std::string>()->required(),
+      "Training corpus formatted according to the MCD");
+
+  po::options_description opt("Optional");
+  opt.add_options()
+    ("help,h", "Produce this help message")
+    ("devmcd", po::value<std::string>()->default_value(""),
+      "MCD file that describes the development corpus")
+    ("dev,D", po::value<std::string>()->default_value(""),
+      "Development corpus formatted according to the MCD")
+    ("nbiter,n", po::value<int>()->default_value(5),
+      "Number of training epochs (iterations)")
+    ("batchsize,b", po::value<int>()->default_value(256),
+      "Size of each training batch (in number of examples)")
+    ("shuffle", po::value<bool>()->default_value(true),
+      "Shuffle examples after each iteration");
+
+  desc.add(req).add(opt);
+
+  return desc;
+}
+
+po::variables_map checkOptions(po::options_description & od, int argc, char ** argv)
+{
+  po::variables_map vm;
+
+  try {po::store(po::parse_command_line(argc, argv, od), vm);}
+  catch(std::exception& e)
+  {
+    std::cerr << "Error: " << e.what() << "\n";
+    od.print(std::cerr);
+    exit(1);
+  }
+
+  if (vm.count("help"))
+  {
+    std::cout << od << "\n";
+    exit(0);
+  }
+
+  try {po::notify(vm);}
+  catch(std::exception& e)
+  {
+    std::cerr << "Error: " << e.what() << "\n";
+    od.print(std::cerr);
+    exit(1);
+  }
+
+  return vm;
+}

 int main(int argc, char * argv[])
 {
-  if (argc != 4)
-    printUsageAndExit(argv);
-
-  MCD mcd(argv[1]);
-  Config config(mcd);
-
-  TapeMachine tapeMachine(argv[3], true);
-  config.readInput(argv[2]);
-
-  Trainer trainer(tapeMachine, mcd, config);
-  trainer.train();
+  auto od = getOptionsDescription();
+
+  po::variables_map vm = checkOptions(od, argc, argv);
+
+  std::string trainMCDfilename = vm["mcd"].as<std::string>();
+  std::string devMCDfilename = vm["devmcd"].as<std::string>();
+  std::string tmFilename = vm["tm"].as<std::string>();
+  std::string trainFilename = vm["train"].as<std::string>();
+  std::string devFilename = vm["dev"].as<std::string>();
+  int nbIter = vm["nbiter"].as<int>();
+  int batchSize = vm["batchsize"].as<int>();
+  bool mustShuffle = vm["shuffle"].as<bool>();
+
+  TapeMachine tapeMachine(tmFilename, true);
+
+  MCD trainMcd(trainMCDfilename);
+  Config trainConfig(trainMcd);
+  trainConfig.readInput(trainFilename);
+
+  std::unique_ptr<MCD> devMcd;
+  std::unique_ptr<Config> devConfig;
+  std::unique_ptr<Trainer> trainer;
+
+  if(devFilename.empty() || devMCDfilename.empty())
+  {
+    trainer.reset(new Trainer(tapeMachine, trainMcd, trainConfig));
+  }
+  else
+  {
+    devMcd.reset(new MCD(devMCDfilename));
+    devConfig.reset(new Config(*devMcd.get()));
+    devConfig->readInput(devFilename);
+
+    trainer.reset(new Trainer(tapeMachine, trainMcd, trainConfig, devMcd.get(), devConfig.get()));
+  }
+
+  trainer->train(nbIter, batchSize, mustShuffle);

   return 0;
 }
...

(Two small fixes to the new options above: "formated" is corrected to "formatted", and the short option ",D" is dropped from "devmcd", since it collided with the ",D" already taken by "dev"; boost::program_options cannot resolve two options sharing one short name. The "devmcd" description is also de-duplicated so it no longer repeats the "mcd" text verbatim.)
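With these options, a typical invocation might look like the following; the binary name and all file names here are hypothetical:

  macaon_train --tm machine.tm --mcd train.mcd --train train.tsv \
               --devmcd dev.mcd --dev dev.tsv --nbiter 10 --batchsize 128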
@@ -10,18 +10,40 @@ class Trainer
   private :

   TapeMachine & tm;
-  MCD & mcd;
-  Config & config;
+  MCD & trainMcd;
+  Config & trainConfig;
+  MCD * devMcd;
+  Config * devConfig;
+
+  public :
+
+  using FD = FeatureModel::FeatureDescription;
+  using Example = std::pair<int, FD>;
+  using ExamplesIter = std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator;

   private :

-  void trainUnbatched();
-  void trainBatched();
+  void trainBatched(int nbIter, int batchSize, bool mustShuffle);
+  void getExamplesByClassifier(std::map<Classifier*, MLP::Examples> & examples, Config & config);
+  void processAllExamples(
+    std::map<Classifier*, MLP::Examples> & examples,
+    int batchSize, std::map< std::string, std::pair<int, int> > & nbExamples,
+    std::function<int(Classifier *, MLP::Examples &, int, int)> getScoreOnBatch);
+  void printIterationScores(FILE * output,
+    std::map< std::string, std::pair<int, int> > & nbExamplesTrain,
+    std::map< std::string, std::pair<int, int> > & nbExamplesDev,
+    int nbIter, int curIter);
+  void shuffleAllExamples(std::map<Classifier*, MLP::Examples > &);

   public :

   Trainer(TapeMachine & tm, MCD & mcd, Config & config);
-  void train();
+  Trainer(TapeMachine & tm, MCD & mcd, Config & config, MCD * devMcd, Config * devConfig);
+  void train(int nbIter, int batchSize, bool mustShuffle);
 };

 #endif
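Note the std::function parameter of processAllExamples: one driver loop serves both training (trainOnBatch) and dev evaluation (getScoreOnBatch), with trainBatched in the .cpp below injecting each as a lambda. A toy self-contained illustration of that pattern (all names illustrative, not from the repository):

#include <cstdio>
#include <functional>

// One batching loop, two behaviors injected by the caller.
static int runBatches(int nbBatches, std::function<int(int)> scoreBatch)
{
  int total = 0;
  for (int b = 0; b < nbBatches; b++)
    total += scoreBatch(b); // train-and-score or score-only, caller decides
  return total;
}

int main()
{
  int trained = runBatches(3, [](int b) { return b * 2; }); // "train" mode
  int scored  = runBatches(3, [](int b) { return b; });     // "eval" mode
  std::printf("%d %d\n", trained, scored);
  return 0;
}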
#include "Trainer.hpp" #include "Trainer.hpp"
Trainer::Trainer(TapeMachine & tm, MCD & mcd, Config & config) Trainer::Trainer(TapeMachine & tm, MCD & mcd, Config & config)
: tm(tm), mcd(mcd), config(config) : tm(tm), trainMcd(mcd), trainConfig(config)
{ {
this->devMcd = nullptr;
this->devConfig = nullptr;
} }
void Trainer::trainUnbatched() Trainer::Trainer(TapeMachine & tm, MCD & mcd, Config & config, MCD * devMcd, Config * devConfig) : tm(tm), trainMcd(mcd), trainConfig(config), devMcd(devMcd), devConfig(devConfig)
{ {
int nbIter = 20;
fprintf(stderr, "Training of \'%s\' :\n", tm.name.c_str()); }
for (int i = 0; i < nbIter; i++) void Trainer::getExamplesByClassifier(std::map<Classifier*, MLP::Examples> & examples, Config & config)
{ {
std::map< std::string, std::pair<int, int> > nbExamples;
while (!config.isFinal()) while (!config.isFinal())
{ {
TapeMachine::State * currentState = tm.getCurrentState(); TapeMachine::State * currentState = tm.getCurrentState();
Classifier * classifier = currentState->classifier; Classifier * classifier = currentState->classifier;
classifier->initClassifier(config);
//config.printForDebug(stderr); int neededActionIndex = classifier->getOracleActionIndex(config);
std::string neededActionName = classifier->getActionName(neededActionIndex);
//fprintf(stderr, "State : \'%s\'\n", currentState->name.c_str());
std::string neededActionName = classifier->getOracleAction(config);
auto weightedActions = classifier->weightActions(config, neededActionName);
//Classifier::printWeightedActions(stderr, weightedActions);
std::string & predictedAction = weightedActions[0].second;
nbExamples[classifier->name].first++;
if(predictedAction == neededActionName)
nbExamples[classifier->name].second++;
//fprintf(stderr, "Action : \'%s\'\n", neededActionName.c_str()); examples[classifier].second.emplace_back(Example(neededActionIndex, classifier->getFeatureDescription(config)));
examples[classifier].first.emplace_back(examples[classifier].first.size());
TapeMachine::Transition * transition = tm.getTransition(neededActionName); TapeMachine::Transition * transition = tm.getTransition(neededActionName);
tm.takeTransition(transition); tm.takeTransition(transition);
config.moveHead(transition->headMvt); config.moveHead(transition->headMvt);
} }
fprintf(stderr, "Iteration %d/%d :\n", i+1, nbIter);
for(auto & it : nbExamples)
fprintf(stderr, "\t%s %.2f%% accuracy\n", it.first.c_str(), 100.0*it.second.second / it.second.first);
config.reset();
}
auto & classifiers = tm.getClassifiers();
for(Classifier * cla : classifiers)
cla->save("toto.txt");
} }
void Trainer::trainBatched() void Trainer::processAllExamples(
std::map<Classifier*, MLP::Examples> & examples,
int batchSize, std::map< std::string, std::pair<int, int> > & nbExamples,
std::function<int(Classifier *, MLP::Examples &, int, int)> getScoreOnBatch)
{ {
using FD = FeatureModel::FeatureDescription; for(auto & it : examples)
using Example = std::pair<int, FD>; {
int nbBatches = (it.second.second.size() / batchSize) + (it.second.second.size() % batchSize ? 1 : 0);
std::map<Classifier*, std::vector<Example> > examples;
fprintf(stderr, "Training of \'%s\' :\n", tm.name.c_str());
while (!config.isFinal()) for(int numBatch = 0; numBatch < nbBatches; numBatch++)
{ {
TapeMachine::State * currentState = tm.getCurrentState(); int currentBatchSize = std::min<int>(batchSize, it.second.second.size() - (numBatch * batchSize));
Classifier * classifier = currentState->classifier;
classifier->initClassifier(config);
int neededActionIndex = classifier->getOracleActionIndex(config); int batchStart = numBatch * batchSize;
std::string neededActionName = classifier->getActionName(neededActionIndex); int batchEnd = batchStart + currentBatchSize;
examples[classifier].emplace_back(Example(neededActionIndex, classifier->getFeatureDescription(config))); int nbCorrect = getScoreOnBatch(it.first, examples[it.first], batchStart, batchEnd);
TapeMachine::Transition * transition = tm.getTransition(neededActionName); nbExamples[it.first->name].first += currentBatchSize;
tm.takeTransition(transition); nbExamples[it.first->name].second += nbCorrect;
config.moveHead(transition->headMvt); }
}
} }
int nbIter = 5; void Trainer::printIterationScores(FILE * output,
int batchSize = 256; std::map< std::string, std::pair<int, int> > & nbExamplesTrain,
std::map< std::string, std::pair<int, int> > & nbExamplesDev,
for (int i = 0; i < nbIter; i++) int nbIter, int curIter)
{ {
std::map< std::string, std::pair<int, int> > nbExamples; fprintf(output, "Iteration %d/%d :\n", curIter+1, nbIter);
for(auto & it : nbExamplesTrain)
{
float scoreTrain = 100.0*it.second.second / it.second.first;
float scoreDev = devConfig ? 100.0*nbExamplesDev[it.first].second / nbExamplesDev[it.first].first : -1.0;
if (devConfig)
fprintf(output, "\t%s accuracy : train(%.2f%%) dev(%.2f%%)\n", it.first.c_str(), scoreTrain, scoreDev);
else
fprintf(output, "\t%s accuracy : train(%.2f%%)\n", it.first.c_str(), scoreTrain);
}
}
void Trainer::shuffleAllExamples(std::map<Classifier*, MLP::Examples > & examples)
{
for (auto & it : examples) for (auto & it : examples)
std::random_shuffle(it.second.first.begin(), it.second.first.end());
}
void Trainer::trainBatched(int nbIter, int batchSize, bool mustShuffle)
{ {
int nbBatches = (it.second.size() / batchSize) + (it.second.size() % batchSize ? 1 : 0); std::map<Classifier*, MLP::Examples > trainExamples;
std::map<Classifier*, MLP::Examples > devExamples;
for(int numBatch = 0; numBatch < nbBatches; numBatch++) fprintf(stderr, "Training of \'%s\' :\n", tm.name.c_str());
getExamplesByClassifier(trainExamples, trainConfig);
if(devMcd && devConfig)
getExamplesByClassifier(devExamples, *devConfig);
for (int i = 0; i < nbIter; i++)
{ {
int currentBatchSize = std::min<int>(batchSize, it.second.size() - (numBatch * batchSize)); std::map< std::string, std::pair<int, int> > nbExamplesTrain;
std::map< std::string, std::pair<int, int> > nbExamplesDev;
auto batchStart = it.second.begin() + numBatch * batchSize; if(mustShuffle)
auto batchEnd = batchStart + currentBatchSize; shuffleAllExamples(trainExamples);
int nbCorrect = it.first->trainOnBatch(batchStart, batchEnd); processAllExamples(trainExamples, batchSize, nbExamplesTrain,
[](Classifier * c, MLP::Examples & ex, int s, int e)
{
return c->trainOnBatch(ex, s, e);
});
nbExamples[it.first->name].first += currentBatchSize; processAllExamples(devExamples, batchSize, nbExamplesDev,
nbExamples[it.first->name].second += nbCorrect; [](Classifier * c, MLP::Examples & ex, int s, int e)
} {
} return c->getScoreOnBatch(ex, s, e);
});
fprintf(stderr, "Iteration %d/%d :\n", i+1, nbIter); printIterationScores(stderr, nbExamplesTrain, nbExamplesDev, nbIter, i);
for(auto & it : nbExamples)
fprintf(stderr, "\t%s %.2f%% accuracy\n", it.first.c_str(), 100.0*it.second.second / it.second.first);
} }
auto & classifiers = tm.getClassifiers(); auto & classifiers = tm.getClassifiers();
for(Classifier * cla : classifiers) for(Classifier * cla : classifiers)
cla->save("toto.txt"); cla->save();
} }
void Trainer::train() void Trainer::train(int nbIter, int batchSize, bool mustShuffle)
{ {
//trainUnbatched(); trainBatched(nbIter, batchSize, mustShuffle);
trainBatched();
} }
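A portability note on shuffleAllExamples: std::random_shuffle was deprecated in C++14 and removed in C++17, so the call above will not compile under newer standards. A drop-in sketch using std::shuffle (the engine and its seeding are an assumption, not from the repository):

#include <algorithm>
#include <random>
#include <vector>

// C++17-safe replacement for the std::random_shuffle call: permute the
// index ("order") vector of an MLP::Examples with an explicit engine.
void shuffleOrder(std::vector<int> & order)
{
  static std::mt19937 rng(std::random_device{}());
  std::shuffle(order.begin(), order.end(), rng);
}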