Commit 32459c5a authored by Franck Dary

Added command line arguments for train and many options

parent b3f744f5
@@ -12,6 +12,8 @@ class MLP
{
public :
using Examples = std::pair< std::vector<int>, std::vector<std::pair<int, FeatureModel::FeatureDescription> > >;
enum Activation
{
SIGMOID,
@@ -72,7 +74,8 @@ class MLP
MLP(const std::string & filename);
std::vector<float> predict(FeatureModel::FeatureDescription & fd, int goldClass);
int trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & start, std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & end);
int trainOnBatch(Examples & examples, int start, int end);
int getScoreOnBatch(Examples & examples, int start, int end);
void save(const std::string & filename);
};
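The new Examples alias pairs an index vector with the example vector: .first is a permutation of indices into .second, so shuffling between epochs only permutes cheap ints while the heavy FeatureDescription payloads stay put. A minimal sketch of the layout (illustrative, not part of the commit):

// Examples = pair< order, data >
//   order : std::vector<int>, a permutation of 0..N-1 (traversal order)
//   data  : std::vector< std::pair<int, FeatureModel::FeatureDescription> >
MLP::Examples examples;
// ... fill examples.second with (goldClass, featureDescription) pairs ...
for (unsigned int i = 0; i < examples.second.size(); i++)
  examples.first.emplace_back(i);      // start from the identity order
std::random_shuffle(examples.first.begin(), examples.first.end());
// trainOnBatch(examples, s, e) then visits examples.second[examples.first[i]]
// for i in [s, e), so each epoch can see the data in a fresh order.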
@@ -255,7 +255,7 @@ void MLP::printParameters(FILE * output)
fprintf(output, "Parameters : NOT IMPLEMENTED\n");
}
int MLP::trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & start, std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & end)
int MLP::trainOnBatch(Examples & examples, int start, int end)
{
dynet::ComputationGraph cg;
std::vector<dynet::Expression> inputs;
@@ -263,17 +263,21 @@ int MLP::trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescriptio
int inputDim = 0;
int outputDim = layers.back().output_dim;
for(auto it = start; it != end; it++)
for(int i = start; i < end; i++)
{
auto & order = examples.first;
int exampleIndex = order[i];
auto & example = examples.second[exampleIndex];
std::vector<dynet::Expression> expressions;
expressions.clear();
for (auto & featValue : it->second.values)
for (auto & featValue : example.second.values)
expressions.emplace_back(featValue2Expression(cg, featValue));
inputs.emplace_back(dynet::concatenate(expressions));
inputDim = inputs.back().dim().rows();
goldClasses.emplace_back((unsigned)it->first);
goldClasses.emplace_back((unsigned)example.first);
}
dynet::Expression concatenation = dynet::concatenate(inputs);
@@ -309,6 +313,56 @@ int MLP::trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescriptio
return nbCorrect;
}
int MLP::getScoreOnBatch(Examples & examples, int start, int end)
{
dynet::ComputationGraph cg;
std::vector<dynet::Expression> inputs;
std::vector<unsigned int> goldClasses;
int inputDim = 0;
int outputDim = layers.back().output_dim;
for(int i = start; i < end; i++)
{
auto & order = examples.first;
int exampleIndex = order[i];
auto & example = examples.second[exampleIndex];
std::vector<dynet::Expression> expressions;
expressions.clear();
for (auto & featValue : example.second.values)
expressions.emplace_back(featValue2Expression(cg, featValue));
inputs.emplace_back(dynet::concatenate(expressions));
inputDim = inputs.back().dim().rows();
goldClasses.emplace_back((unsigned)example.first);
}
dynet::Expression concatenation = dynet::concatenate(inputs);
int batchSize = end - start;
dynet::Expression batchedInput = reshape((concatenation),
dynet::Dim({(unsigned)inputDim}, batchSize));
dynet::Expression output = run(cg, batchedInput);
int nbCorrect = 0;
std::vector<float> predictions = as_vector(output.value());
for (unsigned int i = 0; (int)i < batchSize; i++)
{
int prediction = 0;
for (unsigned int j = 0; (int)j < outputDim; j++)
if(predictions[i*outputDim+j] > predictions[i*outputDim+prediction])
prediction = (int)j;
if(prediction == (int)goldClasses[i])
nbCorrect++;
}
return nbCorrect;
}
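Both trainOnBatch and getScoreOnBatch rely on the same dynet batching idiom: the per-example input expressions are concatenated into one long column vector, then reshaped so the minibatch becomes dynet's batch dimension. A condensed sketch (d and N are placeholders for inputDim and the batch size):

unsigned d = 100;                       // per-example input dimension
unsigned N = 256;                       // number of examples in the batch
std::vector<dynet::Expression> inputs;  // N expressions, each of dim {d}
// ... one concatenated feature expression per example ...
dynet::Expression concat = dynet::concatenate(inputs);                   // dim {N*d}
dynet::Expression batched = dynet::reshape(concat, dynet::Dim({d}, N)); // dim {d}, batch N
// After the forward pass, as_vector(output.value()) returns N * outputDim
// floats, one block of outputDim scores per batch element, which is exactly
// how the argmax loop above indexes predictions[i*outputDim+j].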
void MLP::save(const std::string & filename)
{
saveStruct(filename);
@@ -31,6 +31,11 @@ class Classifier
std::unique_ptr<ActionSet> as;
std::unique_ptr<MLP> mlp;
Oracle * oracle;
std::string modelFilename;
private :
void save(const std::string & filename);
public :
@@ -42,10 +47,11 @@ class Classifier
FeatureModel::FeatureDescription getFeatureDescription(Config & config);
std::string getOracleAction(Config & config);
int getOracleActionIndex(Config & config);
int trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & start, std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & end);
int getScoreOnBatch(MLP::Examples & examples, int start, int end);
int trainOnBatch(MLP::Examples & examples, int start, int end);
std::string getActionName(int actionIndex);
void initClassifier(Config & config);
void save(const std::string & filename);
void save();
};
#endif
@@ -42,6 +42,11 @@ Classifier::Classifier(const std::string & filename, bool trainMode)
badFormatAndAbort(ERRINFO);
oracle = Oracle::getOracle(buffer);
if(fscanf(fd, "Model : %s\n", buffer) != 1)
badFormatAndAbort(ERRINFO);
modelFilename = buffer;
}
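One caveat with the new "Model :" line: fscanf with a bare %s performs an unbounded read into buffer. A hardened variant (a suggestion, assuming buffer holds 1024 bytes; not what the commit does) caps the read:

char buffer[1024];
// %1023s leaves room for the terminating NUL that fscanf appends.
if (fscanf(fd, "Model : %1023s\n", buffer) != 1)
  badFormatAndAbort(ERRINFO);
modelFilename = buffer;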
Classifier::Type Classifier::str2type(const std::string & s)
@@ -121,9 +126,14 @@ int Classifier::getOracleActionIndex(Config & config)
return as->getActionIndex(oracle->getAction(config));
}
int Classifier::trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & start, std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & end)
int Classifier::trainOnBatch(MLP::Examples & examples, int start, int end)
{
return mlp->trainOnBatch(start, end);
return mlp->trainOnBatch(examples, start, end);
}
int Classifier::getScoreOnBatch(MLP::Examples & examples, int start, int end)
{
return mlp->getScoreOnBatch(examples, start, end);
}
std::string Classifier::getActionName(int actionIndex)
@@ -151,3 +161,8 @@ void Classifier::save(const std::string & filename)
mlp->save(filename);
}
void Classifier::save()
{
mlp->save(modelFilename);
}
@@ -6,27 +6,108 @@
#include "TapeMachine.hpp"
#include "Trainer.hpp"
void printUsageAndExit(char * argv[])
namespace po = boost::program_options;
po::options_description getOptionsDescription()
{
po::options_description desc("Command-Line Arguments ");
po::options_description req("Required");
req.add_options()
("tm", po::value<std::string>()->required(),
"File describing the Tape Machine we will train")
("mcd", po::value<std::string>()->required(),
"MCD file that describes the input")
("train,T", po::value<std::string>()->required(),
"Training corpus formated according to the MCD");
po::options_description opt("Optional");
opt.add_options()
("help,h", "Produce this help message")
("devmcd,D", po::value<std::string>()->default_value(""),
"MCD file that describes the input")
("dev,D", po::value<std::string>()->default_value(""),
"Development corpus formated according to the MCD")
("nbiter,n", po::value<int>()->default_value(5),
"Number of training epochs (iterations)")
("batchsize,b", po::value<int>()->default_value(256),
"Size of each training batch (in number of examples)")
("shuffle", po::value<bool>()->default_value(true),
"Shuffle examples after each iteration");
desc.add(req).add(opt);
return desc;
}
po::variables_map checkOptions(po::options_description & od, int argc, char ** argv)
{
po::variables_map vm;
try {po::store(po::parse_command_line(argc, argv, od), vm);}
catch(std::exception& e)
{
std::cerr << "Error: " << e.what() << "\n";
od.print(std::cerr);
exit(1);
}
if (vm.count("help"))
{
std::cout << od << "\n";
exit(0);
}
try {po::notify(vm);}
catch(std::exception& e)
{
fprintf(stderr, "USAGE : %s mcd inputFile tm\n", *argv);
std::cerr << "Error: " << e.what() << "\n";
od.print(std::cerr);
exit(1);
}
return vm;
}
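Because checkOptions handles --help after po::store but before po::notify, help can be printed without supplying the required arguments (required() is only enforced by notify). A hypothetical invocation of the resulting binary (the executable and file names are placeholders; only the option names come from this commit):

./train --tm machine.tm --mcd train.mcd --train train.conll \
        --devmcd dev.mcd --dev dev.conll \
        --nbiter 10 --batchsize 128 --shuffle true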
int main(int argc, char * argv[])
{
if (argc != 4)
printUsageAndExit(argv);
auto od = getOptionsDescription();
po::variables_map vm = checkOptions(od, argc, argv);
MCD mcd(argv[1]);
Config config(mcd);
std::string trainMCDfilename = vm["mcd"].as<std::string>();
std::string devMCDfilename = vm["devmcd"].as<std::string>();
std::string tmFilename = vm["tm"].as<std::string>();
std::string trainFilename = vm["train"].as<std::string>();
std::string devFilename = vm["dev"].as<std::string>();
int nbIter = vm["nbiter"].as<int>();
int batchSize = vm["batchsize"].as<int>();
bool mustShuffle = vm["shuffle"].as<bool>();
TapeMachine tapeMachine(argv[3], true);
TapeMachine tapeMachine(tmFilename, true);
config.readInput(argv[2]);
MCD trainMcd(trainMCDfilename);
Config trainConfig(trainMcd);
trainConfig.readInput(trainFilename);
Trainer trainer(tapeMachine, mcd, config);
std::unique_ptr<MCD> devMcd;
std::unique_ptr<Config> devConfig;
std::unique_ptr<Trainer> trainer;
if(devFilename.empty() || devMCDfilename.empty())
{
trainer.reset(new Trainer(tapeMachine, trainMcd, trainConfig));
}
else
{
devMcd.reset(new MCD(devMCDfilename));
devConfig.reset(new Config(*devMcd.get()));
devConfig->readInput(devFilename);
trainer.reset(new Trainer(tapeMachine, trainMcd, trainConfig, devMcd.get(), devConfig.get()));
}
trainer.train();
trainer->train(nbIter, batchSize, mustShuffle);
return 0;
}
@@ -10,18 +10,40 @@ class Trainer
private :
TapeMachine & tm;
MCD & mcd;
Config & config;
MCD & trainMcd;
Config & trainConfig;
MCD * devMcd;
Config * devConfig;
public :
using FD = FeatureModel::FeatureDescription;
using Example = std::pair<int, FD>;
using ExamplesIter = std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator;
private :
void trainUnbatched();
void trainBatched();
void trainBatched(int nbIter, int batchSize, bool mustShuffle);
void getExamplesByClassifier(std::map<Classifier*, MLP::Examples> & examples, Config & config);
void processAllExamples(
std::map<Classifier*, MLP::Examples> & examples,
int batchSize, std::map< std::string, std::pair<int, int> > & nbExamples,
std::function<int(Classifier *, MLP::Examples &, int, int)> getScoreOnBatch);
void printIterationScores(FILE * output,
std::map< std::string, std::pair<int, int> > & nbExamplesTrain,
std::map< std::string, std::pair<int, int> > & nbExamplesDev,
int nbIter, int curIter);
void shuffleAllExamples(std::map<Classifier*, MLP::Examples > &);
public :
Trainer(TapeMachine & tm, MCD & mcd, Config & config);
void train();
Trainer(TapeMachine & tm, MCD & mcd, Config & config, MCD * devMcd, Config * devConfig);
void train(int nbIter, int batchSize, bool mustShuffle);
};
#endif
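The std::function parameter on processAllExamples turns the batch loop into a reusable seam: trainBatched (below) passes one lambda that trains on the train set and another that only scores the dev set. Any per-batch action with the same signature fits; for instance (purely illustrative, testExamples is hypothetical):

std::map< std::string, std::pair<int, int> > nbExamplesTest;
processAllExamples(testExamples, batchSize, nbExamplesTest,
  [](Classifier * c, MLP::Examples & ex, int s, int e)
  {
    return c->getScoreOnBatch(ex, s, e);
  });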
#include "Trainer.hpp"
Trainer::Trainer(TapeMachine & tm, MCD & mcd, Config & config)
: tm(tm), mcd(mcd), config(config)
: tm(tm), trainMcd(mcd), trainConfig(config)
{
this->devMcd = nullptr;
this->devConfig = nullptr;
}
void Trainer::trainUnbatched()
Trainer::Trainer(TapeMachine & tm, MCD & mcd, Config & config, MCD * devMcd, Config * devConfig)
: tm(tm), trainMcd(mcd), trainConfig(config), devMcd(devMcd), devConfig(devConfig)
{
int nbIter = 20;
fprintf(stderr, "Training of \'%s\' :\n", tm.name.c_str());
}
for (int i = 0; i < nbIter; i++)
void Trainer::getExamplesByClassifier(std::map<Classifier*, MLP::Examples> & examples, Config & config)
{
std::map< std::string, std::pair<int, int> > nbExamples;
while (!config.isFinal())
{
TapeMachine::State * currentState = tm.getCurrentState();
Classifier * classifier = currentState->classifier;
classifier->initClassifier(config);
//config.printForDebug(stderr);
//fprintf(stderr, "State : \'%s\'\n", currentState->name.c_str());
std::string neededActionName = classifier->getOracleAction(config);
auto weightedActions = classifier->weightActions(config, neededActionName);
//Classifier::printWeightedActions(stderr, weightedActions);
std::string & predictedAction = weightedActions[0].second;
nbExamples[classifier->name].first++;
if(predictedAction == neededActionName)
nbExamples[classifier->name].second++;
int neededActionIndex = classifier->getOracleActionIndex(config);
std::string neededActionName = classifier->getActionName(neededActionIndex);
//fprintf(stderr, "Action : \'%s\'\n", neededActionName.c_str());
examples[classifier].second.emplace_back(Example(neededActionIndex, classifier->getFeatureDescription(config)));
examples[classifier].first.emplace_back(examples[classifier].first.size());
TapeMachine::Transition * transition = tm.getTransition(neededActionName);
tm.takeTransition(transition);
config.moveHead(transition->headMvt);
}
fprintf(stderr, "Iteration %d/%d :\n", i+1, nbIter);
for(auto & it : nbExamples)
fprintf(stderr, "\t%s %.2f%% accuracy\n", it.first.c_str(), 100.0*it.second.second / it.second.first);
config.reset();
}
auto & classifiers = tm.getClassifiers();
for(Classifier * cla : classifiers)
cla->save("toto.txt");
}
void Trainer::trainBatched()
void Trainer::processAllExamples(
std::map<Classifier*, MLP::Examples> & examples,
int batchSize, std::map< std::string, std::pair<int, int> > & nbExamples,
std::function<int(Classifier *, MLP::Examples &, int, int)> getScoreOnBatch)
{
using FD = FeatureModel::FeatureDescription;
using Example = std::pair<int, FD>;
std::map<Classifier*, std::vector<Example> > examples;
fprintf(stderr, "Training of \'%s\' :\n", tm.name.c_str());
for(auto & it : examples)
{
int nbBatches = (it.second.second.size() / batchSize) + (it.second.second.size() % batchSize ? 1 : 0); // ceil division: e.g. 1000 examples at batchSize 256 -> 4 batches, the last holding 232
while (!config.isFinal())
for(int numBatch = 0; numBatch < nbBatches; numBatch++)
{
TapeMachine::State * currentState = tm.getCurrentState();
Classifier * classifier = currentState->classifier;
classifier->initClassifier(config);
int currentBatchSize = std::min<int>(batchSize, it.second.second.size() - (numBatch * batchSize));
int neededActionIndex = classifier->getOracleActionIndex(config);
std::string neededActionName = classifier->getActionName(neededActionIndex);
int batchStart = numBatch * batchSize;
int batchEnd = batchStart + currentBatchSize;
examples[classifier].emplace_back(Example(neededActionIndex, classifier->getFeatureDescription(config)));
int nbCorrect = getScoreOnBatch(it.first, examples[it.first], batchStart, batchEnd);
TapeMachine::Transition * transition = tm.getTransition(neededActionName);
tm.takeTransition(transition);
config.moveHead(transition->headMvt);
nbExamples[it.first->name].first += currentBatchSize;
nbExamples[it.first->name].second += nbCorrect;
}
}
}
int nbIter = 5;
int batchSize = 256;
for (int i = 0; i < nbIter; i++)
void Trainer::printIterationScores(FILE * output,
std::map< std::string, std::pair<int, int> > & nbExamplesTrain,
std::map< std::string, std::pair<int, int> > & nbExamplesDev,
int nbIter, int curIter)
{
std::map< std::string, std::pair<int, int> > nbExamples;
fprintf(output, "Iteration %d/%d :\n", curIter+1, nbIter);
for(auto & it : nbExamplesTrain)
{
float scoreTrain = 100.0*it.second.second / it.second.first;
float scoreDev = devConfig ? 100.0*nbExamplesDev[it.first].second / nbExamplesDev[it.first].first : -1.0;
if (devConfig)
fprintf(output, "\t%s accuracy : train(%.2f%%) dev(%.2f%%)\n", it.first.c_str(), scoreTrain, scoreDev);
else
fprintf(output, "\t%s accuracy : train(%.2f%%)\n", it.first.c_str(), scoreTrain);
}
}
void Trainer::shuffleAllExamples(std::map<Classifier*, MLP::Examples > & examples)
{
for (auto & it : examples)
std::random_shuffle(it.second.first.begin(), it.second.first.end());
}
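Note that std::random_shuffle was deprecated in C++14 and removed in C++17. If this needs to build under newer standards, a drop-in replacement (a sketch, not part of this commit) is std::shuffle with an explicit engine:

#include <algorithm>
#include <random>

void Trainer::shuffleAllExamples(std::map<Classifier*, MLP::Examples > & examples)
{
  // One engine for the whole run, seeded once from the OS entropy source.
  static std::mt19937 rng(std::random_device{}());
  for (auto & it : examples)
    std::shuffle(it.second.first.begin(), it.second.first.end(), rng);
}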
void Trainer::trainBatched(int nbIter, int batchSize, bool mustShuffle)
{
int nbBatches = (it.second.size() / batchSize) + (it.second.size() % batchSize ? 1 : 0);
std::map<Classifier*, MLP::Examples > trainExamples;
std::map<Classifier*, MLP::Examples > devExamples;
for(int numBatch = 0; numBatch < nbBatches; numBatch++)
fprintf(stderr, "Training of \'%s\' :\n", tm.name.c_str());
getExamplesByClassifier(trainExamples, trainConfig);
if(devMcd && devConfig)
getExamplesByClassifier(devExamples, *devConfig);
for (int i = 0; i < nbIter; i++)
{
int currentBatchSize = std::min<int>(batchSize, it.second.size() - (numBatch * batchSize));
std::map< std::string, std::pair<int, int> > nbExamplesTrain;
std::map< std::string, std::pair<int, int> > nbExamplesDev;
auto batchStart = it.second.begin() + numBatch * batchSize;
auto batchEnd = batchStart + currentBatchSize;
if(mustShuffle)
shuffleAllExamples(trainExamples);
int nbCorrect = it.first->trainOnBatch(batchStart, batchEnd);
processAllExamples(trainExamples, batchSize, nbExamplesTrain,
[](Classifier * c, MLP::Examples & ex, int s, int e)
{
return c->trainOnBatch(ex, s, e);
});
nbExamples[it.first->name].first += currentBatchSize;
nbExamples[it.first->name].second += nbCorrect;
}
}
processAllExamples(devExamples, batchSize, nbExamplesDev,
[](Classifier * c, MLP::Examples & ex, int s, int e)
{
return c->getScoreOnBatch(ex, s, e);
});
fprintf(stderr, "Iteration %d/%d :\n", i+1, nbIter);
for(auto & it : nbExamples)
fprintf(stderr, "\t%s %.2f%% accuracy\n", it.first.c_str(), 100.0*it.second.second / it.second.first);
printIterationScores(stderr, nbExamplesTrain, nbExamplesDev, nbIter, i);
}
auto & classifiers = tm.getClassifiers();
for(Classifier * cla : classifiers)
cla->save("toto.txt");
cla->save();
}
void Trainer::train()
void Trainer::train(int nbIter, int batchSize, bool mustShuffle)
{
//trainUnbatched();
trainBatched();
trainBatched(nbIter, batchSize, mustShuffle);
}