diff --git a/MLP/include/MLP.hpp b/MLP/include/MLP.hpp
index 14f174d6e4375ea2503f73f78c8eeb45586fc60d..5723d753c213dc6cce17271d5898f62f287db35b 100644
--- a/MLP/include/MLP.hpp
+++ b/MLP/include/MLP.hpp
@@ -12,6 +12,10 @@ class MLP
 {
   public :
 
+  // first : the order in which examples are visited (shuffled between epochs),
+  // second : the examples themselves, as (gold class, features) pairs.
+  using Examples = std::pair< std::vector<int>, std::vector<std::pair<int, FeatureModel::FeatureDescription> > >;
+
   enum Activation
   {
     SIGMOID,
@@ -72,7 +76,8 @@ class MLP
   MLP(const std::string & filename);
 
   std::vector<float> predict(FeatureModel::FeatureDescription & fd, int goldClass);
-  int trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & start, std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & end);
+  int trainOnBatch(Examples & examples, int start, int end);
+  int getScoreOnBatch(Examples & examples, int start, int end);
   void save(const std::string & filename);
 };
 
diff --git a/MLP/src/MLP.cpp b/MLP/src/MLP.cpp
index 852895421bf3f7e4197ee8ac7de881ff6c9e83bd..0222e96b20e58f5d599598876951e5b1ad0f614c 100644
--- a/MLP/src/MLP.cpp
+++ b/MLP/src/MLP.cpp
@@ -255,7 +255,7 @@ void MLP::printParameters(FILE * output)
   fprintf(output, "Parameters : NOT IMPLEMENTED\n");
 }
 
-int MLP::trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & start, std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & end)
+int MLP::trainOnBatch(Examples & examples, int start, int end)
 {
   dynet::ComputationGraph cg;
   std::vector<dynet::Expression> inputs;
@@ -263,17 +263,21 @@ int MLP::trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescriptio
   int inputDim = 0;
   int outputDim = layers.back().output_dim;
 
-  for(auto it = start; it != end; it++)
+  for(int i = start; i < end; i++)
   {
+    auto & order = examples.first;
+    int exampleIndex = order[i];
+    auto & example = examples.second[exampleIndex];
+
     std::vector<dynet::Expression> expressions;
     expressions.clear();
 
-    for (auto & featValue : it->second.values)
+    for (auto & featValue : example.second.values)
       expressions.emplace_back(featValue2Expression(cg, featValue));
 
     inputs.emplace_back(dynet::concatenate(expressions));
     inputDim = inputs.back().dim().rows();
-    goldClasses.emplace_back((unsigned)it->first);
+    goldClasses.emplace_back((unsigned)example.first);
   }
 
   dynet::Expression concatenation = dynet::concatenate(inputs);
@@ -309,6 +313,57 @@ int MLP::trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescriptio
   return nbCorrect;
 }
 
+int MLP::getScoreOnBatch(Examples & examples, int start, int end)
+{
+  // Same batch assembly as trainOnBatch, but forward pass only : no loss,
+  // no backward pass and no parameter update.
+  dynet::ComputationGraph cg;
+  std::vector<dynet::Expression> inputs;
+  std::vector<unsigned int> goldClasses;
+  int inputDim = 0;
+  int outputDim = layers.back().output_dim;
+
+  for(int i = start; i < end; i++)
+  {
+    auto & order = examples.first;
+    int exampleIndex = order[i];
+    auto & example = examples.second[exampleIndex];
+
+    std::vector<dynet::Expression> expressions;
+    expressions.clear();
+
+    for (auto & featValue : example.second.values)
+      expressions.emplace_back(featValue2Expression(cg, featValue));
+
+    inputs.emplace_back(dynet::concatenate(expressions));
+    inputDim = inputs.back().dim().rows();
+    goldClasses.emplace_back((unsigned)example.first);
+  }
+
+  dynet::Expression concatenation = dynet::concatenate(inputs);
+  int batchSize = end - start;
+
+  dynet::Expression batchedInput = reshape((concatenation),
+                                           dynet::Dim({(unsigned)inputDim}, batchSize));
+
+  dynet::Expression output = run(cg, batchedInput);
+
+  int nbCorrect = 0;
+  std::vector<float> predictions = as_vector(output.value());
+  for (unsigned int i = 0; (int)i < batchSize; i++)
+  {
+    int prediction = 0;
+
+    for (unsigned int j = 0; (int)j < outputDim; j++)
+      if(predictions[i*outputDim+j] > predictions[i*outputDim+prediction])
+        prediction = (int)j;
+
+    if(prediction == (int)goldClasses[i])
+      nbCorrect++;
+  }
+
+  return nbCorrect;
+}
+
 void MLP::save(const std::string & filename)
 {
   saveStruct(filename);
diff --git a/tape_machine/include/Classifier.hpp b/tape_machine/include/Classifier.hpp
index 17737346422e446f1a6f25b6973e874bfa3d779c..fc4da733310eb0756416460ec76d3b73785ad25d 100644
--- a/tape_machine/include/Classifier.hpp
+++ b/tape_machine/include/Classifier.hpp
@@ -31,6 +31,11 @@ class Classifier
   std::unique_ptr<ActionSet> as;
   std::unique_ptr<MLP> mlp;
   Oracle * oracle;
+  std::string modelFilename;
+
+  private :
+
+  void save(const std::string & filename);
 
   public :
 
@@ -42,10 +47,11 @@ class Classifier
   FeatureModel::FeatureDescription getFeatureDescription(Config & config);
   std::string getOracleAction(Config & config);
   int getOracleActionIndex(Config & config);
-  int trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & start, std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & end);
+  int getScoreOnBatch(MLP::Examples & examples, int start, int end);
+  int trainOnBatch(MLP::Examples & examples, int start, int end);
   std::string getActionName(int actionIndex);
   void initClassifier(Config & config);
-  void save(const std::string & filename);
+  void save();
 };
 
 #endif
diff --git a/tape_machine/src/Classifier.cpp b/tape_machine/src/Classifier.cpp
index f6fa917cd9621bcb79d36571cf1778351698c2bb..250e38fa2e01fe690dab70ad9df65230e6328580 100644
--- a/tape_machine/src/Classifier.cpp
+++ b/tape_machine/src/Classifier.cpp
@@ -42,6 +42,11 @@ Classifier::Classifier(const std::string & filename, bool trainMode)
     badFormatAndAbort(ERRINFO);
 
   oracle = Oracle::getOracle(buffer);
+
+  if(fscanf(fd, "Model : %s\n", buffer) != 1)
+    badFormatAndAbort(ERRINFO);
+
+  modelFilename = buffer;
 }
 
 Classifier::Type Classifier::str2type(const std::string & s)
@@ -121,9 +126,14 @@ int Classifier::getOracleActionIndex(Config & config)
   return as->getActionIndex(oracle->getAction(config));
 }
 
-int Classifier::trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & start, std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & end)
+int Classifier::trainOnBatch(MLP::Examples & examples, int start, int end)
 {
-  return mlp->trainOnBatch(start, end);
+  return mlp->trainOnBatch(examples, start, end);
+}
+
+int Classifier::getScoreOnBatch(MLP::Examples & examples, int start, int end)
+{
+  return mlp->getScoreOnBatch(examples, start, end);
 }
 
 std::string Classifier::getActionName(int actionIndex)
@@ -151,3 +161,8 @@ void Classifier::save(const std::string & filename)
   mlp->save(filename);
 }
 
+void Classifier::save()
+{
+  mlp->save(modelFilename);
+}
+
diff --git a/tests/src/test_train.cpp b/tests/src/test_train.cpp
index 764bda551088e4a37fc243fe5de232464aabae65..6140f34920f5f4a9563b3d90800e909912490058 100644
--- a/tests/src/test_train.cpp
+++ b/tests/src/test_train.cpp
@@ -6,27 +6,110 @@
 #include "TapeMachine.hpp"
 #include "Trainer.hpp"
 
-void printUsageAndExit(char * argv[])
-{
-  fprintf(stderr, "USAGE : %s mcd inputFile tm\n", *argv);
-  exit(1);
+#include <boost/program_options.hpp>
+
+namespace po = boost::program_options;
+
+po::options_description getOptionsDescription()
+{
+  po::options_description desc("Command-Line Arguments");
+
+  po::options_description req("Required");
+  req.add_options()
+ ("tm", po::value<std::string>()->required(), + "File describing the Tape Machine we will train") + ("mcd", po::value<std::string>()->required(), + "MCD file that describes the input") + ("train,T", po::value<std::string>()->required(), + "Training corpus formated according to the MCD"); + + po::options_description opt("Optional"); + opt.add_options() + ("help,h", "Produce this help message") + ("devmcd,D", po::value<std::string>()->default_value(""), + "MCD file that describes the input") + ("dev,D", po::value<std::string>()->default_value(""), + "Development corpus formated according to the MCD") + ("nbiter,n", po::value<int>()->default_value(5), + "Number of training epochs (iterations)") + ("batchsize,b", po::value<int>()->default_value(256), + "Size of each training batch (in number of examples)") + ("shuffle", po::value<bool>()->default_value(true), + "Shuffle examples after each iteration"); + + desc.add(req).add(opt); + + return desc; +} + +po::variables_map checkOptions(po::options_description & od, int argc, char ** argv) +{ + po::variables_map vm; + + try {po::store(po::parse_command_line(argc, argv, od), vm);} + catch(std::exception& e) + { + std::cerr << "Error: " << e.what() << "\n"; + od.print(std::cerr); + exit(1); + } + + if (vm.count("help")) + { + std::cout << od << "\n"; + exit(0); + } + + try {po::notify(vm);} + catch(std::exception& e) + { + std::cerr << "Error: " << e.what() << "\n"; + od.print(std::cerr); + exit(1); + } + + return vm; } int main(int argc, char * argv[]) { - if (argc != 4) - printUsageAndExit(argv); + auto od = getOptionsDescription(); + + po::variables_map vm = checkOptions(od, argc, argv); + + std::string trainMCDfilename = vm["mcd"].as<std::string>(); + std::string devMCDfilename = vm["devmcd"].as<std::string>(); + std::string tmFilename = vm["tm"].as<std::string>(); + std::string trainFilename = vm["train"].as<std::string>(); + std::string devFilename = vm["dev"].as<std::string>(); + int nbIter = vm["nbiter"].as<int>(); + int batchSize = vm["batchsize"].as<int>(); + bool mustShuffle = vm["shuffle"].as<bool>(); + + TapeMachine tapeMachine(tmFilename, true); - MCD mcd(argv[1]); - Config config(mcd); + MCD trainMcd(trainMCDfilename); + Config trainConfig(trainMcd); + trainConfig.readInput(trainFilename); - TapeMachine tapeMachine(argv[3], true); + std::unique_ptr<MCD> devMcd; + std::unique_ptr<Config> devConfig; - config.readInput(argv[2]); + std::unique_ptr<Trainer> trainer; - Trainer trainer(tapeMachine, mcd, config); + if(devFilename.empty() || devMCDfilename.empty()) + { + trainer.reset(new Trainer(tapeMachine, trainMcd, trainConfig)); + } + else + { + devMcd.reset(new MCD(devMCDfilename)); + devConfig.reset(new Config(*devMcd.get())); + devConfig->readInput(devFilename); + trainer.reset(new Trainer(tapeMachine, trainMcd, trainConfig, devMcd.get(), devConfig.get())); + } - trainer.train(); + trainer->train(nbIter, batchSize, mustShuffle); return 0; } diff --git a/trainer/include/Trainer.hpp b/trainer/include/Trainer.hpp index 93c41d12699b53b10fd281bba35e87934e42e945..b12df28656ad808e066642b128b5539d20be4e33 100644 --- a/trainer/include/Trainer.hpp +++ b/trainer/include/Trainer.hpp @@ -10,18 +10,40 @@ class Trainer private : TapeMachine & tm; - MCD & mcd; - Config & config; + MCD & trainMcd; + Config & trainConfig; + + MCD * devMcd; + Config * devConfig; + + public : + + using FD = FeatureModel::FeatureDescription; + using Example = std::pair<int, FD>; + using ExamplesIter = std::vector<std::pair<int, FeatureModel::FeatureDescription> 
 
   private :
 
-  void trainUnbatched();
-  void trainBatched();
+  void trainBatched(int nbIter, int batchSize, bool mustShuffle);
+  void getExamplesByClassifier(std::map<Classifier*, MLP::Examples> & examples, Config & config);
+
+  void processAllExamples(
+    std::map<Classifier*, MLP::Examples> & examples,
+    int batchSize, std::map< std::string, std::pair<int, int> > & nbExamples,
+    std::function<int(Classifier *, MLP::Examples &, int, int)> processBatch);
+
+  void printIterationScores(FILE * output,
+    std::map< std::string, std::pair<int, int> > & nbExamplesTrain,
+    std::map< std::string, std::pair<int, int> > & nbExamplesDev,
+    int nbIter, int curIter);
+
+  void shuffleAllExamples(std::map<Classifier*, MLP::Examples > &);
 
   public :
 
   Trainer(TapeMachine & tm, MCD & mcd, Config & config);
-  void train();
+  Trainer(TapeMachine & tm, MCD & mcd, Config & config, MCD * devMcd, Config * devConfig);
+  void train(int nbIter, int batchSize, bool mustShuffle);
 };
 
 #endif
diff --git a/trainer/src/Trainer.cpp b/trainer/src/Trainer.cpp
index fdb364c3a77fd97d03004442fe00da68ad9342a1..c5b6c98dcf9b1af4d5ab24278a9025b43bf804ef 100644
--- a/trainer/src/Trainer.cpp
+++ b/trainer/src/Trainer.cpp
@@ -1,120 +1,127 @@
 #include "Trainer.hpp"
+#include <algorithm>
 
 Trainer::Trainer(TapeMachine & tm, MCD & mcd, Config & config)
-: tm(tm), mcd(mcd), config(config)
+: tm(tm), trainMcd(mcd), trainConfig(config)
 {
+  this->devMcd = nullptr;
+  this->devConfig = nullptr;
 }
 
-void Trainer::trainUnbatched()
+Trainer::Trainer(TapeMachine & tm, MCD & mcd, Config & config, MCD * devMcd, Config * devConfig) : tm(tm), trainMcd(mcd), trainConfig(config), devMcd(devMcd), devConfig(devConfig)
 {
-  int nbIter = 20;
-  fprintf(stderr, "Training of \'%s\' :\n", tm.name.c_str());
+}
 
-  for (int i = 0; i < nbIter; i++)
+void Trainer::getExamplesByClassifier(std::map<Classifier*, MLP::Examples> & examples, Config & config)
+{
+  while (!config.isFinal())
   {
-    std::map< std::string, std::pair<int, int> > nbExamples;
-
-    while (!config.isFinal())
-    {
-      TapeMachine::State * currentState = tm.getCurrentState();
-      Classifier * classifier = currentState->classifier;
+    TapeMachine::State * currentState = tm.getCurrentState();
+    Classifier * classifier = currentState->classifier;
+    classifier->initClassifier(config);
 
-      //config.printForDebug(stderr);
+    int neededActionIndex = classifier->getOracleActionIndex(config);
+    std::string neededActionName = classifier->getActionName(neededActionIndex);
 
-      //fprintf(stderr, "State : \'%s\'\n", currentState->name.c_str());
+    examples[classifier].second.emplace_back(Example(neededActionIndex, classifier->getFeatureDescription(config)));
+    examples[classifier].first.emplace_back(examples[classifier].first.size());
 
-      std::string neededActionName = classifier->getOracleAction(config);
-      auto weightedActions = classifier->weightActions(config, neededActionName);
-      //Classifier::printWeightedActions(stderr, weightedActions);
-      std::string & predictedAction = weightedActions[0].second;
+    TapeMachine::Transition * transition = tm.getTransition(neededActionName);
+    tm.takeTransition(transition);
+    config.moveHead(transition->headMvt);
+  }
+}
 
-      nbExamples[classifier->name].first++;
-      if(predictedAction == neededActionName)
-        nbExamples[classifier->name].second++;
+void Trainer::processAllExamples(
+  std::map<Classifier*, MLP::Examples> & examples,
+  int batchSize, std::map< std::string, std::pair<int, int> > & nbExamples,
+  std::function<int(Classifier *, MLP::Examples &, int, int)> processBatch)
+{
+  for(auto & it : examples)
+  {
+    int nbBatches = (it.second.second.size() / batchSize) + (it.second.second.size() % batchSize ? 1 : 0);
 
-    //fprintf(stderr, "Action : \'%s\'\n", neededActionName.c_str());
+    for(int numBatch = 0; numBatch < nbBatches; numBatch++)
+    {
+      int currentBatchSize = std::min<int>(batchSize, it.second.second.size() - (numBatch * batchSize));
 
-      TapeMachine::Transition * transition = tm.getTransition(neededActionName);
-      tm.takeTransition(transition);
-      config.moveHead(transition->headMvt);
-    }
+      int batchStart = numBatch * batchSize;
+      int batchEnd = batchStart + currentBatchSize;
 
-    fprintf(stderr, "Iteration %d/%d :\n", i+1, nbIter);
-    for(auto & it : nbExamples)
-      fprintf(stderr, "\t%s %.2f%% accuracy\n", it.first.c_str(), 100.0*it.second.second / it.second.first);
+      int nbCorrect = processBatch(it.first, it.second, batchStart, batchEnd);
 
-    config.reset();
+      nbExamples[it.first->name].first += currentBatchSize;
+      nbExamples[it.first->name].second += nbCorrect;
+    }
   }
 }
 
-  auto & classifiers = tm.getClassifiers();
-  for(Classifier * cla : classifiers)
-    cla->save("toto.txt");
+void Trainer::printIterationScores(FILE * output,
+  std::map< std::string, std::pair<int, int> > & nbExamplesTrain,
+  std::map< std::string, std::pair<int, int> > & nbExamplesDev,
+  int nbIter, int curIter)
+{
+  fprintf(output, "Iteration %d/%d :\n", curIter+1, nbIter);
+  for(auto & it : nbExamplesTrain)
+  {
+    float scoreTrain = 100.0*it.second.second / it.second.first;
+    if (devConfig)
+    {
+      float scoreDev = 100.0*nbExamplesDev[it.first].second / nbExamplesDev[it.first].first;
+      fprintf(output, "\t%s accuracy : train(%.2f%%) dev(%.2f%%)\n", it.first.c_str(), scoreTrain, scoreDev);
+    }
+    else
+      fprintf(output, "\t%s accuracy : train(%.2f%%)\n", it.first.c_str(), scoreTrain);
+  }
 }
 
-void Trainer::trainBatched()
+void Trainer::shuffleAllExamples(std::map<Classifier*, MLP::Examples > & examples)
 {
-  using FD = FeatureModel::FeatureDescription;
-  using Example = std::pair<int, FD>;
+  for (auto & it : examples)
+    std::random_shuffle(it.second.first.begin(), it.second.first.end());
+}
 
-  std::map<Classifier*, std::vector<Example> > examples;
+void Trainer::trainBatched(int nbIter, int batchSize, bool mustShuffle)
+{
+  std::map<Classifier*, MLP::Examples > trainExamples;
+  std::map<Classifier*, MLP::Examples > devExamples;
 
   fprintf(stderr, "Training of \'%s\' :\n", tm.name.c_str());
 
-  while (!config.isFinal())
-  {
-    TapeMachine::State * currentState = tm.getCurrentState();
-    Classifier * classifier = currentState->classifier;
-    classifier->initClassifier(config);
-
-    int neededActionIndex = classifier->getOracleActionIndex(config);
-    std::string neededActionName = classifier->getActionName(neededActionIndex);
-
-    examples[classifier].emplace_back(Example(neededActionIndex, classifier->getFeatureDescription(config)));
-
-    TapeMachine::Transition * transition = tm.getTransition(neededActionName);
-    tm.takeTransition(transition);
-    config.moveHead(transition->headMvt);
-  }
+  getExamplesByClassifier(trainExamples, trainConfig);
 
-  int nbIter = 5;
-  int batchSize = 256;
+  if(devMcd && devConfig)
+    getExamplesByClassifier(devExamples, *devConfig);
 
   for (int i = 0; i < nbIter; i++)
   {
-    std::map< std::string, std::pair<int, int> > nbExamples;
+    std::map< std::string, std::pair<int, int> > nbExamplesTrain;
+    std::map< std::string, std::pair<int, int> > nbExamplesDev;
 
-    for(auto & it : examples)
-    {
-      int nbBatches = (it.second.size() / batchSize) + (it.second.size() % batchSize ? 
1 : 0); + if(mustShuffle) + shuffleAllExamples(trainExamples); - for(int numBatch = 0; numBatch < nbBatches; numBatch++) + processAllExamples(trainExamples, batchSize, nbExamplesTrain, + [](Classifier * c, MLP::Examples & ex, int s, int e) { - int currentBatchSize = std::min<int>(batchSize, it.second.size() - (numBatch * batchSize)); - - auto batchStart = it.second.begin() + numBatch * batchSize; - auto batchEnd = batchStart + currentBatchSize; + return c->trainOnBatch(ex, s, e); + }); - int nbCorrect = it.first->trainOnBatch(batchStart, batchEnd); - - nbExamples[it.first->name].first += currentBatchSize; - nbExamples[it.first->name].second += nbCorrect; - } - } + processAllExamples(devExamples, batchSize, nbExamplesDev, + [](Classifier * c, MLP::Examples & ex, int s, int e) + { + return c->getScoreOnBatch(ex, s, e); + }); - fprintf(stderr, "Iteration %d/%d :\n", i+1, nbIter); - for(auto & it : nbExamples) - fprintf(stderr, "\t%s %.2f%% accuracy\n", it.first.c_str(), 100.0*it.second.second / it.second.first); + printIterationScores(stderr, nbExamplesTrain, nbExamplesDev, nbIter, i); } auto & classifiers = tm.getClassifiers(); for(Classifier * cla : classifiers) - cla->save("toto.txt"); + cla->save(); } -void Trainer::train() +void Trainer::train(int nbIter, int batchSize, bool mustShuffle) { - //trainUnbatched(); - trainBatched(); + trainBatched(nbIter, batchSize, mustShuffle); }
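
A note for reviewers on the data layout this patch introduces: MLP::Examples is a pair of parallel structures. Its .second owns the examples themselves, as (gold class, FeatureDescription) pairs appended once by getExamplesByClassifier, while its .first is a vector of indices into .second that fixes the iteration order. shuffleAllExamples permutes only the index vector between epochs, so the comparatively heavy FeatureDescription objects are never moved once stored, and trainOnBatch/getScoreOnBatch read them through the indirection examples.second[order[i]]. Below is a minimal self-contained sketch of the same pattern; the plain int standing in for FeatureModel::FeatureDescription and the values used are illustrative, not the project's real types or data.

#include <algorithm>
#include <cstdio>
#include <random>
#include <utility>
#include <vector>

// Shape of MLP::Examples : first = iteration order, second = example storage.
using Examples = std::pair< std::vector<int>, std::vector<std::pair<int, int> > >;

int main()
{
  Examples examples;
  for (int i = 0; i < 10; i++)
  {
    examples.second.emplace_back(i % 3, i * 100);             // (gold class, fake "features")
    examples.first.emplace_back((int)examples.first.size());  // identity order 0..9
  }

  // Shuffling touches only the order vector; the storage never moves.
  std::mt19937 rng(42);
  std::shuffle(examples.first.begin(), examples.first.end(), rng);

  // Batched iteration, sliced the same way trainOnBatch/getScoreOnBatch slice it.
  int batchSize = 4;
  int nbBatches = (examples.second.size() / batchSize) + (examples.second.size() % batchSize ? 1 : 0);
  for (int numBatch = 0; numBatch < nbBatches; numBatch++)
  {
    int start = numBatch * batchSize;
    int end = std::min<int>(start + batchSize, examples.second.size());
    for (int i = start; i < end; i++)
    {
      auto & example = examples.second[examples.first[i]];  // read through the order vector
      printf("batch %d : gold=%d features=%d\n", numBatch, example.first, example.second);
    }
  }

  return 0;
}

With the new command line in test_train.cpp, a training run would look like this (file names are made up): ./test_train --tm machine.tm --mcd train.mcd --train train.txt --devmcd dev.mcd --dev dev.txt --nbiter 10 --batchsize 128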