Commit a1df6c6f authored by Franck Dary

Refactoring

parent 007ae6d7
@@ -3,3 +3,4 @@ FILE(GLOB SOURCES src/*.cpp)
 #compiling library
 add_library(MLP STATIC ${SOURCES})
 target_link_libraries(MLP dynet)
+target_link_libraries(MLP transition_machine)
@@ -12,6 +12,7 @@
 #include <dynet/timing.h>
 #include <dynet/expr.h>
 #include "FeatureModel.hpp"
+#include "TrainingExamples.hpp"
 /// @brief Multi Layer Perceptron.
 ///
@@ -21,9 +22,6 @@ class MLP
 {
   public :

-  /// @brief A sequence of training examples, accompanied by the order in which it has to be iterated over.
-  using Examples = std::pair< std::vector<int>, std::vector<std::pair<int, FeatureModel::FeatureDescription> > >;

   /// @brief Activation function for a MLP Layer.
   enum Activation
   {
@@ -194,24 +192,20 @@ class MLP
   /// @return A vector containing one score per possible class.
   std::vector<float> predict(FeatureModel::FeatureDescription & fd);

-  /// @brief Train the MLP on a batch of training examples.
-  ///
-  /// The parameters will be updated by this function.
-  /// @param examples A set of training examples.
-  /// @param start The index of the first element of the batch.
-  /// @param end The index of the last element of the batch.
-  ///
-  /// @return The number of examples for which the class was correctly predicted by the MLP.
-  int trainOnBatch(Examples & examples, int start, int end);
+  /// @brief Train the MLP on these TrainingExamples.
+  ///
+  /// @param examples A batch of training examples.
+  ///
+  /// @return The number of these training examples correctly classified by the MLP.
+  int trainOnBatch(TrainingExamples & examples);

-  /// @brief Get the score of the MLP on a batch of training examples.
-  ///
-  /// The parameters will not be updated by this function.
-  /// @param examples A set of training examples.
-  /// @param start The index of the first element of the batch.
-  /// @param end The index of the last element of the batch.
-  ///
-  /// @return The number of examples for which the class was correctly predicted by the MLP.
-  int getScoreOnBatch(Examples & examples, int start, int end);
+  /// @brief Predict the class of training examples.
+  ///
+  /// @param examples The training examples.
+  ///
+  /// @return The number of these training examples correctly classified by the MLP.
+  int getScoreOnBatch(TrainingExamples & examples);

   /// @brief Save the MLP to a file.
   ///
   /// @param filename The file to write the MLP to.
...
@@ -300,7 +300,7 @@ void MLP::printParameters(FILE * output)
   fprintf(output, "Parameters : NOT IMPLEMENTED\n");
 }

-int MLP::trainOnBatch(Examples & examples, int start, int end)
+int MLP::trainOnBatch(TrainingExamples & examples)
 {
   dynet::ComputationGraph cg;
   std::vector<dynet::Expression> inputs;
@@ -308,25 +308,24 @@ int MLP::trainOnBatch(Examples & examples, int start, int end)
   int inputDim = 0;
   int outputDim = layers.back().output_dim;

-  for(int i = start; i < end; i++)
+  for(unsigned int i = 0; i < examples.size(); i++)
   {
-    auto & order = examples.first;
-    int exampleIndex = order[i];
-    auto & example = examples.second[exampleIndex];
+    int index = examples.order[i];
+    auto & example = examples.examples[index];

     std::vector<dynet::Expression> expressions;
     expressions.clear();

-    for (auto & featValue : example.second.values)
+    for (auto & featValue : example.values)
       expressions.emplace_back(featValue2Expression(cg, featValue));

     inputs.emplace_back(dynet::concatenate(expressions));
     inputDim = inputs.back().dim().rows();
-    goldClasses.emplace_back((unsigned)example.first);
+    goldClasses.emplace_back((unsigned)examples.classes[index]);
   }

   dynet::Expression concatenation = dynet::concatenate(inputs);
-  int batchSize = end - start;
+  int batchSize = examples.size();

   dynet::Expression batchedInput = reshape((concatenation),
     dynet::Dim({(unsigned)inputDim}, batchSize));
@@ -358,7 +357,7 @@ int MLP::trainOnBatch(Examples & examples, int start, int end)
   return nbCorrect;
 }
-int MLP::getScoreOnBatch(Examples & examples, int start, int end)
+int MLP::getScoreOnBatch(TrainingExamples & examples)
 {
   bool currentDropoutActive = dropoutActive;
   dropoutActive = false;
@@ -369,25 +368,24 @@ int MLP::getScoreOnBatch(Examples & examples, int start, int end)
   int inputDim = 0;
   int outputDim = layers.back().output_dim;

-  for(int i = start; i < end; i++)
+  for(unsigned int i = 0; i < examples.size(); i++)
   {
-    auto & order = examples.first;
-    int exampleIndex = order[i];
-    auto & example = examples.second[exampleIndex];
+    int index = examples.order[i];
+    auto & example = examples.examples[index];

     std::vector<dynet::Expression> expressions;
     expressions.clear();

-    for (auto & featValue : example.second.values)
+    for (auto & featValue : example.values)
       expressions.emplace_back(featValue2Expression(cg, featValue));

     inputs.emplace_back(dynet::concatenate(expressions));
     inputDim = inputs.back().dim().rows();
-    goldClasses.emplace_back((unsigned)example.first);
+    goldClasses.emplace_back((unsigned)examples.classes[index]);
   }

   dynet::Expression concatenation = dynet::concatenate(inputs);
-  int batchSize = end - start;
+  int batchSize = examples.size();

   dynet::Expression batchedInput = reshape((concatenation),
     dynet::Dim({(unsigned)inputDim}, batchSize));
...
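Aside: both functions above rely on dynet's reshape-to-batch idiom: each example is encoded as one column vector, all columns are concatenated into one long vector, and that vector is reinterpreted as a batched expression so a single forward/backward pass covers the whole batch. A minimal self-contained sketch of the idiom, with plain float vectors standing in for the feature lookups (makeBatch and rows are illustrative names, not part of this commit):

#include <dynet/dynet.h>
#include <dynet/expr.h>
#include <vector>

// Sketch only: build a batched input expression out of per-example vectors,
// assuming every example has the same dimension.
dynet::Expression makeBatch(dynet::ComputationGraph & cg,
                            const std::vector< std::vector<float> > & rows)
{
  std::vector<dynet::Expression> inputs;
  unsigned inputDim = rows.empty() ? 0 : (unsigned)rows[0].size();

  for (auto & row : rows)
    inputs.emplace_back(dynet::input(cg, dynet::Dim({inputDim}), row));

  // One long vector of size inputDim * batchSize...
  dynet::Expression concatenation = dynet::concatenate(inputs);

  // ...reinterpreted as batchSize vectors of size inputDim.
  return dynet::reshape(concatenation, dynet::Dim({inputDim}, (unsigned)rows.size()));
}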
@@ -8,3 +8,4 @@ install(TARGETS macaon_train DESTINATION bin)
 #compiling library
 add_library(trainer STATIC ${SOURCES})
+target_link_libraries(trainer transition_machine)
@@ -9,6 +9,7 @@
 #include "TransitionMachine.hpp"
 #include "BD.hpp"
 #include "Config.hpp"
+#include "TrainingExamples.hpp"

 /// @brief An object capable of training a TransitionMachine given a BD initialized with training examples.
 class Trainer
@@ -40,10 +41,6 @@ class Trainer
   /// @brief The FeatureDescription of a Config.
   using FD = FeatureModel::FeatureDescription;

-  /// @brief A training example, a pair of a class to predict and the FeatureDescription of the corresponding Config.
-  using Example = std::pair<int, FD>;
-  /// @brief Iterator type of a vector of training examples.
-  using ExamplesIter = std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator;

   private :
@@ -58,11 +55,13 @@ class Trainer
   /// @param batchSize The size of each batch (in number of examples).
   /// @param mustShuffle Will the examples be shuffled after every epoch?
   void trainBatched(int nbIter, int batchSize, bool mustShuffle);

-  /// @brief Extract training examples for all Classifier
-  ///
-  /// @param examples The map that will be filled by this function.
-  /// @param config The configuration from which the examples will be extracted.
-  void getExamplesByClassifier(std::map<Classifier*, MLP::Examples> & examples, Config & config);
+  /// @brief Use a TransitionMachine and a Config to create the TrainingExamples that will be used during training.
+  ///
+  /// @param config The Config to use.
+  ///
+  /// @return For each Classifier, a set of training examples.
+  std::map<Classifier*, TrainingExamples> getExamplesByClassifier(Config & config);

   /// @brief Make each Classifier go over every example.
   ///
@@ -72,9 +71,9 @@ class Trainer
   /// @param nbExamples Map each trainable Classifier to a count of how many examples it has seen during this epoch and a count of how many of these examples it has correctly classified. This map is filled by this function.
   /// @param getScoreOnBatch The MLP function that must be called to get the score of a classifier on a certain batch.
   void processAllExamples(
-    std::map<Classifier*, MLP::Examples> & examples,
+    std::map<Classifier*, TrainingExamples> & examples,
     int batchSize, std::map< std::string, std::pair<int, int> > & nbExamples,
-    std::function<int(Classifier *, MLP::Examples &, int, int)> getScoreOnBatch);
+    std::function<int(Classifier *, TrainingExamples &)> getScoreOnBatch);
   /// @brief Print the score obtained by all Classifier on this epoch.
   ///
@@ -97,7 +96,7 @@ class Trainer
   /// @brief For every Classifier, shuffle its training examples.
   ///
   /// @param examples Map each Classifier to a set of training examples.
-  void shuffleAllExamples(std::map<Classifier*, MLP::Examples > & examples);
+  void shuffleAllExamples(std::map<Classifier*, TrainingExamples> & examples);

   public :
...
@@ -13,8 +13,10 @@ Trainer::Trainer(TransitionMachine & tm, BD & bd, Config & config, BD * devBD, C
 }

-void Trainer::getExamplesByClassifier(std::map<Classifier*, MLP::Examples> & examples, Config & config)
+std::map<Classifier*, TrainingExamples> Trainer::getExamplesByClassifier(Config & config)
 {
+  std::map<Classifier*, TrainingExamples> examples;

   while (!config.isFinal())
   {
     TransitionMachine::State * currentState = tm.getCurrentState();
@@ -22,52 +24,46 @@ void Trainer::getExamplesByClassifier(std::map<Classifier*, MLP::Examples> & exa
     config.setCurrentStateName(&currentState->name);
     classifier->initClassifier(config);

-    //config.printForDebug(stderr);
-    //fprintf(stderr, "State : %s\n", currentState->name.c_str());

     int neededActionIndex = classifier->getOracleActionIndex(config);
     std::string neededActionName = classifier->getActionName(neededActionIndex);

     if(classifier->needsTrain())
-    {
-      examples[classifier].second.emplace_back(Example(neededActionIndex, classifier->getFeatureDescription(config)));
-      examples[classifier].first.emplace_back(examples[classifier].first.size());
-    }
+      examples[classifier].add(classifier->getFeatureDescription(config), neededActionIndex);

     Action * action = classifier->getAction(neededActionName);

     if(!action->appliable(config))
       fprintf(stderr, "WARNING (%s) : action \'%s\' is not appliable.\n", ERRINFO, neededActionName.c_str());

     action->apply(config);

-    //fprintf(stderr, "Action : %s\n", neededActionName.c_str());

     TransitionMachine::Transition * transition = tm.getTransition(neededActionName);
     tm.takeTransition(transition);
     config.moveHead(transition->headMvt);
   }

+  return examples;
 }
 void Trainer::processAllExamples(
-  std::map<Classifier*, MLP::Examples> & examples,
+  std::map<Classifier*, TrainingExamples> & examples,
   int batchSize, std::map< std::string, std::pair<int, int> > & nbExamples,
-  std::function<int(Classifier *, MLP::Examples &, int, int)> getScoreOnBatch)
+  std::function<int(Classifier *, TrainingExamples &)> getScoreOnBatch)
 {
   for(auto & it : examples)
   {
-    int nbBatches = (it.second.second.size() / batchSize) + (it.second.second.size() % batchSize ? 1 : 0);
-    for(int numBatch = 0; numBatch < nbBatches; numBatch++)
+    while(true)
     {
-      int currentBatchSize = std::min<int>(batchSize, it.second.second.size() - (numBatch * batchSize));
-      int batchStart = numBatch * batchSize;
-      int batchEnd = batchStart + currentBatchSize;
-      int nbCorrect = getScoreOnBatch(it.first, examples[it.first], batchStart, batchEnd);
-      nbExamples[it.first->name].first += currentBatchSize;
-      nbExamples[it.first->name].second += nbCorrect;
+      TrainingExamples batch = it.second.getBatch(batchSize);
+      if (batch.size() == 0)
+        break;
+      int nbCorrects = getScoreOnBatch(it.first, batch);
+      nbExamples[it.first->name].first += batch.size();
+      nbExamples[it.first->name].second += nbCorrects;
     }
+    it.second.reset();
   }
 }
@@ -122,23 +118,23 @@ void Trainer::printIterationScores(FILE * output,
   printColumns(output, {names, acc, train, dev, saved});
 }

-void Trainer::shuffleAllExamples(std::map<Classifier*, MLP::Examples > & examples)
+void Trainer::shuffleAllExamples(std::map<Classifier*, TrainingExamples> & examples)
 {
   for (auto & it : examples)
-    std::random_shuffle(it.second.first.begin(), it.second.first.end());
+    it.second.shuffle();
 }
 void Trainer::trainBatched(int nbIter, int batchSize, bool mustShuffle)
 {
-  std::map<Classifier*, MLP::Examples > trainExamples;
-  std::map<Classifier*, MLP::Examples > devExamples;
+  std::map<Classifier*, TrainingExamples> trainExamples;
+  std::map<Classifier*, TrainingExamples> devExamples;

   fprintf(stderr, "Training of \'%s\' :\n", tm.name.c_str());

-  getExamplesByClassifier(trainExamples, trainConfig);
+  trainExamples = getExamplesByClassifier(trainConfig);

   if(devBD && devConfig)
-    getExamplesByClassifier(devExamples, *devConfig);
+    devExamples = getExamplesByClassifier(*devConfig);

   auto & classifiers = tm.getClassifiers();
   for(Classifier * cla : classifiers)
@@ -158,15 +154,15 @@ void Trainer::trainBatched(int nbIter, int batchSize, bool mustShuffle)
     shuffleAllExamples(trainExamples);

     processAllExamples(trainExamples, batchSize, nbExamplesTrain,
-      [](Classifier * c, MLP::Examples & ex, int s, int e)
+      [](Classifier * c, TrainingExamples & ex)
       {
-        return c->trainOnBatch(ex, s, e);
+        return c->trainOnBatch(ex);
       });

     processAllExamples(devExamples, batchSize, nbExamplesDev,
-      [](Classifier * c, MLP::Examples & ex, int s, int e)
+      [](Classifier * c, TrainingExamples & ex)
       {
-        return c->getScoreOnBatch(ex, s, e);
+        return c->getScoreOnBatch(ex);
       });

     printIterationScores(stderr, nbExamplesTrain, nbExamplesDev,
...
@@ -106,23 +106,18 @@ class Classifier
   ///
   /// @return The index of the correct Action to take.
   int getOracleActionIndex(Config & config);

-  /// @brief Test this Classifier against a batch of training examples.
-  ///
-  /// @param examples A set of training examples.
-  /// @param start The index of the start of the batch.
-  /// @param end The index of the end of the batch.
-  ///
-  /// @return The number of examples correctly classified by this Classifier.
-  int getScoreOnBatch(MLP::Examples & examples, int start, int end);
+  /// @brief Predict the classes of these training examples.
+  ///
+  /// @param examples A set of training examples.
+  ///
+  /// @return The number of these training examples whose class has been correctly predicted.
+  int getScoreOnBatch(TrainingExamples & examples);

-  /// @brief Train this Classifier with a batch of training examples.
-  ///
-  /// This function is similar to getScoreOnBatch, except that it update the neural network parameters in order to fit the batch examples more.
-  /// @param examples A set of training examples.
-  /// @param start The index of the start of the batch.
-  /// @param end The index of the end of the batch.
-  ///
-  /// @return The number of examples correctly classified by this Classifier.
-  int trainOnBatch(MLP::Examples & examples, int start, int end);
+  /// @brief Train this Classifier on these TrainingExamples.
+  ///
+  /// @param examples A batch of training examples.
+  ///
+  /// @return The number of these training examples whose class has been correctly predicted.
+  int trainOnBatch(TrainingExamples & examples);

   /// @brief Get the name of an Action from its index.
   ///
   /// The index of an Action can be seen as the index of the corresponding output neuron in the underlying neural network.
...
/// @file TrainingExamples.hpp
/// @author Franck Dary
/// @version 1.0
/// @date 2018-08-09

#ifndef TRAININGEXAMPLES__H
#define TRAININGEXAMPLES__H

#include <vector>

#include "FeatureModel.hpp"

class TrainingExamples
{
  public :

  /// @brief The order in which the examples will be iterated over (a permutation of their indexes).
  std::vector<unsigned int> order;
  /// @brief The FeatureDescription of each example.
  std::vector<FeatureModel::FeatureDescription> examples;
  /// @brief The gold class of each example.
  std::vector<int> classes;
  /// @brief The index into order of the next example getBatch will serve.
  int nextIndex;

  public :

  TrainingExamples();
  void add(const FeatureModel::FeatureDescription & example, int gold);
  unsigned int size();
  TrainingExamples getBatch(unsigned int batchSize);
  void reset();
  void shuffle();
};

#endif
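For orientation, here is how these members are meant to interact, mirroring Trainer::processAllExamples above (a sketch only; the classifier variable and the batch size of 64 are assumed for illustration):

// Sketch of the intended lifecycle; 'classifier' is an assumed Classifier*
// whose training examples have already been added via add().
TrainingExamples & ex = examples[classifier];
ex.shuffle();                                // permutes 'order' only; the examples stay in place

while (true)
{
  TrainingExamples batch = ex.getBatch(64);  // consumes up to 64 examples, advancing nextIndex
  if (batch.size() == 0)
    break;                                   // every example has been served once
  classifier->trainOnBatch(batch);           // updates the underlying MLP
}

ex.reset();                                  // rewinds nextIndex for the next epoch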
@@ -156,7 +156,7 @@ int Classifier::getOracleActionIndex(Config & config)
   return as->getActionIndex(oracle->getAction(config));
 }

-int Classifier::trainOnBatch(MLP::Examples & examples, int start, int end)
+int Classifier::trainOnBatch(TrainingExamples & examples)
 {
   if(type != Type::Prediction)
   {
@@ -164,10 +164,10 @@ int Classifier::trainOnBatch(MLP::Examples & examples, int start, int end)
     exit(1);
   }

-  return mlp->trainOnBatch(examples, start, end);
+  return mlp->trainOnBatch(examples);
 }

-int Classifier::getScoreOnBatch(MLP::Examples & examples, int start, int end)
+int Classifier::getScoreOnBatch(TrainingExamples & examples)
 {
   if(type != Type::Prediction)
   {
@@ -175,7 +175,7 @@ int Classifier::getScoreOnBatch(MLP::Examples & examples, int start, int end)
     exit(1);
   }

-  return mlp->getScoreOnBatch(examples, start, end);
+  return mlp->getScoreOnBatch(examples);
 }

 std::string Classifier::getActionName(int actionIndex)
...
#include "TrainingExamples.hpp"
#include <algorithm>
TrainingExamples::TrainingExamples()
{
nextIndex = 0;
}
void TrainingExamples::add(const FeatureModel::FeatureDescription & example, int gold)
{
examples.emplace_back(example);
classes.emplace_back(gold);
order.emplace_back(order.size());
}
unsigned int TrainingExamples::size()
{
return examples.size();
}
TrainingExamples TrainingExamples::getBatch(unsigned int batchSize)
{
TrainingExamples batch;
for(unsigned int i = 0; i < batchSize && (unsigned)nextIndex < order.size()-1; i++)
{
batch.add(examples[order[nextIndex]], classes[order[nextIndex]]);
nextIndex++;
}
return batch;
}
void TrainingExamples::reset()
{
nextIndex = 0;
}
void TrainingExamples::shuffle()
{
std::random_shuffle(order.begin(), order.end());
}
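A portability note on shuffle: std::random_shuffle was deprecated in C++14 and removed in C++17, so the call above will not compile under newer standards. A minimal sketch of the C++11 replacement over the same order member:

#include <algorithm>
#include <random>

void TrainingExamples::shuffle()
{
  // std::shuffle requires an explicit RNG; seed it once per process.
  static std::mt19937 rng(std::random_device{}());
  std::shuffle(order.begin(), order.end(), rng);
}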