Commit c9a28275 authored by Franck Dary

Allow specifying the batchsize in cla files, and change the error detection loss function to L1

parent 006cdfc9
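In short: each Classifier can now carry its own batchSize, read from its cla file; the value is pushed down into the underlying NeuralNetwork and MLPBase, where a non-zero value overrides the global ProgramParameters::batchSize. Independently, the loss used for continuous (vector-valued) gold outputs in MLPBase changes from dynet::squared_distance to dynet::l1_distance.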
@@ -38,6 +38,7 @@ class MLPBase
     std::vector<FeatureModel::FeatureDescription> fdsContinuous;
     /// @brief gold outputs of the current minibatch.
     std::vector< std::vector<float> > goldsContinuous;
+    int batchSize;

   private :
@@ -144,6 +145,7 @@ class MLPBase
     void printTopology(FILE * output);
     /// @brief Clear the current batch.
     void endOfIteration();
+    void setBatchSize(int batchSize);
 };

 #endif
@@ -103,6 +103,9 @@ class NeuralNetwork
     /// @brief The dynet model containing the parameters to be trained.
     dynet::ParameterCollection model;
+    /// @brief The size of batches. If not specified, the global batchsize is used.
+    int batchSize;
+
   protected :

     /// @brief Set dynet and srand() seeds.
@@ -192,6 +195,12 @@ class NeuralNetwork
     ///
     /// \return The number of input neurons taken by fv.
     static unsigned int featureSize(const FeatureModel::FeatureValue & fv);
+
+    /// @brief Set the batchSize.
+    void setBatchSize(int batchSize);
+
+    /// @brief Get the batchSize.
+    int getBatchSize();
 };

 #endif
@@ -19,6 +19,7 @@ MLP::MLP(const std::string & filename) : mlp("MLP")
 void MLP::init(int nbInputs, const std::string & topology, int nbOutputs)
 {
+  setBatchSize(0);
   mlp.init(model, nbInputs, topology, nbOutputs);
 }
@@ -57,6 +58,7 @@ std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd)
 float MLP::update(FeatureModel::FeatureDescription & fd, int gold)
 {
+  mlp.setBatchSize(getBatchSize());
   try
   {
     float loss = mlp.update(fd, gold);
@@ -70,6 +72,7 @@ float MLP::update(FeatureModel::FeatureDescription & fd, int gold)
 float MLP::update(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
 {
+  mlp.setBatchSize(getBatchSize());
   try
   {
     float loss = mlp.update(fd, gold);
@@ -83,6 +86,7 @@ float MLP::update(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
 float MLP::getLoss(FeatureModel::FeatureDescription & fd, int gold)
 {
+  mlp.setBatchSize(getBatchSize());
   try
   {
     float loss = mlp.getLoss(fd, gold);
@@ -95,6 +99,7 @@ float MLP::getLoss(FeatureModel::FeatureDescription & fd, int gold)
 float MLP::getLoss(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
 {
+  mlp.setBatchSize(getBatchSize());
   try
   {
     float loss = mlp.getLoss(fd, gold);
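Each of the four MLP entry points above now pushes the configured batch size into the wrapped MLPBase before delegating, so a per-classifier value set through setBatchSize() takes effect on every update and getLoss call.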
@@ -4,6 +4,7 @@ MLPBase::MLPBase(std::string name)
 {
   this->name = name;
   dropoutActive = true;
+  batchSize = 0;
 }

 void MLPBase::init(dynet::ParameterCollection & model, int nbInputs, const std::string & topology, int nbOutputs)
@@ -94,7 +95,9 @@ float MLPBase::update(FeatureModel::FeatureDescription & fd, int gold)
   fdsOneHot.emplace_back(fd);
   goldsOneHot.emplace_back(gold);

-  if ((int)fdsOneHot.size() < ProgramParameters::batchSize)
+  int effectiveBatchSize = batchSize ? batchSize : ProgramParameters::batchSize;
+
+  if ((int)fdsOneHot.size() < effectiveBatchSize)
     throw BatchNotFull();

   std::vector<dynet::Expression> inputs;
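As a standalone illustration of the fallback above, a batchSize of 0 means "not specified" and defers to the global value. This is only a minimal sketch: globalBatchSize stands in for ProgramParameters::batchSize and is an assumption of the example.

#include <cstdio>

// globalBatchSize stands in for ProgramParameters::batchSize (illustrative only).
static int globalBatchSize = 32;

// Same fallback pattern as in the hunk above: 0 means "not specified".
int effectiveBatchSize(int localBatchSize)
{
  return localBatchSize ? localBatchSize : globalBatchSize;
}

int main()
{
  printf("%d\n", effectiveBatchSize(0)); // prints 32: falls back to the global batchsize
  printf("%d\n", effectiveBatchSize(8)); // prints 8: the per-classifier value wins
  return 0;
}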
@@ -140,7 +143,9 @@ float MLPBase::update(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
   fdsContinuous.emplace_back(fd);
   goldsContinuous.emplace_back(gold);

-  if ((int)fdsContinuous.size() < ProgramParameters::batchSize)
+  int effectiveBatchSize = batchSize ? batchSize : ProgramParameters::batchSize;
+
+  if ((int)fdsContinuous.size() < effectiveBatchSize)
     throw BatchNotFull();

   std::vector<dynet::Expression> inputs;
@@ -165,7 +170,7 @@ float MLPBase::update(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
   goldExpressions.emplace_back(dynet::input(cg, dynet::Dim({(unsigned int)gold.size()}), gold));

   dynet::Expression batchedGold = dynet::concatenate_to_batch(goldExpressions);
-  batchedLoss = dynet::sum_batches(dynet::squared_distance(output, batchedGold));
+  batchedLoss = dynet::sum_batches(dynet::l1_distance(output, batchedGold));

   cg.backward(batchedLoss);
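For reference, squared_distance sums squared componentwise differences while l1_distance sums absolute differences, so large errors are penalized less sharply. A minimal standalone sketch (assuming a standard DyNet C++ setup; the toy vectors are illustrative):

#include <dynet/dynet.h>
#include <dynet/expr.h>
#include <cstdio>
#include <vector>

int main(int argc, char ** argv)
{
  dynet::initialize(argc, argv);
  dynet::ComputationGraph cg;

  // Toy prediction/gold pair (illustrative values).
  std::vector<float> predicted = {0.0f, 2.0f};
  std::vector<float> target = {1.0f, 0.0f};
  dynet::Expression output = dynet::input(cg, dynet::Dim({2}), predicted);
  dynet::Expression gold = dynet::input(cg, dynet::Dim({2}), target);

  // Old loss: (0-1)^2 + (2-0)^2 = 5
  float l2 = dynet::as_scalar(cg.forward(dynet::squared_distance(output, gold)));
  // New loss: |0-1| + |2-0| = 3
  float l1 = dynet::as_scalar(cg.forward(dynet::l1_distance(output, gold)));

  printf("squared_distance: %f, l1_distance: %f\n", l2, l1);
  return 0;
}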
@@ -182,7 +187,9 @@ float MLPBase::getLoss(FeatureModel::FeatureDescription & fd, int gold)
   fdsOneHot.emplace_back(fd);
   goldsOneHot.emplace_back(gold);

-  if ((int)fdsOneHot.size() < ProgramParameters::batchSize)
+  int effectiveBatchSize = batchSize ? batchSize : ProgramParameters::batchSize;
+
+  if ((int)fdsOneHot.size() < effectiveBatchSize)
     throw BatchNotFull();

   std::vector<dynet::Expression> inputs;
@@ -226,7 +233,9 @@ float MLPBase::getLoss(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
   fdsContinuous.emplace_back(fd);
   goldsContinuous.emplace_back(gold);

-  if ((int)fdsContinuous.size() < ProgramParameters::batchSize)
+  int effectiveBatchSize = batchSize ? batchSize : ProgramParameters::batchSize;
+
+  if ((int)fdsContinuous.size() < effectiveBatchSize)
     throw BatchNotFull();

   std::vector<dynet::Expression> inputs;
@@ -251,7 +260,7 @@ float MLPBase::getLoss(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
   goldExpressions.emplace_back(dynet::input(cg, dynet::Dim({1,(unsigned int)gold.size()}), gold));

   dynet::Expression batchedGold = dynet::concatenate_to_batch(goldExpressions);
-  batchedLoss = dynet::sum_batches(dynet::squared_distance(output, batchedGold));
+  batchedLoss = dynet::sum_batches(dynet::l1_distance(output, batchedGold));

   checkGradients();
@@ -482,3 +491,8 @@ void MLPBase::endOfIteration()
   goldsContinuous.clear();
 }
+
+void MLPBase::setBatchSize(int batchSize)
+{
+  this->batchSize = batchSize;
+}
@@ -188,3 +188,13 @@ unsigned int NeuralNetwork::featureSize(const FeatureModel::FeatureValue & fv)
   return res;
 }
+
+void NeuralNetwork::setBatchSize(int batchSize)
+{
+  this->batchSize = batchSize;
+}
+
+int NeuralNetwork::getBatchSize()
+{
+  return batchSize;
+}
@@ -54,6 +54,8 @@ class Classifier
     std::shared_ptr<NeuralNetwork> nn;
     /// @brief A string describing the topology of the underlying neural network.
     std::string topology;
+    /// @brief The size of a batch. If not specified, the global batchsize is used.
+    int batchSize;
     /// @brief The oracle being used by this Classifier.
     ///
     /// For Classifier of type Prediction, the Oracle is used in train mode, to find
@@ -73,6 +73,10 @@ Classifier::Classifier(const std::string & filename, bool trainMode)
     badFormatAndAbort(ERRINFO);

   topology = buffer;
+
+  int batchsizeRead = 0;
+  if(fscanf(fd, "Batchsize : %d\n", &batchsizeRead) == 1)
+    batchSize = batchsizeRead;
 }

 Classifier::Type Classifier::str2type(const std::string & s)
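Given the fscanf format above, the optional batchsize line in a cla file is expected to look like the following (the value 64 is illustrative):

Batchsize : 64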
@@ -158,6 +162,8 @@ void Classifier::initClassifier(Config & config)
     nbInputs += NeuralNetwork::featureSize(feat);

   nn->init(nbInputs, topology, nbOutputs);
+
+  if (batchSize)
+    nn->setBatchSize(batchSize);
 }

 FeatureModel::FeatureDescription Classifier::getFeatureDescription(Config & config)