Commit 17c4c49b authored by Franck Dary

Added reversed MLP

parent 16dfda1f
/// \file ReversedMLP.hpp
/// \author Franck Dary
/// @version 1.0
/// @date 2019-08-08
#ifndef REVERSEDMLP__H
#define REVERSEDMLP__H
#include "NeuralNetwork.hpp"
#include "MLPBase.hpp"
#include "ProgramParameters.hpp"
/// @brief Classifier composed of two MLPs; it can be trained with either positive or negative gold classes.
///
/// It is capable of training itself given a batch of examples.\n
/// Once trained, it can also be used to predict the class of a certain input.
class ReversedMLP : public NeuralNetwork
{
  private :
    /// @brief The MLP that will be trained using positive gold classes.
    MLPBase mlpPos;
    /// @brief The MLP that will be trained using negative gold classes.
    MLPBase mlpNeg;
    /// @brief The training algorithm that will be used.
    std::unique_ptr<dynet::Trainer> trainer;

  public :
    /// @brief Initialize a new untrained ReversedMLP from a desired topology.
    ///
    /// Topology example for 2 hidden layers : (150,RELU,0.3)(50,ELU,0.2)\n
    /// Of sizes 150 and 50, activation functions RELU and ELU, and dropout rates
    /// of 0.3 and 0.2.
    /// @param nbInputs The size of the input layer of the MLP.
    /// @param topology Description of each hidden layer of the MLP.
    /// @param nbOutputs The size of the output layer of the MLP.
    void init(int nbInputs, const std::string & topology, int nbOutputs) override;
    /// @brief Construct a new ReversedMLP for training.
    ReversedMLP();
    /// @brief Read and construct a trained ReversedMLP from a file.
    ///
    /// The file must have been written by save.
    /// @param filename The file to read the ReversedMLP from.
    ReversedMLP(const std::string & filename);
    /// @brief Give a score to each possible class, given an input.
    ///
    /// @param fd The input to use.
    ///
    /// @return A vector containing one score per possible class.
    std::vector<float> predict(FeatureModel::FeatureDescription & fd) override;
    /// @brief Update the parameters according to the given gold class.
    ///
    /// @param fd The input to use.
    /// @param gold The gold class of this input.
    ///
    /// @return The loss.
    float update(FeatureModel::FeatureDescription & fd, int gold) override;
    /// @brief Update the parameters according to the given gold vector.
    ///
    /// @param fd The input to use.
    /// @param gold The gold vector for this input.
    ///
    /// @return The loss.
    float update(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold) override;
    /// @brief Get the loss according to the given gold class.
    ///
    /// @param fd The input to use.
    /// @param gold The gold class of this input.
    ///
    /// @return The loss.
    float getLoss(FeatureModel::FeatureDescription & fd, int gold) override;
    /// @brief Get the loss according to the given gold vector.
    ///
    /// @param fd The input to use.
    /// @param gold The gold vector for this input.
    ///
    /// @return The loss.
    float getLoss(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold) override;
    /// @brief Save the ReversedMLP to a file.
    ///
    /// @param filename The file to write the ReversedMLP to.
    void save(const std::string & filename) override;
    /// @brief Print the topology (layers) of the ReversedMLP.
    ///
    /// @param output Where the topology will be printed.
    void printTopology(FILE * output) override;
    /// @brief Allocate the correct trainer type depending on the program parameters.
    ///
    /// @return A pointer to the newly allocated trainer.
    dynet::Trainer * createTrainer();
    /// @brief Notify both MLPs that the current training iteration is over.
    void endOfIteration();
};
#endif
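
For orientation (not part of the commit), a hypothetical usage sketch of the class declared above. The input/output sizes and the exampleUsage function are made up; fd is assumed to be built by the existing feature extraction code.

#include "ReversedMLP.hpp"

// Hypothetical usage sketch, for illustration only.
void exampleUsage(FeatureModel::FeatureDescription & fd)
{
  ReversedMLP nn;
  nn.init(/*nbInputs=*/500, "R(150,RELU,0.3)(50,ELU,0.2)", /*nbOutputs=*/30);

  nn.update(fd, 3);        // positive gold : trains the positive MLP on class 3
  nn.update(fd, -(3 + 1)); // negative gold : class 3 encoded as -(index+1), routed to the negative MLP

  std::vector<float> scores = nn.predict(fd); // one score per class : positive minus negative MLP
  (void) scores;
}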
@@ -170,7 +170,7 @@ float MLPBase::update(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
   goldExpressions.emplace_back(dynet::input(cg, dynet::Dim({(unsigned int)gold.size()}), gold));
   dynet::Expression batchedGold = dynet::concatenate_to_batch(goldExpressions);
-  batchedLoss = dynet::sum_batches(dynet::l1_distance(output, batchedGold));
+  batchedLoss = dynet::sum_batches(dynet::squared_distance(output, batchedGold));
   cg.backward(batchedLoss);
@@ -260,7 +260,7 @@ float MLPBase::getLoss(FeatureModel::FeatureDescription & fd, const std::vector<float> & gold)
   goldExpressions.emplace_back(dynet::input(cg, dynet::Dim({1,(unsigned int)gold.size()}), gold));
   dynet::Expression batchedGold = dynet::concatenate_to_batch(goldExpressions);
-  batchedLoss = dynet::sum_batches(dynet::l1_distance(output, batchedGold));
+  batchedLoss = dynet::sum_batches(dynet::squared_distance(output, batchedGold));
   checkGradients();
...
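
Side note (not from the commit): these two hunks swap the loss used for vector-valued golds from an L1 distance to a squared distance. A minimal, self-contained sketch of the two criteria on plain vectors, independent of dynet and with hypothetical helper names, purely for illustration:

#include <cmath>
#include <cstdio>
#include <vector>

// Hypothetical helpers mirroring what dynet::l1_distance and
// dynet::squared_distance compute, written on raw vectors.
static float l1Distance(const std::vector<float> & a, const std::vector<float> & b)
{
  float sum = 0.0f;
  for (unsigned int i = 0; i < a.size(); i++)
    sum += std::fabs(a[i] - b[i]);
  return sum;
}

static float squaredDistance(const std::vector<float> & a, const std::vector<float> & b)
{
  float sum = 0.0f;
  for (unsigned int i = 0; i < a.size(); i++)
    sum += (a[i] - b[i]) * (a[i] - b[i]);
  return sum;
}

int main()
{
  std::vector<float> output = {0.2f, 0.7f, 0.1f};
  std::vector<float> gold = {0.0f, 1.0f, 0.0f};
  // L1 : 0.2 + 0.3 + 0.1 = 0.6 ; squared : 0.04 + 0.09 + 0.01 = 0.14
  printf("l1 = %f squared = %f\n", l1Distance(output, gold), squaredDistance(output, gold));
  return 0;
}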
#include "ReversedMLP.hpp"
ReversedMLP::ReversedMLP() : mlpPos("MLP_POS"), mlpNeg("MLP_NEG")
{
randomSeed = ProgramParameters::seed;
trainer.reset(createTrainer());
initDynet();
}
ReversedMLP::ReversedMLP(const std::string & filename) : mlpPos("MLP_POS"), mlpNeg("MLP_NEG")
{
randomSeed = ProgramParameters::seed;
trainer.reset(createTrainer());
initDynet();
mlpPos.loadStruct(model, filename, 0);
mlpPos.loadParameters(model, filename);
mlpNeg.loadStruct(model, filename, 1);
mlpNeg.loadParameters(model, filename);
}
void ReversedMLP::init(int nbInputs, const std::string & topology, int nbOutputs)
{
  // Skip the leading 'R' that selected this network type in Classifier::createNeuralNetwork;
  // the remainder is a regular MLP topology shared by both sub-networks.
  std::string safeTopology = "";
  for (unsigned int i = 1; i < topology.size(); i++)
    safeTopology.push_back(topology[i]);

  setBatchSize(0);
  mlpPos.init(model, nbInputs, safeTopology, nbOutputs);
  mlpNeg.init(model, nbInputs, safeTopology, nbOutputs);
}

dynet::Trainer * ReversedMLP::createTrainer()
{
  auto optimizer = noAccentLower(ProgramParameters::optimizer);
  dynet::Trainer * trainer = nullptr;

  if (optimizer == "amsgrad")
    trainer = new dynet::AmsgradTrainer(model, ProgramParameters::learningRate, ProgramParameters::beta1, ProgramParameters::beta2, ProgramParameters::bias);
  else if (optimizer == "adam")
    trainer = new dynet::AdamTrainer(model, ProgramParameters::learningRate, ProgramParameters::beta1, ProgramParameters::beta2, ProgramParameters::bias);
  else if (optimizer == "sgd")
    trainer = new dynet::SimpleSGDTrainer(model, ProgramParameters::learningRate);
  else if (optimizer == "none")
    return nullptr;

  if (trainer)
  {
    trainer->sparse_updates_enabled = true;
    return trainer;
  }

  fprintf(stderr, "ERROR (%s) : unknown optimizer \'%s\'. Aborting.\n", ERRINFO, optimizer.c_str());
  exit(1);

  return nullptr;
}

std::vector<float> ReversedMLP::predict(FeatureModel::FeatureDescription & fd)
{
  // The score of each class is the positive MLP's score minus the negative MLP's score.
  auto predPos = mlpPos.predict(fd);
  auto predNeg = mlpNeg.predict(fd);

  for (unsigned int i = 0; i < predPos.size(); i++)
    predPos[i] -= predNeg[i];

  return predPos;
}

float ReversedMLP::update(FeatureModel::FeatureDescription & fd, int gold)
{
  mlpPos.setBatchSize(getBatchSize());
  mlpNeg.setBatchSize(getBatchSize());

  try
  {
    float loss = 0.0;

    if (gold >= 0)
    {
      loss = mlpPos.update(fd, gold);
    }
    else
    {
      // Negative gold classes are encoded as -(index+1) : decode the index
      // and train the negative MLP with it.
      gold = -gold;
      gold--;
      loss = mlpNeg.update(fd, gold);
    }

    trainer->update();
    return loss;
  } catch (BatchNotFull &)
  {
    return 0.0;
  }
}

float ReversedMLP::update(FeatureModel::FeatureDescription &, const std::vector<float> &)
{
  fprintf(stderr, "ERROR (%s) : only classification is supported. Aborting.\n", ERRINFO);
  exit(1);

  return 0.0;
}

float ReversedMLP::getLoss(FeatureModel::FeatureDescription & fd, int gold)
{
  mlpPos.setBatchSize(getBatchSize());
  mlpNeg.setBatchSize(getBatchSize());

  try
  {
    float loss = 0.0;

    if (gold >= 0)
    {
      loss = mlpPos.getLoss(fd, gold);
    }
    else
    {
      // Same encoding as in update : negative golds target the negative MLP.
      gold = -gold;
      gold--;
      loss = mlpNeg.getLoss(fd, gold);
    }

    return loss;
  } catch (BatchNotFull &)
  {
    return 0.0;
  }
}

float ReversedMLP::getLoss(FeatureModel::FeatureDescription &, const std::vector<float> &)
{
  fprintf(stderr, "ERROR (%s) : only classification is supported. Aborting.\n", ERRINFO);
  exit(1);

  return 0.0;
}

void ReversedMLP::save(const std::string & filename)
{
  // Open and immediately close the file to create/truncate it; the MLPs then append their data to it.
  File * file = new File(filename, "w");
  delete file;

  mlpPos.saveStruct(filename);
  mlpPos.saveParameters(filename);
  mlpNeg.saveStruct(filename);
  mlpNeg.saveParameters(filename);
}

void ReversedMLP::printTopology(FILE * output)
{
  // Both sub-networks share the same topology, so printing one of them is enough.
  mlpPos.printTopology(output);
}

void ReversedMLP::endOfIteration()
{
  mlpPos.endOfIteration();
  mlpNeg.endOfIteration();
}
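
A toy illustration (not from the commit) of the score combination performed by predict above: classes that are repeatedly trained as negative end up with a lower combined score. All numbers are made up.

#include <cstdio>
#include <vector>

int main()
{
  std::vector<float> predPos = {1.2f, 0.4f, 0.9f}; // hypothetical positive MLP scores
  std::vector<float> predNeg = {0.1f, 1.5f, 0.2f}; // hypothetical negative MLP scores

  // Same combination as ReversedMLP::predict : combined = positive - negative.
  for (unsigned int i = 0; i < predPos.size(); i++)
    predPos[i] -= predNeg[i];

  // Prints 1.1 -1.1 0.7 : class 1, often trained as negative, is strongly penalized.
  for (float score : predPos)
    printf("%f ", score);
  printf("\n");
  return 0;
}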
@@ -404,7 +404,12 @@ void Trainer::doStepTrain()
   if (newCost >= lastCost)
   {
-    // loss = tm.getCurrentClassifier()->trainOnExample(pendingFD[tm.getCurrentClassifier()->name], tm.getCurrentClassifier()->getActionIndex("EPSILON"));
+    if (true)
+    {
+      loss = tm.getCurrentClassifier()->trainOnExample(pendingFD[tm.getCurrentClassifier()->name], -(tm.getCurrentClassifier()->getActionIndex(trainConfig.getCurrentStateHistory().top())+1));
+    }
+    else
+    {
     int nbActions = tm.getCurrentClassifier()->getNbActions();
     int backIndex = tm.getCurrentClassifier()->getActionIndex(trainConfig.getCurrentStateHistory().top());
     float value = 1.0 / (nbActions-1);
@@ -412,18 +417,25 @@ void Trainer::doStepTrain()
     goldOutput[backIndex] = 0.0;
     loss = tm.getCurrentClassifier()->trainOnExample(pendingFD[tm.getCurrentClassifier()->name], goldOutput);
+    }
     updateInfos = "predicted : <"+trainConfig.getCurrentStateHistory().top()+">, bad decision";
   }
   else
   {
-    //loss = tm.getCurrentClassifier()->trainOnExample(pendingFD[tm.getCurrentClassifier()->name], tm.getCurrentClassifier()->getActionIndex(trainConfig.getCurrentStateHistory().top()));
+    if (true)
+    {
+      loss = tm.getCurrentClassifier()->trainOnExample(pendingFD[tm.getCurrentClassifier()->name], tm.getCurrentClassifier()->getActionIndex(trainConfig.getCurrentStateHistory().top()));
+    }
+    else
+    {
     int nbActions = tm.getCurrentClassifier()->getNbActions();
     int backIndex = tm.getCurrentClassifier()->getActionIndex(trainConfig.getCurrentStateHistory().top());
     std::vector<float> goldOutput(nbActions, 0.0);
     goldOutput[backIndex] = 1.0;
     loss = tm.getCurrentClassifier()->trainOnExample(pendingFD[tm.getCurrentClassifier()->name], goldOutput);
+    }
     updateInfos = "predicted : <"+trainConfig.getCurrentStateHistory().top()+">, good decision";
   }
...
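
The new branch above feeds bad decisions to the classifier as -(actionIndex+1), the convention that ReversedMLP::update decodes before routing the example to its negative MLP. A small standalone sketch of that round trip (the helper names are hypothetical, not from the code base):

#include <cassert>

// A bad decision on action index i is signalled as -(i+1), so that index 0 can also be negated.
static int encodeNegativeGold(int actionIndex) { return -(actionIndex + 1); }

static int decodeGold(int gold, bool & isNegative)
{
  isNegative = gold < 0;
  if (isNegative)
  {
    gold = -gold; // same two steps as in ReversedMLP::update
    gold--;
  }
  return gold;
}

int main()
{
  bool neg = false;
  assert(decodeGold(encodeNegativeGold(0), neg) == 0 && neg); // index 0 survives the round trip
  assert(decodeGold(encodeNegativeGold(7), neg) == 7 && neg);
  assert(decodeGold(3, neg) == 3 && !neg);                    // positive golds are left untouched
  return 0;
}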
@@ -2,6 +2,7 @@
 #include "File.hpp"
 #include "util.hpp"
 #include "MLP.hpp"
+#include "ReversedMLP.hpp"
 #include "GeneticAlgorithm.hpp"

 Classifier::Classifier(const std::string & filename, bool trainMode)
@@ -372,6 +373,9 @@ NeuralNetwork * Classifier::createNeuralNetwork()
   if (splited.size() == 2)
     return new GeneticAlgorithm();

+  if (topology[0] == 'R')
+    return new ReversedMLP();
+
   return new MLP();
 }
...
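
For context (not part of the commit): createNeuralNetwork now dispatches on the first character of the topology string, so a topology such as "R(150,RELU,0.3)(50,ELU,0.2)" builds a ReversedMLP, whose init then strips the leading 'R' before handing the rest to its two MLPBase members. A simplified sketch of that dispatch (the GeneticAlgorithm case is omitted and createFromTopology is a hypothetical name):

#include <memory>
#include <string>

#include "MLP.hpp"
#include "ReversedMLP.hpp"

// Illustrative, simplified version of the dispatch in Classifier::createNeuralNetwork.
std::unique_ptr<NeuralNetwork> createFromTopology(const std::string & topology)
{
  // A leading 'R' selects the ReversedMLP; ReversedMLP::init later drops that
  // character before building its two sub-networks.
  if (!topology.empty() && topology[0] == 'R')
    return std::make_unique<ReversedMLP>();

  return std::make_unique<MLP>();
}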