Commit d1519b7e authored by Franck Dary

Added class MLPBase

parent 0da7cec7
@@ -7,6 +7,7 @@
#define MLP__H
#include "NeuralNetwork.hpp"
#include "MLPBase.hpp"
#include "ProgramParameters.hpp"
/// @brief Multi Layer Perceptron.
@@ -17,77 +18,10 @@ class MLP : public NeuralNetwork
{
private :
/// @brief The Layers of the MLP.
std::vector<Layer> layers;
/// @brief The parameters corresponding to the layers of the MLP.
std::vector< std::vector<dynet::Parameter> > parameters;
/// @brief The mlp that will be trained.
MLPBase mlp;
/// @brief The training algorithm that will be used.
std::unique_ptr<dynet::Trainer> trainer;
/// @brief Must the Layer dropout rates be taken into account during the computations? Usually this is only the case during the training step.
bool dropoutActive;
/// @brief The current minibatch.
std::vector<FeatureModel::FeatureDescription> fds;
/// @brief Gold classes of the current minibatch.
std::vector<unsigned int> golds;
private :
/// @brief Add the parameters of a layer into the dynet model.
///
/// @param layer The layer to add.
void addLayerToModel(Layer & layer);
/// @brief Abort the program if the layers are not compatible.
void checkLayersCompatibility();
/// @brief Compute the image of input x by the Multi Layer Perceptron.
///
/// @param cg The current computation graph.
/// @param x The input of the Multi Layer Perceptron.
///
/// @return The result (values of the output Layer) of the computation of x by the Multi Layer Perceptron.
dynet::Expression run(dynet::ComputationGraph & cg, dynet::Expression x);
/// @brief Print the parameters.
///
/// @param output Where the parameters will be printed to.
void printParameters(FILE * output);
/// @brief Save the structure of the MLP (all the Layer) to a file.
///
/// The goal is to store the structure of the MLP into a file so that
/// it can be loaded and reused later.
/// @param filename The file in which the structure will be saved.
void saveStruct(const std::string & filename);
/// @brief Save the learned parameters of the MLP to a file.
///
/// Only the parameters of the Layers will be saved by this function.\n
/// Parameters that are values inside a Dict will be saved by their owner,
/// the Dict object.
/// @param filename The file in which the parameters will be saved.
void saveParameters(const std::string & filename);
/// @brief Load and construct all the Layers from a file.
///
/// The file must have been written by the function saveStruct.
/// @param filename The file from which the structure will be read.
void loadStruct(const std::string & filename);
/// @brief Load and populate the model with parameters from a file.
///
/// The file must have been written by the function saveParameters.
/// @param filename The file from which the parameters will be read.
void loadParameters(const std::string & filename);
/// @brief Load a MLP from a file.
///
/// This function will use loadStruct and loadParameters.
/// @param filename The file from which the MLP will be loaded.
void load(const std::string & filename);
/// @brief Get the loss expression
///
/// @param output Output from the neural network
/// @param oneHotGolds Indexes of gold classes (batched form)
///
/// @return The loss expression
dynet::Expression weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds);
dynet::Expression errorCorrectionLoss(dynet::ComputationGraph & cg, dynet::Expression & output, std::vector<unsigned int> & oneHotGolds);
public :
@@ -113,7 +47,6 @@ class MLP : public NeuralNetwork
///
/// @return A vector containing one score per possible class.
std::vector<float> predict(FeatureModel::FeatureDescription & fd) override;
/// @brief Update the parameters according to the given gold class.
///
/// @param fd The input to use.
@@ -121,7 +54,6 @@ class MLP : public NeuralNetwork
///
/// @return The loss.
float update(FeatureModel::FeatureDescription & fd, int gold) override;
/// @brief Save the MLP to a file.
///
/// @param filename The file to write the MLP to.
......
/// @file MLPBase.hpp
/// @author Franck Dary
/// @version 1.0
/// @date 2019-01-13
#ifndef MLPBASE__H
#define MLPBASE__H
#include <vector>
#include <map>
#include <string>
#include "ProgramParameters.hpp"
#include "NeuralNetwork.hpp"
/// @brief Multi Layer Perceptron.
///
/// Once trained, it can also be used to predict the class of a certain input.
class MLPBase
{
private :
using Layer = NeuralNetwork::Layer;
/// @brief The Layers of the MLP.
std::vector<Layer> layers;
/// @brief The parameters corresponding to the layers of the MLP.
std::vector< std::vector<dynet::Parameter> > parameters;
/// @brief Must the Layer dropout rates be taken into account during the computations? Usually this is only the case during the training step.
bool dropoutActive;
/// @brief The current minibatch.
std::vector<FeatureModel::FeatureDescription> fds;
/// @brief Gold classes of the current minibatch.
std::vector<unsigned int> golds;
public :
/// @brief Add the parameters of a layer into the dynet model.
///
/// @param model The dynet model that will contain the parameters of the layer.
/// @param layer The layer to add.
void addLayerToModel(dynet::ParameterCollection & model, Layer & layer);
/// @brief Abort the program if the layers are not compatible.
void checkLayersCompatibility();
/// @brief Compute the image of input x by the Multi Layer Perceptron.
///
/// @param cg The current computation graph.
/// @param x The input of the Multi Layer Perceptron.
///
/// @return The result (values of the output Layer) of the computation of x by the Multi Layer Perceptron.
dynet::Expression run(dynet::ComputationGraph & cg, dynet::Expression x);
/// @brief Print the parameters.
///
/// @param output Where the parameters will be printed to.
void printParameters(FILE * output);
/// @brief Save the structure of the MLP (all the Layer) to a file.
///
/// The goal is to store the structure of the MLP into a file so that
/// it can be loaded and reused later.
/// @param filename The file in which the structure will be saved.
void saveStruct(const std::string & filename);
/// @brief Save the learned parameters of the MLP to a file.
///
/// Only the parameters of the Layers will be saved by this function.\n
/// Parameters that are values inside a Dict will be saved by their owner,
/// the Dict object.
/// @param filename The file in which the parameters will be saved.
void saveParameters(const std::string & filename);
/// @brief Load and construct all the Layers from a file.
///
/// The file must have been written by the function saveStruct.
/// @param model The dynet model that will contain the loaded parameters.
/// @param filename The file from which the structure will be read.
void loadStruct(dynet::ParameterCollection & model, const std::string & filename);
/// @brief Load and populate the model with parameters from a file.
///
/// The file must have been written by the function saveParameters.
/// @param model The dynet model that will contain the loaded parameters.
/// @param filename The file from which the parameters will be read.
void loadParameters(dynet::ParameterCollection & model, const std::string & filename);
/// @brief Get the loss expression
///
/// @param output Output from the neural network
/// @param oneHotGolds Indexes of gold classes (batched form)
///
/// @return The loss expression
dynet::Expression weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds);
dynet::Expression errorCorrectionLoss(dynet::ComputationGraph & cg, dynet::Expression & output, std::vector<unsigned int> & oneHotGolds);
/// @brief Initialize a new untrained MLP from a desired topology.
///
/// Topology example for 2 hidden layers: (150,RELU,0.3)(50,ELU,0.2)\n
/// i.e. layers of sizes 150 and 50, with activation functions RELU and ELU, and dropout rates
/// of 0.3 and 0.2.
/// @param model The dynet model that will contain all the MLP parameters.
/// @param nbInputs The size of the input layer of the MLP.
/// @param topology Description of each hidden Layer of the MLP.
/// @param nbOutputs The size of the output layer of the MLP.
void init(dynet::ParameterCollection & model, int nbInputs, const std::string & topology, int nbOutputs);
/// @brief Construct a new MLP for training.
MLPBase();
/// @brief Give a score to each possible class, given an input.
///
/// @param fd The input to use.
///
/// @return A vector containing one score per possible class.
std::vector<float> predict(FeatureModel::FeatureDescription & fd);
/// @brief Update the parameters according to the given gold class.
///
/// @param fd The input to use.
/// @param gold The gold class of this input.
///
/// @return The loss.
float update(FeatureModel::FeatureDescription & fd, int gold);
/// @brief Print the topology (Layers) of the MLP.
///
/// @param output Where the topology will be printed.
void printTopology(FILE * output);
};
#endif
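For context, a minimal usage sketch of the new MLPBase API follows. It is not part of the commit: the function name, sizes and inputs are illustrative, and it assumes dynet has already been initialised by the owning NeuralNetwork.

#include "MLPBase.hpp"

// Hypothetical example, illustrative only.
void exampleUsage(FeatureModel::FeatureDescription & fd, int gold)
{
  dynet::ParameterCollection model;
  MLPBase mlp;

  // Two hidden layers of sizes 150 and 50, activations RELU and ELU,
  // dropout rates 0.3 and 0.2, using the topology format documented above.
  mlp.init(model, /*nbInputs=*/300, "(150,RELU,0.3)(50,ELU,0.2)", /*nbOutputs=*/10);

  // Training: update() buffers examples and only builds and backpropagates
  // the batched loss once ProgramParameters::batchSize examples are collected.
  float loss = mlp.update(fd, gold);

  // Inference: predict() returns one score per output class.
  std::vector<float> scores = mlp.predict(fd);
}

Note that the gradient step itself is applied by the owning MLP, which holds the dynet::Trainer; MLPBase only builds the computation graph and runs the backward pass.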
@@ -12,7 +12,7 @@
class NeuralNetwork
{
protected :
public :
/// @brief Activation function for a Layer.
enum Activation
@@ -68,6 +68,22 @@ class NeuralNetwork
void print(FILE * file);
};
/// @brief Convert a FeatureValue to a dynet Expression that will be used as an input of the NeuralNetwork.
///
/// @param cg The current Computation Graph.
/// @param fv The FeatureValue that will be converted.
///
/// @return A dynet Expression of value fv that can be used as an input in the NeuralNetwork
static dynet::Expression featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv);
/// @brief Compute the image of an expression by an activation function.
///
/// @param h The expression we want the image of.
/// @param f The activation function.
///
/// @return f(h)
static dynet::Expression activate(dynet::Expression h, Activation f);
protected :
/// @brief The seed that will be used by RNG (srand and dynet)
@@ -81,13 +97,6 @@
protected :
/// @brief Convert a FeatureValue to a dynet Expression that will be used as an input of the NeuralNetwork.
///
/// @param cg The current Computation Graph.
/// @param fv The FeatureValue that will be converted.
///
/// @return A dynet Expression of value fv that can be used as an input in the NeuralNetwork
dynet::Expression featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv);
/// @brief Set dynet and srand() seeds.
///
/// @return The DynetParams containing the set seed.
@@ -96,13 +105,6 @@ class NeuralNetwork
///
/// Must be called only once, and before any call to dynet functions.
void initDynet();
/// @brief Compute the image of an expression by an activation function.
///
/// @param h The expression we want the image of.
/// @param f The activation function.
///
/// @return f(h)
dynet::Expression activate(dynet::Expression h, Activation f);
public :
......
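The reason featValue2Expression and activate move from protected members to public statics is that MLPBase does not inherit from NeuralNetwork but needs both helpers. Its call sites in MLPBase.cpp below now take this form (fragment mirroring predict() and run(); cg, featValue, a, layers and l come from those functions):

dynet::Expression input = NeuralNetwork::featValue2Expression(cg, featValue); // cf. MLPBase::predict
dynet::Expression h     = NeuralNetwork::activate(a, layers[l].activation);   // cf. MLPBase::run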
@@ -3,44 +3,23 @@
MLP::MLP()
{
randomSeed = ProgramParameters::seed;
dropoutActive = true;
trainer.reset(createTrainer());
initDynet();
}
void MLP::init(int nbInputs, const std::string & topology, int nbOutputs)
{
std::string topo = topology;
std::replace(topo.begin(), topo.end(), '(', ' ');
std::replace(topo.begin(), topo.end(), ')', ' ');
auto groups = split(topo);
for (auto group : groups)
{
if(group.empty())
continue;
std::replace(group.begin(), group.end(), ',', ' ');
auto layer = split(group);
if (layer.size() != 3)
MLP::MLP(const std::string & filename)
{
fprintf(stderr, "ERROR (%s) : invalid topology \'%s\'. Aborting.\n", ERRINFO, topology.c_str());
exit(1);
}
randomSeed = ProgramParameters::seed;
trainer.reset(createTrainer());
initDynet();
int input = layers.empty() ? nbInputs : layers.back().output_dim;
int output = std::stoi(layer[0]);
float dropout = std::stof(layer[2]);
layers.emplace_back(input, output, dropout, str2activation(layer[1]));
mlp.loadStruct(model, filename);
mlp.loadParameters(model, filename);
}
layers.emplace_back(layers.back().output_dim, nbOutputs, 0.0, Activation::LINEAR);
checkLayersCompatibility();
for(Layer layer : layers)
addLayerToModel(layer);
void MLP::init(int nbInputs, const std::string & topology, int nbOutputs)
{
mlp.init(model, nbInputs, topology, nbOutputs);
}
dynet::Trainer * MLP::createTrainer()
@@ -63,325 +42,28 @@ dynet::Trainer * MLP::createTrainer()
return nullptr;
}
void MLP::addLayerToModel(Layer & layer)
{
dynet::Parameter W = model.add_parameters({(unsigned)layer.output_dim, (unsigned)layer.input_dim});
dynet::Parameter b = model.add_parameters({(unsigned)layer.output_dim});
if (!ProgramParameters::randomParameters)
{
W.set_value(std::vector<float>((unsigned)layer.output_dim * (unsigned)layer.input_dim, 1.0));
b.set_value(std::vector<float>((unsigned)layer.output_dim, 1.0));
}
parameters.push_back({W,b});
}
void MLP::checkLayersCompatibility()
{
if(layers.empty())
{
fprintf(stderr, "ERROR (%s) : constructed mlp with 0 layers. Aborting.\n", ERRINFO);
exit(1);
}
for(unsigned int i = 0; i < layers.size()-1; i++)
if(layers[i].output_dim != layers[i+1].input_dim)
{
fprintf(stderr, "ERROR (%s) : constructed mlp with incompatible layers. Aborting.\n", ERRINFO);
exit(1);
}
}
std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd)
{
bool currentDropoutActive = dropoutActive;
dropoutActive = false;
dynet::ComputationGraph cg;
std::vector<dynet::Expression> expressions;
for (auto & featValue : fd.values)
expressions.emplace_back(featValue2Expression(cg, featValue));
dynet::Expression input = dynet::concatenate(expressions);
dynet::Expression output = run(cg, input);
dropoutActive = currentDropoutActive;
return as_vector(cg.forward(output));
return mlp.predict(fd);
}
float MLP::update(FeatureModel::FeatureDescription & fd, int gold)
{
fds.emplace_back(fd);
golds.emplace_back(gold);
float loss = mlp.update(fd, gold);
if ((int)fds.size() < ProgramParameters::batchSize)
return 0.0;
std::vector<dynet::Expression> inputs;
dynet::ComputationGraph cg;
for (auto & example : fds)
{
std::vector<dynet::Expression> expressions;
for (auto & featValue : example.values)
expressions.emplace_back(featValue2Expression(cg, featValue));
dynet::Expression input = dynet::concatenate(expressions);
inputs.emplace_back(input);
}
dynet::Expression batchedInput = dynet::concatenate_to_batch(inputs);
dynet::Expression output = run(cg, batchedInput);
dynet::Expression batchedLoss;
if (ProgramParameters::loss == "neglogsoftmax")
{
batchedLoss = dynet::sum_batches(pickneglogsoftmax(output, golds));
}
else if (ProgramParameters::loss == "weighted")
{
batchedLoss = weightedLoss(output, golds);
}
else if (ProgramParameters::loss == "errorCorrection")
{
batchedLoss = errorCorrectionLoss(cg,output, golds);
}
else
{
fprintf(stderr, "ERROR (%s) : Unknown loss function \'%s\'. Aborting.\n", ERRINFO, ProgramParameters::loss.c_str());
exit(1);
}
cg.backward(batchedLoss);
trainer->update();
fds.clear();
golds.clear();
return as_scalar(batchedLoss.value());
}
dynet::Expression MLP::weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds)
{
std::vector<dynet::Expression> lossExpr;
for (unsigned int i = 0; i < output.dim().batch_elems(); i++)
{
lossExpr.emplace_back(dynet::pickneglogsoftmax(dynet::pick_batch_elem(output, i), oneHotGolds[i]));
auto outputVect = dynet::as_vector(dynet::pick_batch_elem(output,i).value());
int prediction = 0;
for (unsigned int j = 1; j < outputVect.size(); j++)
if(outputVect[j] > outputVect[prediction])
prediction = j;
int gold = oneHotGolds[i];
if (prediction == 1 && gold == 0)
{
lossExpr.back() = lossExpr.back() * 100.0;
}
}
return dynet::sum(lossExpr);
}
dynet::Expression MLP::errorCorrectionLoss(dynet::ComputationGraph & cg, dynet::Expression & output, std::vector<unsigned int> & oneHotGolds)
{
std::vector<dynet::Expression> lossExpr;
for (unsigned int i = 0; i < output.dim().batch_elems(); i++)
{
unsigned int u = 0;
dynet::Expression c = dynet::pick(dynet::one_hot(cg, layers.back().output_dim, oneHotGolds[i]),u);
dynet::Expression a = dynet::pick(dynet::softmax(dynet::pick_batch_elem(output,i)),u);
lossExpr.emplace_back(dynet::pickneglogsoftmax(dynet::pick_batch_elem(output, i),oneHotGolds[i])+2-c-a*c+(dynet::acos(a-1)*(c-1)));
if (ProgramParameters::debug)
{
cg.forward(lossExpr.back());
fprintf(stderr, "a=%.2f c=%.2f loss=%.2f\n", dynet::as_scalar(a.value()),dynet::as_scalar(c.value()),dynet::as_scalar(lossExpr.back().value()));
}
}
return dynet::sum(lossExpr);
}
dynet::Expression MLP::run(dynet::ComputationGraph & cg, dynet::Expression x)
{
static std::vector< std::pair<std::string,dynet::Expression> > exprForDebug;
// Expression for the current hidden state
dynet::Expression h_cur = x;
if (ProgramParameters::showFeatureRepresentation)
{
if (ProgramParameters::showFeatureRepresentation == 1)
for (unsigned int i = 0; i < 81; i++)
fprintf(stderr, "%s", i == 80 ? "\n" : "-");
exprForDebug.clear();
if (ProgramParameters::showFeatureRepresentation == 1)
exprForDebug.emplace_back("Input layer", h_cur);
if (ProgramParameters::showFeatureRepresentation >= 2)
exprForDebug.emplace_back("", h_cur);
}
for(unsigned int l = 0; l < layers.size(); l++)
{
// Initialize parameters in computation graph
dynet::Expression W = parameter(cg, parameters[l][0]);
dynet::Expression b = parameter(cg, parameters[l][1]);
// Apply affine transform
dynet::Expression a = dynet::affine_transform({b, W, h_cur});
// Apply activation function
dynet::Expression h = activate(a, layers[l].activation);
// Take care of dropout
dynet::Expression h_dropped;
if(layers[l].dropout_rate > 0){
if(dropoutActive){
dynet::Expression mask = random_bernoulli(cg,
{(unsigned int)layers[l].output_dim}, 1 - layers[l].dropout_rate);
h_dropped = cmult(h, mask);
}
else{
h_dropped = h * (1 - layers[l].dropout_rate);
}
}
else{
h_dropped = h;
}
if (ProgramParameters::showFeatureRepresentation)
{
if (ProgramParameters::showFeatureRepresentation == 1)
{
exprForDebug.emplace_back("Result of h = h*W_" + std::to_string(l) + " + b_" + std::to_string(l), a);
exprForDebug.emplace_back("Result of h = a_" + std::to_string(l) + "(h)", h);
exprForDebug.emplace_back("Result of h = dropout_" + std::to_string(l) + "(h)", h_dropped);
}
else if (ProgramParameters::showFeatureRepresentation >= 2)
{
exprForDebug.emplace_back("", a);
exprForDebug.emplace_back("", h);
}
}
h_cur = h_dropped;
}
if (ProgramParameters::showFeatureRepresentation)
{
cg.forward(h_cur);
if (ProgramParameters::showFeatureRepresentation == 1)
{
for (auto & it : exprForDebug)
fprintf(stderr, "%s (dimension=%lu) :\n%s\n", it.first.c_str(), dynet::as_vector(it.second.value()).size(), expression2str(it.second).c_str());
for (unsigned int i = 0; i < 81; i++)
fprintf(stderr, "%s", i == 80 ? "\n" : "-");
}
else if (ProgramParameters::showFeatureRepresentation >= 2)
{
for (auto & it : exprForDebug)
fprintf(stderr, "| %s |", expression2str(it.second).c_str());
fprintf(stderr, "\n");
}
}
return h_cur;
}
void MLP::printParameters(FILE * output)
{
fprintf(output, "Parameters : NOT IMPLEMENTED\n");
return loss;
}
void MLP::save(const std::string & filename)
{
saveStruct(filename);
saveParameters(filename);
}
void MLP::saveStruct(const std::string & filename)
{
File file(filename, "w");
FILE * fd = file.getDescriptor();
for (auto & layer : layers)
{
fprintf(fd, "Layer : %d %d %s %.2f\n", layer.input_dim, layer.output_dim, activation2str(layer.activation).c_str(), layer.dropout_rate);
}
}
void MLP::saveParameters(const std::string & filename)
{
dynet::TextFileSaver s(filename, true);
std::string prefix("Layer_");
for(unsigned int i = 0; i < parameters.size(); i++)
{
s.save(parameters[i][0], prefix + std::to_string(i) + "_W");
s.save(parameters[i][1], prefix + std::to_string(i) + "_b");
}
}
void MLP::load(const std::string & filename)
{
loadStruct(filename);
loadParameters(filename);
}
void MLP::loadStruct(const std::string & filename)
{
File file(filename, "r");
FILE * fd = file.getDescriptor();
char activation[1024];
int input;
int output;
float dropout;
while (fscanf(fd, "Layer : %d %d %s %f\n", &input, &output, activation, &dropout) == 4)
layers.emplace_back(input, output, dropout, str2activation(activation));
checkLayersCompatibility();
for (auto & layer : layers)
addLayerToModel(layer);
}
void MLP::loadParameters(const std::string & filename)
{
dynet::TextFileLoader loader(filename);
std::string prefix("Layer_");
for(unsigned int i = 0; i < parameters.size(); i++)
{
parameters[i][0] = loader.load_param(model, prefix + std::to_string(i) + "_W");
parameters[i][1] = loader.load_param(model, prefix + std::to_string(i) + "_b");
}
}
MLP::MLP(const std::string & filename)
{
dropoutActive = true;
randomSeed = ProgramParameters::seed;
trainer.reset(createTrainer());
initDynet();
load(filename);
mlp.saveStruct(filename);
mlp.saveParameters(filename);
}
void MLP::printTopology(FILE * output)
{
fprintf(output, "(");
for(unsigned int i = 0; i < layers.size(); i++)
{
auto & layer = layers[i];
if(i == 0)
fprintf(output, "%d", layer.input_dim);
fprintf(output, "->%d", layer.output_dim);
}
fprintf(output, ")\n");
mlp.printTopology(output);
}
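After this change MLP is essentially a thin wrapper: it owns the dynet model and the trainer, and forwards the actual computation to its MLPBase member. A condensed sketch of the resulting division of labour (abbreviated and partly reconstructed from the interleaved hunk above, so treat it as an illustration rather than the exact code):

// Illustration of the wrapper pattern, not verbatim.
float MLP::update(FeatureModel::FeatureDescription & fd, int gold)
{
  float loss = mlp.update(fd, gold); // MLPBase buffers the example and, when the
                                     // minibatch is full, builds and backpropagates the loss
  trainer->update();                 // the gradient step stays on the MLP side,
                                     // since MLPBase holds no trainer
  return loss;
}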
#include "MLPBase.hpp"
MLPBase::MLPBase()
{
dropoutActive = true;
}
void MLPBase::init(dynet::ParameterCollection & model, int nbInputs, const std::string & topology, int nbOutputs)
{
std::string topo = topology;
std::replace(topo.begin(), topo.end(), '(', ' ');
std::replace(topo.begin(), topo.end(), ')', ' ');
auto groups = split(topo);
for (auto group : groups)
{
if(group.empty())
continue;
std::replace(group.begin(), group.end(), ',', ' ');
auto layer = split(group);
if (layer.size() != 3)
{
fprintf(stderr, "ERROR (%s) : invalid topology \'%s\'. Aborting.\n", ERRINFO, topology.c_str());
exit(1);
}
int input = layers.empty() ? nbInputs : layers.back().output_dim;
int output = std::stoi(layer[0]);
float dropout = std::stof(layer[2]);
layers.emplace_back(input, output, dropout, NeuralNetwork::str2activation(layer[1]));
}
layers.emplace_back(layers.back().output_dim, nbOutputs, 0.0, NeuralNetwork::Activation::LINEAR);
checkLayersCompatibility();
for(Layer layer : layers)
addLayerToModel(model, layer);
}
void MLPBase::addLayerToModel(dynet::ParameterCollection & model, Layer & layer)
{
dynet::Parameter W = model.add_parameters({(unsigned)layer.output_dim, (unsigned)layer.input_dim});
dynet::Parameter b = model.add_parameters({(unsigned)layer.output_dim});
if (!ProgramParameters::randomParameters)
{
W.set_value(std::vector<float>((unsigned)layer.output_dim * (unsigned)layer.input_dim, 1.0));
b.set_value(std::vector<float>((unsigned)layer.output_dim, 1.0));
}
parameters.push_back({W,b});
}
void MLPBase::checkLayersCompatibility()
{
if(layers.empty())
{
fprintf(stderr, "ERROR (%s) : constructed mlp with 0 layers. Aborting.\n", ERRINFO);
exit(1);
}
for(unsigned int i = 0; i < layers.size()-1; i++)
if(layers[i].output_dim != layers[i+1].input_dim)
{
fprintf(stderr, "ERROR (%s) : constructed mlp with incompatible layers. Aborting.\n", ERRINFO);
exit(1);
}
}
std::vector<float> MLPBase::predict(FeatureModel::FeatureDescription & fd)
{
bool currentDropoutActive = dropoutActive;
dropoutActive = false;
dynet::ComputationGraph cg;
std::vector<dynet::Expression> expressions;
for (auto & featValue : fd.values)
expressions.emplace_back(NeuralNetwork::featValue2Expression(cg, featValue));
dynet::Expression input = dynet::concatenate(expressions);
dynet::Expression output = run(cg, input);
dropoutActive = currentDropoutActive;
return as_vector(cg.forward(output));
}
float MLPBase::update(FeatureModel::FeatureDescription & fd, int gold)
{
fds.emplace_back(fd);
golds.emplace_back(gold);
if ((int)fds.size() < ProgramParameters::batchSize)
return 0.0;
std::vector<dynet::Expression> inputs;
dynet::ComputationGraph cg;
for (auto & example : fds)
{
std::vector<dynet::Expression> expressions;
for (auto & featValue : example.values)
expressions.emplace_back(NeuralNetwork::featValue2Expression(cg, featValue));
dynet::Expression input = dynet::concatenate(expressions);
inputs.emplace_back(input);
}
dynet::Expression batchedInput = dynet::concatenate_to_batch(inputs);
dynet::Expression output = run(cg, batchedInput);
dynet::Expression batchedLoss;
if (ProgramParameters::loss == "neglogsoftmax")
{
batchedLoss = dynet::sum_batches(pickneglogsoftmax(output, golds));
}
else if (ProgramParameters::loss == "weighted")
{
batchedLoss = weightedLoss(output, golds);
}
else if (ProgramParameters::loss == "errorCorrection")
{
batchedLoss = errorCorrectionLoss(cg, output, golds);
}
else
{
fprintf(stderr, "ERROR (%s) : Unknown loss function \'%s\'. Aborting.\n", ERRINFO, ProgramParameters::loss.c_str());
exit(1);
}
cg.backward(batchedLoss);
fds.clear();
golds.clear();
return as_scalar(batchedLoss.value());
}
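// Editorial note (not in the original source): weightedLoss below computes the
// usual pickneglogsoftmax loss per batch element, then multiplies that element's
// loss by 100 whenever the argmax prediction is class 1 while the gold class is 0,
// i.e. it heavily penalises that one specific confusion.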
dynet::Expression MLPBase::weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds)
{
std::vector<dynet::Expression> lossExpr;
for (unsigned int i = 0; i < output.dim().batch_elems(); i++)
{
lossExpr.emplace_back(dynet::pickneglogsoftmax(dynet::pick_batch_elem(output, i), oneHotGolds[i]));
auto outputVect = dynet::as_vector(dynet::pick_batch_elem(output,i).value());
int prediction = 0;
for (unsigned int j = 1; j < outputVect.size(); j++)
if(outputVect[j] > outputVect[prediction])
prediction = j;
int gold = oneHotGolds[i];
if (prediction == 1 && gold == 0)
{
lossExpr.back() = lossExpr.back() * 100.0;
}
}
return dynet::sum(lossExpr);
}
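// Editorial note (not in the original source): in errorCorrectionLoss below,
// with a = softmax(output_i)[0] (the probability assigned to class 0) and
// c = 1 if the gold class is 0 and 0 otherwise, the per-example loss reads
//   pickneglogsoftmax(output_i, gold) + 2 - c - a*c + acos(a - 1)*(c - 1),
// i.e. the negative log-likelihood plus a correction term that only involves
// class 0.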
dynet::Expression MLPBase::errorCorrectionLoss(dynet::ComputationGraph & cg, dynet::Expression & output, std::vector<unsigned int> & oneHotGolds)
{
std::vector<dynet::Expression> lossExpr;
for (unsigned int i = 0; i < output.dim().batch_elems(); i++)
{
unsigned int u = 0;
dynet::Expression c = dynet::pick(dynet::one_hot(cg, layers.back().output_dim, oneHotGolds[i]),u);
dynet::Expression a = dynet::pick(dynet::softmax(dynet::pick_batch_elem(output,i)),u);
lossExpr.emplace_back(dynet::pickneglogsoftmax(dynet::pick_batch_elem(output, i),oneHotGolds[i])+2-c-a*c+(dynet::acos(a-1)*(c-1)));
if (ProgramParameters::debug)
{
cg.forward(lossExpr.back());
fprintf(stderr, "a=%.2f c=%.2f loss=%.2f\n", dynet::as_scalar(a.value()),dynet::as_scalar(c.value()),dynet::as_scalar(lossExpr.back().value()));
}
}
return dynet::sum(lossExpr);
}
dynet::Expression MLPBase::run(dynet::ComputationGraph & cg, dynet::Expression x)
{
static std::vector< std::pair<std::string,dynet::Expression> > exprForDebug;
// Expression for the current hidden state
dynet::Expression h_cur = x;
if (ProgramParameters::showFeatureRepresentation)
{
if (ProgramParameters::showFeatureRepresentation == 1)
for (unsigned int i = 0; i < 81; i++)
fprintf(stderr, "%s", i == 80 ? "\n" : "-");
exprForDebug.clear();
if (ProgramParameters::showFeatureRepresentation == 1)
exprForDebug.emplace_back("Input layer", h_cur);
if (ProgramParameters::showFeatureRepresentation >= 2)
exprForDebug.emplace_back("", h_cur);
}
for(unsigned int l = 0; l < layers.size(); l++)
{
// Initialize parameters in computation graph
dynet::Expression W = parameter(cg, parameters[l][0]);
dynet::Expression b = parameter(cg, parameters[l][1]);
// Apply affine transform
dynet::Expression a = dynet::affine_transform({b, W, h_cur});
// Apply activation function
dynet::Expression h = NeuralNetwork::activate(a, layers[l].activation);
// Take care of dropout
dynet::Expression h_dropped;
if(layers[l].dropout_rate > 0){
if(dropoutActive){
dynet::Expression mask = random_bernoulli(cg,
{(unsigned int)layers[l].output_dim}, 1 - layers[l].dropout_rate);
h_dropped = cmult(h, mask);
}
else{
h_dropped = h * (1 - layers[l].dropout_rate);
}
}
else{
h_dropped = h;
}
if (ProgramParameters::showFeatureRepresentation)
{
if (ProgramParameters::showFeatureRepresentation == 1)
{
exprForDebug.emplace_back("Result of h = h*W_" + std::to_string(l) + " + b_" + std::to_string(l), a);
exprForDebug.emplace_back("Result of h = a_" + std::to_string(l) + "(h)", h);
exprForDebug.emplace_back("Result of h = dropout_" + std::to_string(l) + "(h)", h_dropped);
}
else if (ProgramParameters::showFeatureRepresentation >= 2)
{
exprForDebug.emplace_back("", a);
exprForDebug.emplace_back("", h);
}
}
h_cur = h_dropped;
}
if (ProgramParameters::showFeatureRepresentation)
{
cg.forward(h_cur);
if (ProgramParameters::showFeatureRepresentation == 1)
{
for (auto & it : exprForDebug)
fprintf(stderr, "%s (dimension=%lu) :\n%s\n", it.first.c_str(), dynet::as_vector(it.second.value()).size(), NeuralNetwork::expression2str(it.second).c_str());
for (unsigned int i = 0; i < 81; i++)
fprintf(stderr, "%s", i == 80 ? "\n" : "-");
}
else if (ProgramParameters::showFeatureRepresentation >= 2)
{
for (auto & it : exprForDebug)
fprintf(stderr, "| %s |", NeuralNetwork::expression2str(it.second).c_str());
fprintf(stderr, "\n");
}
}
return h_cur;
}
void MLPBase::printParameters(FILE * output)
{
fprintf(output, "Parameters : NOT IMPLEMENTED\n");
}
void MLPBase::saveStruct(const std::string & filename)
{
File file(filename, "w");
FILE * fd = file.getDescriptor();
for (auto & layer : layers)
{
fprintf(fd, "Layer : %d %d %s %.2f\n", layer.input_dim, layer.output_dim, NeuralNetwork::activation2str(layer.activation).c_str(), layer.dropout_rate);
}
}
void MLPBase::saveParameters(const std::string & filename)
{
dynet::TextFileSaver s(filename, true);
std::string prefix("Layer_");
for(unsigned int i = 0; i < parameters.size(); i++)
{
s.save(parameters[i][0], prefix + std::to_string(i) + "_W");
s.save(parameters[i][1], prefix + std::to_string(i) + "_b");
}
}
void MLPBase::loadStruct(dynet::ParameterCollection & model, const std::string & filename)
{
File file(filename, "r");
FILE * fd = file.getDescriptor();
char activation[1024];
int input;
int output;
float dropout;
while (fscanf(fd, "Layer : %d %d %s %f\n", &input, &output, activation, &dropout) == 4)
layers.emplace_back(input, output, dropout, NeuralNetwork::str2activation(activation));
checkLayersCompatibility();
for (auto & layer : layers)
addLayerToModel(model, layer);
}
void MLPBase::loadParameters(dynet::ParameterCollection & model, const std::string & filename)
{
dynet::TextFileLoader loader(filename);
std::string prefix("Layer_");
for(unsigned int i = 0; i < parameters.size(); i++)
{
parameters[i][0] = loader.load_param(model, prefix + std::to_string(i) + "_W");
parameters[i][1] = loader.load_param(model, prefix + std::to_string(i) + "_b");
}
}
void MLPBase::printTopology(FILE * output)
{
fprintf(output, "(");
for(unsigned int i = 0; i < layers.size(); i++)
{
auto & layer = layers[i];
if(i == 0)
fprintf(output, "%d", layer.input_dim);
fprintf(output, "->%d", layer.output_dim);
}
fprintf(output, ")\n");
}
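Finally, the structure file written by saveStruct and read back by loadStruct is plain text, one "Layer : <input_dim> <output_dim> <activation> <dropout>" line per layer, and saveParameters appends the dynet parameters to the same file (TextFileSaver in append mode), which is how MLP::save uses the pair above. A round-trip sketch with an illustrative file name:

// Hypothetical round-trip, illustrative only.
void saveAndReload(MLPBase & mlp)
{
  mlp.saveStruct("mlp.model");       // writes e.g. "Layer : 300 150 RELU 0.30"
  mlp.saveParameters("mlp.model");   // appends the learned parameters

  dynet::ParameterCollection freshModel;
  MLPBase restored;
  restored.loadStruct(freshModel, "mlp.model");      // rebuilds the layers and their parameters
  restored.loadParameters(freshModel, "mlp.model");  // then reloads the saved values from the same file
}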