#include "train_cff.hpp"
using namespace std;
using namespace dynet;
/**
* \brief Build a feed forward layer
*
* \param input_dim : Input dimension
* \param output_dim : Output dimension
* \param activation : Activation function
* \param dropout_rate : Dropout rate
*/
Layer::Layer(unsigned input_dim, unsigned output_dim, Activation activation, float dropout_rate):
input_dim(input_dim),
output_dim(output_dim),
activation(activation),
dropout_rate(dropout_rate)
{
}
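// A hypothetical example (the dimensions and dropout rate are illustrative, not from this file):
// Layer(784, 512, RELU, 0.2f) describes a 784 -> 512 layer with ReLU activation and 20% dropout.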
/**
* \brief Default constructor
* \details Don't forget to add layers!
*/
MLP::MLP(ParameterCollection & model)
{
LAYERS = layers.size();
}
/**
* \brief Build a multilayer perceptron
* \details Creates a feedforward multilayer perceptron from the layer descriptions read from a file
*
* \param model : ParameterCollection (to contain parameters)
* \param filename : file containing the MLP's structure
*/
MLP::MLP(ParameterCollection& model, char* filename)
{
read_struct_mlp(filename);
// read_struct_mlp fills `layers` directly; move its contents to a temporary so that
// append() (which pushes into `layers` itself) does not duplicate them or invalidate the loop
vector<Layer> loaded;
loaded.swap(layers);
LAYERS = 0;
// Verify layers compatibility
for (unsigned l = 0; l + 1 < loaded.size(); ++l)
{
if (loaded[l].output_dim != loaded[l + 1].input_dim)
throw invalid_argument("Layer dimensions don't match");
}
// Register parameters in model
for (Layer& layer : loaded)
{
append(model, layer);
}
}
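// Usage sketch for building the network from a structure file (the file name is illustrative,
// and DyNet is assumed to have been initialized with dynet::initialize beforehand):
// dynet::ParameterCollection model;
// char fname[] = "mlp_structure.txt";
// MLP nn(model, fname);
// See read_struct_mlp at the end of this file for the expected file format.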
/**
* \brief Append a layer at the end of the network
*
* \param model
* \param layer
*/
void MLP::append(ParameterCollection& model, Layer layer)
{
// Check compatibility
if (LAYERS > 0)
if (layers[LAYERS - 1].output_dim != layer.input_dim)
throw invalid_argument("Layer dimensions don't match");
// Add to layers
layers.push_back(layer);
LAYERS++;
// Register parameters
Parameter W = model.add_parameters({layer.output_dim, layer.input_dim});
Parameter b = model.add_parameters({layer.output_dim});
params.push_back({W, b});
}
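// Sketch of stacking layers by hand instead of reading them from a file
// (dimensions and dropout rates are illustrative):
// MLP nn(model);
// nn.append(model, Layer(784, 512, RELU, 0.2f));
// nn.append(model, Layer(512, 10, LINEAR, 0.0f));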
/**
* \brief Run the MLP on an input vector/batch
*
* \param x : Input expression (vector or batch)
* \param cg : Computation graph
*
* \return Expression for the output of the final layer
*/
Expression MLP::run(Expression x, ComputationGraph& cg)
{
Expression h_cur = x; // Expression for the current hidden state
for (unsigned l = 0; l < LAYERS; ++l)
{
/* Initialize parameters in computation graph */
Expression W = parameter(cg, params[l][0]);
Expression b = parameter(cg, params[l][1]);
Expression a = affine_transform({b, W, h_cur}); // Apply affine transform
Expression h = activate(a, layers[l].activation); // Apply activation function
Expression h_dropped; // Take care of dropout
if (layers[l].dropout_rate > 0)
{
if (dropout_active)
{
// During training, drop random units
Expression mask = random_bernoulli(cg, {layers[l].output_dim}, 1 - layers[l].dropout_rate);
h_dropped = cmult(h, mask);
}
else
{
h_dropped = h * (1 - layers[l].dropout_rate); // At test time, multiply by the retention rate to scale
}
}
else
h_dropped = h; // If there's no dropout, don't do anything
h_cur = h_dropped; // Set current hidden state
}
return h_cur;
}
/**
* \brief Return the negative log likelihood for the (batched) pair (x,y)
*
* \param x : Input batch
* \param labels : Output labels
* \param cg : Computation graph
*
* \return Expression for the negative log likelihood on the batch
*/
Expression MLP::get_nll(Expression x, vector<unsigned int> labels, ComputationGraph& cg)
{
Expression y = run(x, cg); // compute output
Expression losses = pickneglogsoftmax(y, labels); // Negative log-softmax loss for each element of the batch
return sum_batches(losses); // Sum the losses over the batch
}
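// Training-step sketch. The names trainer, batch_values, batch_labels, batch_size and INPUT_DIM
// are illustrative and not defined in this file:
// dynet::AdamTrainer trainer(model);
// dynet::ComputationGraph cg;
// Expression x = input(cg, Dim({INPUT_DIM}, batch_size), batch_values);
// Expression loss = nn.get_nll(x, batch_labels, cg);
// float loss_value = as_scalar(cg.forward(loss));
// cg.backward(loss);
// trainer.update();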
/**
* \brief Predict the most probable label
* \details Returns the argmax of the softmax of the network's output
*
* \param x : Input
* \param cg : Computation graph
*
* \return Label index
*/
int MLP::predict(Expression x, ComputationGraph& cg)
{
Expression y = run(x, cg); // run MLP to get class distribution
vector<float> probs = as_vector(cg.forward(y)); // Get values
// Get argmax
unsigned argmax = 0;
for (unsigned i = 1; i < probs.size(); ++i)
{
if (probs[i] > probs[argmax])
argmax = i;
}
return argmax;
}
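// Prediction sketch (test_instance and INPUT_DIM are illustrative names):
// dynet::ComputationGraph cg;
// Expression x = input(cg, {INPUT_DIM}, test_instance);
// nn.disable_dropout(); // make the forward pass deterministic
// int label = nn.predict(x, cg);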
/**
* \brief Enable dropout
* \details Use this during training, or during testing if you want to sample outputs using Monte Carlo dropout
*/
void MLP::enable_dropout()
{
dropout_active = true;
}
/**
* \brief Disable dropout
* \details Do this during testing if you want a deterministic network
*/
void MLP::disable_dropout()
{
dropout_active = false;
}
/**
* \brief Check whether dropout is enabled or not
*
* \return Dropout state
*/
bool MLP::is_dropout_enabled()
{
return dropout_active;
}
inline Expression MLP::activate(Expression h, Activation f)
{
switch (f)
{
case LINEAR:
return h;
case RELU:
return rectify(h);
case SIGMOID:
return logistic(h);
case TANH:
return tanh(h);
case SOFTMAX:
return softmax(h);
default:
throw invalid_argument("Unknown activation function");
}
}
void MLP::read_struct_mlp(char* filename)
{
ifstream file(filename, ios::in);
if(!file)
{
cerr << "Impossible d'ouvrir le fichier " << filename << endl;
exit(EXIT_FAILURE);
}
unsigned input_dim, output_dim;
int activation;
float dropout_rate;
while(file >> input_dim) // input dimension
{
file >> output_dim; // output dimension
file >> activation; // activation function (integer value of the Activation enum)
file >> dropout_rate; // dropout rate
Layer tmp_layer(input_dim, output_dim, static_cast<Activation>(activation), dropout_rate);
layers.push_back(tmp_layer);
}
}
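// Expected structure file layout (one layer per line, whitespace-separated):
// input_dim output_dim activation dropout_rate
// where "activation" is the integer value of the Activation enum declared in train_cff.hpp.
// A hypothetical two-layer network, assuming LINEAR = 0 and RELU = 1 in that enum:
// 784 512 1 0.2
// 512 10 0 0.0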