#include "train_cff.hpp"
using namespace std;
using namespace dynet;
/**
* \brief Build a feed forward layer
*
* \param input_dim : Input dimension
* \param output_dim : Output dimension
* \param activation : Activation function
* \param dropout_rate : Dropout rate
*/
Layer::Layer(unsigned input_dim, unsigned output_dim, Activation activation, float dropout_rate):
input_dim(input_dim),
output_dim(output_dim),
activation(activation),
dropout_rate(dropout_rate)
{
}
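// A hypothetical example (the dimensions and dropout rate are illustrative, not from this file):
// Layer(784, 512, RELU, 0.2f) describes a 784 -> 512 layer with ReLU activation and 20% dropout.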
/**
* \brief Default constructor
* \details Don't forget to add layers!
*/
MLP::MLP(ParameterCollection & model)
{
LAYERS = layers.size();
}
/**
* \brief Build a multilayer perceptron
* \details Creates a feedforward multilayer perceptron from the layer descriptions read from a file
*
* \param model : ParameterCollection (to contain parameters)
* \param filename : file containing the MLP's structure
*/
MLP::MLP(ParameterCollection& model, char* filename)
{
read_struct_mlp(filename);
// read_struct_mlp fills `layers` directly; move its contents to a temporary so that
// append() (which pushes into `layers` itself) does not duplicate them or invalidate the loop
vector<Layer> loaded;
loaded.swap(layers);
LAYERS = 0;
// Verify layers compatibility
for (unsigned l = 0; l + 1 < loaded.size(); ++l)
{
if (loaded[l].output_dim != loaded[l + 1].input_dim)
throw invalid_argument("Layer dimensions don't match");
}
// Register parameters in model
for (Layer& layer : loaded)
{
append(model, layer);
}
}
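// Usage sketch for building the network from a structure file (the file name is illustrative,
// and DyNet is assumed to have been initialized with dynet::initialize beforehand):
// dynet::ParameterCollection model;
// char fname[] = "mlp_structure.txt";
// MLP nn(model, fname);
// See read_struct_mlp at the end of this file for the expected file format.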
/**
* \brief Append a layer at the end of the network
*
* \param model
* \param layer
*/
void MLP::append(ParameterCollection& model, Layer layer)
{
// Check compatibility
if (LAYERS > 0)
if (layers[LAYERS - 1].output_dim != layer.input_dim)
throw invalid_argument("Layer dimensions don't match");
// Add to layers
layers.push_back(layer);
LAYERS++;
// Register parameters
Parameter W = model.add_parameters({layer.output_dim, layer.input_dim});
Parameter b = model.add_parameters({layer.output_dim});
params.push_back({W, b});
}
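// Sketch of stacking layers by hand instead of reading them from a file
// (dimensions and dropout rates are illustrative):
// MLP nn(model);
// nn.append(model, Layer(784, 512, RELU, 0.2f));
// nn.append(model, Layer(512, 10, LINEAR, 0.0f));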
/**
* \brief Run the MLP on an input vector/batch
*
* \param x : Input expression (vector or batch)
* \param cg : Computation graph
*
* \return Expression for the output of the final layer
*/
Expression MLP::run(Expression x, ComputationGraph& cg)
{
Expression h_cur = x; // Expression for the current hidden state
for (unsigned l = 0; l < LAYERS; ++l)
{
/* Initialize parameters in computation graph */
Expression W = parameter(cg, params[l][0]);
Expression b = parameter(cg, params[l][1]);
Expression a = affine_transform({b, W, h_cur}); // Apply affine transform
Expression h = activate(a, layers[l].activation); // Apply activation function
Expression h_dropped; // Take care of dropout
if (layers[l].dropout_rate > 0)
{
if (dropout_active)
{
// During training, drop random units
Expression mask = random_bernoulli(cg, {layers[l].output_dim}, 1 - layers[l].dropout_rate);
h_dropped = cmult(h, mask);
}
else
{
h_dropped = h * (1 - layers[l].dropout_rate); // At test time, multiply by the retention rate to scale
}
}
else
h_dropped = h; // If there's no dropout, don't do anything
h_cur = h_dropped; // Set current hidden state
}
return h_cur;
}
/**
* \brief Return the negative log likelihood for the (batched) pair (x,y)
*
* \param x : Input batch
* \param labels : Output labels
* \param cg : Computation graph
*
* \return Expression for the negative log likelihood on the batch
*/
Expression MLP::get_nll(Expression x, vector<unsigned int> labels, ComputationGraph& cg)
{
Expression y = run(x, cg); // compute output
Expression losses = pickneglogsoftmax(y, labels); // Negative log-softmax loss for each element of the batch
return sum_batches(losses); // Sum the losses over the batch
}
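// Training-step sketch. The names trainer, batch_values, batch_labels, batch_size and INPUT_DIM
// are illustrative and not defined in this file:
// dynet::AdamTrainer trainer(model);
// dynet::ComputationGraph cg;
// Expression x = input(cg, Dim({INPUT_DIM}, batch_size), batch_values);
// Expression loss = nn.get_nll(x, batch_labels, cg);
// float loss_value = as_scalar(cg.forward(loss));
// cg.backward(loss);
// trainer.update();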
/**
* \brief Predict the most probable label
* \details Returns the argmax of the softmax of the network's output
*
* \param x : Input
* \param cg : Computation graph
*
* \return Label index
*/
int MLP::predict(Expression x, ComputationGraph& cg)
{
Expression y = run(x, cg); // run MLP to get class distribution
vector<float> probs = as_vector(cg.forward(y)); // Get values
// Get argmax
unsigned argmax = 0;
for (unsigned i = 1; i < probs.size(); ++i)
{
if (probs[i] > probs[argmax])
argmax = i;
}
return argmax;
}
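// Prediction sketch (test_instance and INPUT_DIM are illustrative names):
// dynet::ComputationGraph cg;
// Expression x = input(cg, {INPUT_DIM}, test_instance);
// nn.disable_dropout(); // make the forward pass deterministic
// int label = nn.predict(x, cg);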
/**
* \brief Enable dropout
* \details Use this during training, or during testing if you want to sample outputs using Monte Carlo dropout
*/
void MLP::enable_dropout()
{
dropout_active = true;
}
/**
* \brief Disable dropout
* \details Do this during testing if you want a deterministic network
*/
void MLP::disable_dropout()
{
dropout_active = false;
}
/**
* \brief Check whether dropout is enabled or not
*
* \return Dropout state
*/
bool MLP::is_dropout_enabled()
{
return dropout_active;
}
inline Expression MLP::activate(Expression h, Activation f)
{
switch (f)
{
case LINEAR:
return h;
case RELU:
return rectify(h);
case SIGMOID:
return logistic(h);
case TANH:
return tanh(h);
case SOFTMAX:
return softmax(h);
default:
throw invalid_argument("Unknown activation function");
}
}
void MLP::read_struct_mlp(char* filename)
{
ifstream file(filename, ios::in);
if(!file)
{
cerr << "Impossible d'ouvrir le fichier " << filename << endl;
exit(EXIT_FAILURE);
}
unsigned input_dim, output_dim;
int activation;
float dropout_rate;
while(file >> input_dim) // input dimension
{
file >> output_dim; // output dimension
file >> activation; // activation function (integer value of the Activation enum)
file >> dropout_rate; // dropout rate
Layer tmp_layer(input_dim, output_dim, static_cast<Activation>(activation), dropout_rate);
layers.push_back(tmp_layer);
}
}
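// Expected structure file layout (one layer per line, whitespace-separated):
// input_dim output_dim activation dropout_rate
// where "activation" is the integer value of the Activation enum declared in train_cff.hpp.
// A hypothetical two-layer network, assuming LINEAR = 0 and RELU = 1 in that enum:
// 784 512 1 0.2
// 512 10 0 0.0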