train_cff.cpp

    #include "train_cff.hpp"
    
    using namespace std;
    using namespace dynet;
    
    /**
    	* \brief Build a feed forward layer
    	* 
    	* \param input_dim : Input dimension
    	* \param output_dim : Output dimension
    	* \param activation : Activation function
    	* \param dropout_rate : Dropout rate
    */
    Layer::Layer(unsigned input_dim, unsigned output_dim, Activation activation, float dropout_rate):
    	input_dim(input_dim),
    	output_dim(output_dim),
    	activation(activation),
    	dropout_rate(dropout_rate) 
    {
    
    }
    
    /**
    	* \brief Constructor for an empty network
    	* \details Don't forget to add layers!
    */
    MLP::MLP(ParameterCollection & model)
    {
    	LAYERS = layers.size();
    }
    
    /**
    	* \brief Build a multilayer perceptron
    	* \details Creates a feedforward multilayer perceptron from a file listing the layer descriptions
    	*
    	* \param model  : ParameterCollection (to contain parameters)
    	* \param filename : file containing the MLP's structure
    */
    MLP::MLP(ParameterCollection& model, char* filename/*vector<Layer> layers*/) 
    {
    	read_struct_mlp(filename);
    	// Verify layers compatibility
    	for (unsigned l = 0; l < layers.size() - 1; ++l) 
    	{
    		if (layers[l].output_dim != layers[l + 1].input_dim)
    			throw invalid_argument("Layer dimensions don't match");
    	}
    
    	// Register parameters in model
    	for (Layer layer : layers) 
    	{
    		append(model, layer);
    	}
    }
    
    /**
    	* \brief Append a layer at the end of the network
    	*
    	* \param model : ParameterCollection (to contain the layer's parameters)
    	* \param layer : Layer to append
    */
    void MLP::append(ParameterCollection& model, Layer layer) 
    {
    	// Check compatibility
    	if (LAYERS > 0)
    		if (layers[LAYERS - 1].output_dim != layer.input_dim)
    			throw invalid_argument("Layer dimensions don't match");
    
    	// Add to layers
    	layers.push_back(layer);
    	LAYERS++;
    	
    	// Register parameters
    	Parameter W = model.add_parameters({layer.output_dim, layer.input_dim});
    	Parameter b = model.add_parameters({layer.output_dim});
    	params.push_back({W, b});
    }
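
    /**
    	* Example (illustrative sketch, not part of the original file): building a
    	* small 784-512-10 network by hand with append(). The dimensions and
    	* dropout rates below are placeholder values.
    	*
    	* \code
    	* ParameterCollection model;
    	* MLP mlp(model);
    	* mlp.append(model, Layer(784, 512, RELU, 0.2));  // hidden layer with dropout
    	* mlp.append(model, Layer(512, 10, LINEAR, 0.0)); // output scores (softmax is applied in get_nll)
    	* \endcode
    */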
    
    /**
    	* \brief Run the MLP on an input vector/batch
    	*
    	* \param x : Input expression (vector or batch)
    	* \param cg : Computation graph
    	*
    	* \return Expression for the output of the final layer
    */
    Expression MLP::run(Expression x, ComputationGraph& cg)
    {
    	Expression h_cur = x; // Expression for the current hidden state
    	for (unsigned l = 0; l < LAYERS; ++l) 
    	{
    		/* Initialize parameters in computation graph */
    		Expression W = parameter(cg, params[l][0]);
    		Expression b = parameter(cg, params[l][1]);
    		
    		Expression a = affine_transform({b, W, h_cur}); // Apply affine transform
    		Expression h = activate(a, layers[l].activation); // Apply activation function
    		Expression h_dropped; // Take care of dropout
    		if (layers[l].dropout_rate > 0) 
    		{
    			if (dropout_active) 
    			{
    				// During training, drop random units
    				Expression mask = random_bernoulli(cg, {layers[l].output_dim}, 1 - layers[l].dropout_rate);
    				h_dropped = cmult(h, mask);
    			} 
    			else 
    			{
    				h_dropped = h * (1 - layers[l].dropout_rate); // At test time, multiply by the retention rate to scale
    			}
    		} 
    		else 
    			h_dropped = h; // If there's no dropout, don't do anything
    		h_cur = h_dropped; // Set current hidden state
    	}
    
    	return h_cur;
    }
    
    /**
    	* \brief Return the negative log likelihood for the (batched) pair (x,y)
    	*
    	* \param x : Input batch
    	* \param labels : Output labels
    	* \param cg : Computation graph
    	* 
    	* \return Expression for the negative log likelihood on the batch
    */
    Expression MLP::get_nll(Expression x, vector<unsigned int> labels, ComputationGraph& cg) 
    {
    	Expression y = run(x, cg); // compute output
    	Expression losses = pickneglogsoftmax(y, labels); // Negative log softmax loss for each element of the batch
    	
    	return sum_batches(losses); // Sum across batches
    }
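
    /**
    	* Example (illustrative sketch only, assuming a ParameterCollection `model`,
    	* an MLP `mlp`, a flattened input mini-batch `batch` with its gold labels
    	* `labels`, and `input_dim`/`batch_size` variables): one training step with
    	* DyNet's SimpleSGDTrainer.
    	*
    	* \code
    	* SimpleSGDTrainer trainer(model);
    	* mlp.enable_dropout();
    	* ComputationGraph cg;
    	* Expression x = input(cg, Dim({input_dim}, batch_size), batch);
    	* Expression loss = mlp.get_nll(x, labels, cg);
    	* float l = as_scalar(cg.forward(loss)); // forward pass, returns the batch loss
    	* cg.backward(loss);                     // backward pass
    	* trainer.update();                      // gradient step
    	* \endcode
    */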
    
    /**
    	* \brief Predict the most probable label
    	* \details Returns the argmax of the softmax of the network's output
    	*
    	* \param x : Input
    	* \param cg : Computation graph
    	*
    	* \return Label index
    */
    int MLP::predict(Expression x, ComputationGraph& cg) 
    {
    	Expression y = run(x, cg); // run MLP to get class distribution
    	vector<float> probs = as_vector(cg.forward(y)); // Get values
    	
    	// Get argmax
    	unsigned argmax = 0;
    	for (unsigned i = 1; i < probs.size(); ++i) 
    	{
    		if (probs[i] > probs[argmax])
    			argmax = i;
    	}
    
    	return argmax;
    }
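
    /**
    	* Example (illustrative sketch only, assuming an MLP `mlp`, an unsigned
    	* `input_dim` and a vector<float> `feats` holding one input vector):
    	* deterministic prediction at test time.
    	*
    	* \code
    	* mlp.disable_dropout();
    	* ComputationGraph cg;
    	* Expression x = input(cg, {input_dim}, feats);
    	* int label = mlp.predict(x, cg);
    	* \endcode
    */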
    
    /**
    	* \brief Enable dropout
    	* \details This is supposed to be used during training, or during testing if you want to sample outputs using Monte Carlo sampling
    */
    void MLP::enable_dropout() 
    {
    	dropout_active = true;
    }
    
    /**
    	* \brief Disable dropout
    	* \details Do this during testing if you want a deterministic network
    */
    void MLP::disable_dropout() 
    {
    	dropout_active = false;
    }
    
    /**
    	* \brief Check whether dropout is enabled or not
    	*
    	* \return Dropout state
    */
    bool MLP::is_dropout_enabled() 
    {
    	return dropout_active;
    }
    
    /**
    	* \brief Apply an activation function to an expression
    	*
    	* \param h : Input expression
    	* \param f : Activation function
    	*
    	* \return Activated expression
    */
    inline Expression MLP::activate(Expression h, Activation f) 
    {
    	switch (f) 
    	{
    		case LINEAR:
    			return h;
    		case RELU:
    			return rectify(h);
    		case SIGMOID:
    			return logistic(h);
    		case TANH:
    			return tanh(h);
    		case SOFTMAX:
    			return softmax(h);
    		default:
    			throw invalid_argument("Unknown activation function");
    	}
    }
    
    /**
    	* \brief Read the MLP's structure from a file
    	*
    	* \param filename : File describing the layers, one per line
    */
    void MLP::read_struct_mlp(char* filename)
    {
    	ifstream file(filename, ios::in);
    	if(!file)
    	{ 
    		cerr << "Cannot open file " << filename << endl;
    		exit(EXIT_FAILURE);
    	}
    	unsigned input_dim, output_dim;
    	int activation;
    	float dropout_rate;
    	
    	while(file >> input_dim) // input dimension
    	{
    		file >> output_dim;   // output dimension
    		file >> activation;   // activation function (integer value of the Activation enum)
    		file >> dropout_rate; // dropout rate
    		
    		Layer tmp_layer(input_dim, output_dim, static_cast<Activation>(activation), dropout_rate);
    		layers.push_back(tmp_layer);
    	}
    }
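
    /**
    	* Example (format inferred from the parsing loop above, and assuming the
    	* Activation enumerators are numbered in declaration order starting at 0,
    	* e.g. LINEAR = 0, RELU = 1): a structure file for a 784-512-10 network,
    	* one layer per line as "input_dim output_dim activation dropout_rate".
    	*
    	* \code
    	* 784 512 1 0.2
    	* 512 10 0 0.0
    	* \endcode
    */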