Commit 9b8f2f13 authored by Marjorie Armando
initial commit
parent a4242c24
#include "train_cff.hpp"
using namespace std;
using namespace dynet;
/**
* \brief Build a feed forward layer
*
* \param input_dim : Input dimension
* \param output_dim : Output dimension
* \param activation : Activation function
* \param dropout_rate : Dropout rate
*/
Layer::Layer(unsigned input_dim, unsigned output_dim, Activation activation, float dropout_rate):
input_dim(input_dim),
output_dim(output_dim),
activation(activation),
dropout_rate(dropout_rate)
{
}
/**
* \brief Default constructor
* \details Don't forget to add layers!
*/
MLP::MLP(ParameterCollection & model)
{
LAYERS = 0;
}
/**
* \brief Returns a Multilayer perceptron
* \details Creates a feedforward multilayer perceptron based on a list of layer descriptions
*
* \param model : ParameterCollection (to contain parameters)
* \param layers : Layers description
*/
MLP::MLP(ParameterCollection& model, vector<Layer> layers)
{
// Verify layers compatibility
for (unsigned l = 0; l < layers.size() - 1; ++l)
{
if (layers[l].output_dim != layers[l + 1].input_dim)
throw invalid_argument("Layer dimensions don't match");
}
// Register parameters in model
for (Layer layer : layers)
{
append(model, layer);
}
}
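/**
 * Usage sketch (illustrative dimensions): consecutive layers must chain,
 * i.e. each layer's output_dim equals the next layer's input_dim.
 * \code
 * ParameterCollection model;
 * MLP nn(model, vector<Layer>({
 *     Layer(5, 50, RELU, 0.2),   // 5 inputs -> 50 hidden units
 *     Layer(50, 3, LINEAR, 0.0)  // 50 hidden units -> 3 output classes
 * }));
 * \endcode
 */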
/**
* \brief Append a layer at the end of the network
*
* \param model
* \param layer
*/
void MLP::append(ParameterCollection& model, Layer layer)
{
// Check compatibility
if (LAYERS > 0)
if (layers[LAYERS - 1].output_dim != layer.input_dim)
throw invalid_argument("Layer dimensions don't match");
// Add to layers
layers.push_back(layer);
LAYERS++;
// Register parameters
Parameter W = model.add_parameters({layer.output_dim, layer.input_dim});
Parameter b = model.add_parameters({layer.output_dim});
params.push_back({W, b});
}
/**
* \brief Run the MLP on an input vector/batch
*
* \param x : Input expression (vector or batch)
* \param cg : Computation graph
*
* \return Expression for the output of the last layer
*/
Expression MLP::run(Expression x, ComputationGraph& cg)
{
Expression h_cur = x; // Expression for the current hidden state
for (unsigned l = 0; l < LAYERS; ++l)
{
/* Initialize parameters in computation graph */
Expression W = parameter(cg, params[l][0]);
Expression b = parameter(cg, params[l][1]);
Expression a = affine_transform({b, W, h_cur}); // Apply affine transform
Expression h = activate(a, layers[l].activation); // Apply activation function
Expression h_dropped; // Take care of dropout
if (layers[l].dropout_rate > 0)
{
if (dropout_active)
{
// During training, drop random units
Expression mask = random_bernoulli(cg, {layers[l].output_dim}, 1 - layers[l].dropout_rate);
h_dropped = cmult(h, mask);
}
else
{
h_dropped = h * (1 - layers[l].dropout_rate); // At test time, multiply by the retention rate to scale
}
}
else
h_dropped = h; // If there's no dropout, don't do anything
h_cur = h_dropped; // Set current hidden state
}
return h_cur;
}
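/*
 * Note on the scaling above: each unit is kept with probability
 * r = 1 - dropout_rate, so the expected value of cmult(h, mask) is r * h.
 * Multiplying by r at test time therefore keeps the activations on the same
 * scale as the expected training-time activations.
 */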
/**
* \brief Return the negative log likelihood for the (batched) pair (x,y)
*
* \param x : Input batch
* \param labels : Output labels
* \param cg : Computation graph
*
* \return Expression for the negative log likelihood on the batch
*/
Expression MLP::get_nll(Expression x, vector<unsigned int> labels, ComputationGraph& cg)
{
Expression y = run(x, cg); // compute output
Expression losses = pickneglogsoftmax(y, labels); // Negative log-softmax picked at the gold labels
return sum_batches(losses); // Sum across batches
}
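/**
 * Batched usage sketch (mirrors the training loop in the program below;
 * assumes an MLP `nn`, a ComputationGraph `cg` and 5-dimensional inputs):
 * \code
 * vector<Expression> xs;    // one {5}-dimensional input expression per sample
 * vector<unsigned int> ys;  // one gold label per sample
 * // ... fill xs and ys ...
 * Expression x_batch = reshape(concatenate_cols(xs), Dim({5}, xs.size()));
 * Expression loss_expr = nn.get_nll(x_batch, ys, cg);
 * float loss = as_scalar(cg.forward(loss_expr));
 * \endcode
 */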
/**
* \brief Predict the most probable label
* \details Returns the argmax of the softmax of the network's output
*
* \param x : Input
* \param cg : Computation graph
*
* \return Label index
*/
int MLP::predict(Expression x, ComputationGraph& cg)
{
Expression y = run(x, cg); // run MLP to get class distribution
vector<float> probs = as_vector(cg.forward(y)); // Get values
// Get argmax
unsigned argmax = 0;
for (unsigned i = 1; i < probs.size(); ++i)
{
if (probs[i] > probs[argmax])
argmax = i;
}
return argmax;
}
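/**
 * Single-instance prediction sketch (assumes a trained MLP `nn`, dropout
 * disabled, and 5-dimensional features as in the program below):
 * \code
 * ComputationGraph cg;
 * vector<float> feats = {0.f, 1.f, 2.f, 0.f, 1.f};  // illustrative values
 * Expression x = input(cg, {5}, feats);
 * int label = nn.predict(x, cg);  // index of the most probable class
 * \endcode
 */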
/**
* \brief Enable dropout
* \details Use this during training, or at test time if you want to sample outputs with Monte Carlo dropout
*/
void MLP::enable_dropout()
{
dropout_active = true;
}
/**
* \brief Disable dropout
* \details Do this during testing if you want a deterministic network
*/
void MLP::disable_dropout()
{
dropout_active = false;
}
/**
* \brief Check whether dropout is enabled or not
*
* \return Dropout state
*/
bool MLP::is_dropout_enabled()
{
return dropout_active;
}
inline Expression MLP::activate(Expression h, Activation f)
{
switch (f)
{
case LINEAR:
return h;
case RELU:
return rectify(h);
case SIGMOID:
return logistic(h);
case TANH:
return tanh(h);
case SOFTMAX:
return softmax(h);
default:
throw invalid_argument("Unknown activation function");
}
}
#ifndef TRAIN_CFF
#define TRAIN_CFF
#include "dynet/nodes.h"
#include "dynet/dynet.h"
#include "dynet/training.h"
#include "dynet/timing.h"
#include "dynet/expr.h"
#include <iostream>
#include <fstream>
#include <sstream>
#include <algorithm>
#include <vector>
/**
* Common activation functions used in multilayer perceptrons
*/
enum Activation
{
SIGMOID,
TANH,
RELU,
LINEAR,
SOFTMAX
};
/**
* \ingroup ffbuilders
* \struct Layer
* \brief Layer structure
* \details Contains all parameters defining a layer
*/
struct Layer
{
public:
unsigned int input_dim; /* Input dimension */
unsigned int output_dim; /* Output dimension */
Activation activation = LINEAR; /* Activation function */
float dropout_rate = 0; /* Dropout rate */
Layer(unsigned input_dim, unsigned output_dim, Activation activation, float dropout_rate);
Layer() {};
};
/**
* \struct MLP
* \brief Multilayer perceptron
*/
struct MLP
{
protected:
unsigned int LAYERS = 0; // Number of layers
std::vector<Layer> layers; // Layers
std::vector<std::vector<dynet::Parameter>> params; // Parameters
bool dropout_active = true;
public:
MLP(dynet::ParameterCollection & model);
MLP(dynet::ParameterCollection& model, std::vector<Layer> layers);
void append(dynet::ParameterCollection& model, Layer layer);
dynet::Expression run(dynet::Expression x, dynet::ComputationGraph& cg);
dynet::Expression get_nll(dynet::Expression x, std::vector<unsigned int> labels, dynet::ComputationGraph& cg);
int predict(dynet::Expression x, dynet::ComputationGraph& cg);
void enable_dropout();
void disable_dropout();
bool is_dropout_enabled();
private:
inline dynet::Expression activate(dynet::Expression h, Activation f);
};
#endif
/**
USAGE :
./trainCFF train_file dev_file batch_size nb_epochs fm_file [model_file]
**/
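/*
 * Example invocation (file names and epoch count are illustrative):
 *   ./trainCFF train.cff dev.cff 297 20 model.fm mlp1234.params
 * where model.fm is the feature-model file read by read_fm() and the final
 * argument (pre-trained parameters to load) is optional.
 */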
#include <iostream>
#include <fstream>
#include "train_cff.hpp"
#include "dynet/io.h"
extern "C"
{
#include "feat_model.h"
#include "cf_file.h"
}
#define NB_FEATS 5
using namespace std;
using namespace dynet;
/**
* Reads the network's input dimension from an fm (feature model) file
*/
int read_fm(char *filename_fm, char *cff_filename){
int dim, input_dim = 0;
feat_model * feat_m = feat_model_read(filename_fm, NULL ,0);
dim = feat_m->dim;
int *tab = cff_max_value_per_column(cff_filename, dim);
for(int i = 0; i < dim; i++){
input_dim += tab[i];
}
return input_dim;
}
/**
* Loads the features of the train file into the matrix cff_train
* Loads the features of the dev file into the matrix cff_dev
* Loads the labels of the train file into the vector cff_train_labels
* Loads the labels of the dev file into the vector cff_dev_labels
**/
void read_files(char* train_filename, char* dev_filename, vector<vector<float>> &cff_train,
vector<vector<float>> &cff_dev, vector<unsigned int> &cff_train_labels,
vector<unsigned int> &cff_dev_labels)
{
ifstream train_file(train_filename, ios::in);
ifstream dev_file(dev_filename, ios::in);
if(!train_file)
{
cerr << "Impossible d'ouvrir le fichier " << train_filename << endl;
exit(EXIT_FAILURE);
}
if(!dev_file)
{
cerr << "Impossible d'ouvrir le fichier " << dev_filename << endl;
exit(EXIT_FAILURE);
}
vector<float> tmp_vect(NB_FEATS);
unsigned int tmp;
while(train_file >> tmp)
{
cff_train_labels.push_back(tmp);
for(unsigned int i=0; i<NB_FEATS; ++i)
train_file >> tmp_vect[i];
cff_train.push_back(tmp_vect);
}
while(dev_file >> tmp)
{
cff_dev_labels.push_back(tmp);
for(unsigned int i=0; i<NB_FEATS; ++i)
dev_file >> tmp_vect[i];
cff_dev.push_back(tmp_vect);
}
}
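/*
 * Expected line format for both the train and dev files (as read by the loops
 * above): one integer label followed by NB_FEATS feature values, e.g.
 *   2 0.0 1.0 3.0 0.0 1.0
 */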
int main(int argc, char** argv)
{
if(argc > 7 || argc < 6)
{
cerr << "Usage : " << argv[0]
<< "train_file<string> dev_file<string> batch_size<int> nb_epochs<int> [model_file<string>]\n";
exit(EXIT_FAILURE);
}
// Fetch dynet params ----------------------------------------------------------------------------
auto dyparams = dynet::extract_dynet_params(argc, argv);
dynet::initialize(dyparams);
// Fetch program specific parameters (see ../utils/cl-args.h) ------------------------------------
/*Params params;
get_args(argc, argv, params, TRAIN_SUP);*/ // not needed here
unsigned int batch_size = atoi(argv[3]); // e.g. 297
int nb_epochs = atoi(argv[4]);
// Output file containing the dev file's predictions ---------------------------------------------
fstream predicted_file("predictions.txt", ios::in | ios::out | ios::trunc);
if(!predicted_file)
{
cerr << "Problème avec le fichier predictions.txt \n";
exit(EXIT_FAILURE);
}
// Load Dataset ----------------------------------------------------------------------------------
vector<vector<float>> cff_train, cff_dev; // feature matrices (data); float to be wide enough
vector<unsigned int> cff_train_labels, cff_dev_labels; // target (label) vectors
read_files(argv[1], argv[2], cff_train, cff_dev, cff_train_labels, cff_dev_labels);
int input_dim = read_fm(argv[5], argv[1]); // note: not used below; the network hard-codes 5 input features
// ParameterCollection name (for saving) ---------------------------------------------------------
ostringstream os;
// Store a bunch of information in the model name
os << "mlp" << getpid() << ".params";
/*<< "_" << 784 << "-" << 512 << "-relu-" << 0.2
<< "_" << 512 << "-" << 512 << "-relu-" << 0.2
<< "_" << 512 << "-" << 10 << "-softmax"
<< "_" << getpid()
<< ".params";*/
const string fname = os.str();
cerr << "Parameters will be written to: " << fname << endl;
// Build model -----------------------------------------------------------------------------------
ParameterCollection model;
// Use Adam optimizer
AdamTrainer trainer(model);
trainer.clip_threshold *= batch_size;
// Create model
MLP nn(model, vector<Layer>({
Layer(/* input_dim (number of features) */ 5, /* output_dim */ 50, /* activation */ RELU, /* dropout_rate */ 0.2),
Layer(/* input_dim */ 50, /* output_dim */ 100, /* activation */ RELU, /* dropout_rate */ 0.2),
Layer(/* input_dim */ 100, /* output_dim */ 150, /* activation */ RELU, /* dropout_rate */ 0.2),
Layer(/* input_dim */ 150, /* output_dim (number of possible classes) */ 3, /* activation */ LINEAR, /* dropout_rate */ 0.0)
}));
// Load preexisting weights (if provided)
if (argv[6] != NULL)
{
TextFileLoader loader(argv[6]);
loader.populate(model);
}
// Initialize variables for training -------------------------------------------------------------
// Best number of correct dev predictions seen so far
double best = 0;
// Number of batches in training set
unsigned int num_batches = cff_train.size() / batch_size - 1; // number of training samples / batch_size - 1
// Random indexing
unsigned int si;
vector<unsigned int> order(num_batches);
for (unsigned int i = 0; i < num_batches; ++i)
order[i] = i;
int epoch = 0;
vector<Expression> cur_batch;
vector<unsigned int> cur_labels;
// Run for the given number of epochs (or indefinitely if nb_epochs is negative)
while (epoch < nb_epochs || nb_epochs < 0)
{
// Reshuffle the dataset
cerr << "**SHUFFLE\n";
random_shuffle(order.begin(), order.end());
// Initialize loss and number of samples processed (to average loss)
double loss = 0;
double num_samples = 0;
// Start timer
std::unique_ptr<Timer> iteration(new Timer("completed in"));
// Activate dropout
nn.enable_dropout();
for (si = 0; si < num_batches; ++si)
{
// build graph for this instance
ComputationGraph cg;
// Compute batch start id and size
int id = order[si] * batch_size;
unsigned int bsize = std::min((unsigned int) cff_train.size() - id, batch_size);
// cout <<"BSIZE = "<<bsize<<endl;
// Get input batch
cur_batch = vector<Expression>(bsize);
cur_labels = vector<unsigned int>(bsize);
for (unsigned int idx = 0; idx < bsize; ++idx)
{
cur_batch[idx] = input(cg, {NB_FEATS}, cff_train[id + idx]);
cur_labels[idx] = cff_train_labels[id + idx];
}
// Concatenate the inputs as columns and reshape into one batched expression
Expression x_batch = reshape(concatenate_cols(cur_batch), Dim({NB_FEATS}, bsize));
// Get negative log likelihood on batch
Expression loss_expr = nn.get_nll(x_batch, cur_labels, cg);
// Get scalar error for monitoring
loss += as_scalar(cg.forward(loss_expr));
// Increment number of samples processed
num_samples += bsize;
// Compute gradient with backward pass
cg.backward(loss_expr);
// Update parameters
trainer.update();
//cout<<"SI= "<<si<<endl;
// Print progress every tenth of the dataset
if ((si + 1) % (num_batches / 10) == 0 || si == num_batches - 1)
{
// Print informations
trainer.status();
cerr << " E = " << (loss / num_samples) << ' ';
// Reinitialize timer
iteration.reset(new Timer("completed in"));
// Reinitialize loss
loss = 0;
num_samples = 0;
}
}
// Disable dropout for dev testing
nn.disable_dropout();
// Show score on dev data
if (si == num_batches)
{
double dpos = 0;
int nb_errors = 0;
for (unsigned int i = 0; i < cff_dev.size(); ++i)
{
// build graph for this instance
ComputationGraph cg;
// Get input expression
Expression x = input(cg, {NB_FEATS}, cff_dev[i]);
// Predict the most probable label
int predicted_idx = nn.predict(x, cg);
// Increment count of correct classifications
if (predicted_idx == (int) cff_dev_labels[i])
dpos++;
/*else
++nb_errors;*/
if(epoch+1 == nb_epochs)
predicted_file << predicted_idx << endl;
}
// If the dev accuracy is higher than the best so far, save the model
if (dpos > best)
{
best = dpos;
TextFileSaver saver(fname);
saver.save(model);
}
// Print informations
cerr << "\n***DEV [epoch=" << epoch+1
<< "] E = " << (dpos / (double) cff_dev.size()) << ' ';
//cerr << "Success rate = " << 100-(float)nb_errors*100/cff_dev_labels.size() << "%\n\n";
// Reinitialize timer
iteration.reset(new Timer("completed in"));
}
// Increment epoch
++epoch;
}
return 0;
}