Commit 540e0902 authored by Marjorie Armando

construct the MLP structure automatically from a file

parent a3f93cb0
@@ -26,7 +26,7 @@ Layer::Layer(unsigned input_dim, unsigned output_dim, Activation activation, flo
  */
 MLP::MLP(ParameterCollection & model)
 {
-    LAYERS = layers.size();
+    LAYERS = 0;
 }
 /**
@@ -36,45 +36,45 @@ MLP::MLP(ParameterCollection & model)
  * \param model : ParameterCollection (to contain parameters)
  * \param filename : file containing the MLP's structure
  */
-MLP::MLP(ParameterCollection& model, char* filename/*vector<Layer> layers*/)
+MLP::MLP(ParameterCollection& model, char* filename)
 {
-    read_struct_mlp(filename);
-    // Verify layers compatibility
-    for (unsigned l = 0; l < layers.size() - 1; ++l)
+    ifstream file(filename, ios::in);
+    if(!file)
     {
-        if (layers[l].output_dim != layers[l + 1].input_dim)
-            throw invalid_argument("Layer dimensions don't match");
+        cerr << "Cannot open file " << filename << endl;
+        exit(EXIT_FAILURE);
     }
+    unsigned int input_dim, output_dim;
+    float dropout;
+    Activation activ_fonction;
+    string fonction;
-    // Register parameters in model
-    for (Layer layer : layers)
+    while(file >> input_dim) // first token of each line: input_dim
     {
-        append(model, layer);
-    }
-}
-/**
- * \brief Append a layer at the end of the network
- *
- * \param model
- * \param layer
- */
-void MLP::append(ParameterCollection& model, Layer layer)
+        if(input_dim == 0) // a leading 0 marks a comment line
         {
-            // Check compatibility
-            if (LAYERS > 0)
-                if (layers[LAYERS - 1].output_dim != layer.input_dim)
-                    throw invalid_argument("Layer dimensions don't match");
+            getline(file, fonction); // skip the rest of the line
+            continue;
         }
+        file >> output_dim;
+        file >> fonction;
+        activ_fonction = activation_fonction(fonction);
+        file >> dropout;
-        // Add to layers
-        layers.push_back(layer);
+        Layer tmp_layer(input_dim, output_dim, activ_fonction, dropout);
+        layers.push_back(tmp_layer);
         LAYERS++;
+        if (LAYERS > 1 && layers[LAYERS-1].input_dim != layers[LAYERS-2].output_dim)
+            throw invalid_argument("Layer dimensions don't match");
         // Register parameters
-    Parameter W = model.add_parameters({layer.output_dim, layer.input_dim});
-    Parameter b = model.add_parameters({layer.output_dim});
+        Parameter W = model.add_parameters({layers[LAYERS-1].output_dim, layers[LAYERS-1].input_dim});
+        Parameter b = model.add_parameters({layers[LAYERS-1].output_dim});
         params.push_back({W, b});
     }
+    file.close();
 }
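For context, the rewritten constructor above replaces the old read_struct_mlp/append pair: it reads one layer description per line (input_dim output_dim activation dropout, a leading 0 marking a comment line) and registers the layer's parameters as it goes. A minimal usage sketch, where mlp.struct is a hypothetical file in that format:

    dynet::ParameterCollection model;
    MLP nn(model, (char*)"mlp.struct"); // builds and registers one Layer per non-comment line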
 /**
  * \brief Run the MLP on an input vector/batch
@@ -212,24 +212,12 @@ inline Expression MLP::activate(Expression h, Activation f)
     }
 }
-void MLP::read_struct_mlp(char* filename)
-{
-    ifstream file(filename, ios::in);
-    if(!file)
-    {
-        cerr << "Cannot open file " << filename << endl;
-        exit(EXIT_FAILURE);
-    }
-    float tmp[4];
-    while(file >> tmp[0]) //input_dim
+Activation MLP::activation_fonction(string fonction)
 {
-    file >> tmp[1]; //output_dim
-    file >> tmp[2]; //activation rate
-    file >> tmp[3]; //dropout
-    Layer tmp_layer(tmp[0], tmp[1], tmp[2], tmp[3]);
-    layers.push_back(tmp_layer);
-    }
+    if(fonction=="LINEAR") return LINEAR;
+    if(fonction=="RELU") return RELU;
+    if(fonction=="SIGMOID") return SIGMOID;
+    if(fonction=="TANH") return TANH;
+    if(fonction=="SOFTMAX") return SOFTMAX;
+    return RELU; // default
 }
@@ -11,7 +11,7 @@
 #include <fstream>
 #include <sstream>
 #include <algorithm>
-#include <vector>
+#include <string>
 
 /**
@@ -57,12 +57,11 @@
     bool dropout_active = true;
 public:
-    void read_struct_mlp(char* filename);
     MLP(dynet::ParameterCollection & model);
-    MLP(dynet::ParameterCollection& model, char* filename/*std::vector<Layer> layers*/);
-    void append(dynet::ParameterCollection& model, Layer layer);
+    MLP(dynet::ParameterCollection& model, char* filename);
+    Activation activation_fonction(std::string fonction);
     dynet::Expression run(dynet::Expression x, dynet::ComputationGraph& cg);
-    dynet::Expression get_nll(dynet::Expression x, std::vector</*dynet::real*/unsigned int> labels, dynet::ComputationGraph& cg);
+    dynet::Expression get_nll(dynet::Expression x, std::vector<unsigned int> labels, dynet::ComputationGraph& cg);
     int predict(dynet::Expression x, dynet::ComputationGraph& cg);
     void enable_dropout();
     void disable_dropout();
/**
USAGE :
./trainCFF train_file dev_file batch_size nb_epochs mlp_struct_file [model_file]
Example file for the MLP structure (a line starting with 0 is a comment):
0 for each layer : input_dim output_dim activation dropout
0 activation : SIGMOID, TANH, RELU, LINEAR, SOFTMAX
5 50 RELU 0.5
50 60 RELU 0.5
60 3 LINEAR 0
**/
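As a sanity check of the format above, here is a self-contained sketch of the same parsing loop the new constructor uses (mlp.struct is a placeholder file name; no DyNet required):

#include <fstream>
#include <iostream>
#include <string>

int main()
{
    std::ifstream file("mlp.struct", std::ios::in); // hypothetical structure file
    if(!file) { std::cerr << "Cannot open file" << std::endl; return 1; }
    unsigned int input_dim, output_dim;
    float dropout;
    std::string activation;
    while(file >> input_dim)
    {
        if(input_dim == 0) { std::getline(file, activation); continue; } // comment line
        file >> output_dim >> activation >> dropout;
        std::cout << input_dim << " -> " << output_dim
                  << " (" << activation << ", dropout " << dropout << ")" << std::endl;
    }
    return 0;
}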
/**
Fix cmake to include Dynet
Read Flo and Seb's file
Init the Layer vector (push_back)
*
One-hot representation
Embedding representation
**/
@@ -16,13 +18,13 @@ Embedding representation
 #include <iostream>
 #include <fstream>
 #include "train_cff.hpp"
-#include "dynet/io.h"
+#include "dynet/io.h"/*
 extern "C"
 {
     #include "feat_model.h"
     #include "cf_file.h"
 }
+*/
 #define NB_FEATS 5
@@ -34,7 +36,7 @@ using namespace dynet;
 /**
  * Gets the input dimension of the network from an fm file
  * */
+/*
 int read_fm(char *filename_fm, char *cff_filename){
     int dim, input_dim = 0;
     feat_model * feat_m = feat_model_read(filename_fm, NULL ,0);
@@ -45,10 +47,7 @@ int read_fm(char *filename_fm, char *cff_filename){
         input_dim += tab[i];
     }
     return input_dim;
-}
+}*/
 /**
  * Puts the features from the train file into the cff_train matrix
@@ -56,7 +55,7 @@ int read_fm(char *filename_fm, char *cff_filename){
  * Puts the labels from the train file into the cff_train_labels vector
  * Puts the labels from the dev file into the cff_dev_labels vector
 **/
-void read_files(char* train_filename, char* dev_filename, vector<vector<float>> &cff_train,
+void init_dataset(char* train_filename, char* dev_filename, vector<vector<float>> &cff_train,
     vector<vector<float>> &cff_dev, vector<unsigned int> &cff_train_labels,
     vector<unsigned int> &cff_dev_labels)
 {
@@ -98,7 +97,7 @@ int main(int argc, char** argv)
     if(argc > 7 || argc < 6)
     {
         cerr << "Usage : " << argv[0]
-            << "train_file<string> dev_file<string> batch_size<int> nb_epochs<int> [model_file<string>]\n";
+            << "train_file<string> dev_file<string> batch_size<int> nb_epochs<int> mlp_struct_file<string> [model_file<string>]\n";
         exit(EXIT_FAILURE);
     }
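For reference, a hypothetical invocation matching the new argument check (all file names are placeholders):

    ./trainCFF train.cff dev.cff 32 10 mlp.struct

The optional sixth file argument (model_file) is still accepted, but note that the weight-loading code for it is commented out in this commit.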
@@ -106,10 +105,8 @@
     auto dyparams = dynet::extract_dynet_params(argc, argv);
     dynet::initialize(dyparams);
     // Fetch program specific parameters (see ../utils/cl-args.h) ------------------------------------
-    /*Params params;
-    get_args(argc, argv, params, TRAIN_SUP);*/ // not useful for us
-    unsigned int batch_size = atoi(argv[3]); // set to 297
     // Init variables --------------------------------------------------------------------------------
+    unsigned int batch_size = atoi(argv[3]);
     int nb_epochs = atoi(argv[4]);
@@ -124,18 +121,13 @@
     // Load Dataset ----------------------------------------------------------------------------------
     vector<vector<float>> cff_train, cff_dev; // feature matrices (data); float to be on the safe side
     vector<unsigned int> cff_train_labels, cff_dev_labels; // target (label) vectors
-    read_files(argv[1], argv[2], cff_train, cff_dev, cff_train_labels, cff_dev_labels);
-    int input_dim = read_fm(argv[5], argv[1]);
+    init_dataset(argv[1], argv[2], cff_train, cff_dev, cff_train_labels, cff_dev_labels);
+    //int input_dim = read_fm(argv[5], argv[1]);
     // ParameterCollection name (for saving) ---------------------------------------------------------
     ostringstream os;
     // Store a bunch of information in the model name
     os << "mlp" << getpid() << ".params";
-    /*<< "_" << 784 << "-" << 512 << "-relu-" << 0.2
-        << "_" << 512 << "-" << 512 << "-relu-" << 0.2
-        << "_" << 512 << "-" << 10 << "-softmax"
-        << "_" << getpid()
-        << ".params";*/
     const string fname = os.str();
     cerr << "Parameters will be written to: " << fname << endl;
@@ -146,29 +138,22 @@
     trainer.clip_threshold *= batch_size;
     // Create model
-    MLP nn(model, vector<Layer>({
-        Layer(/* input_dim (number of features) */ input_dim, /* output_dim */ 50, /* activation */ RELU, /* dropout_rate */ 0.2),
-        Layer(/* input_dim */ 50, /* output_dim */ 100, /* activation */ RELU, /* dropout_rate */ 0.2),
-        Layer(/* input_dim */ 100, /* output_dim */ 150, /* activation */ RELU, /* dropout_rate */ 0.2),
-        Layer(/* input_dim */ 150, /* output_dim (number of possible classes) */ 3, /* activation */ LINEAR, /* dropout_rate */ 0.0)
-    }));
+    MLP nn(model, argv[5]);
     // Load preexisting weights (if provided)
-    if (argv[6] != NULL)
+    /**if (argv[6] != NULL)
     {
         TextFileLoader loader(argv[6]);
         loader.populate(model);
-    }
+    }*/
     // Initialize variables for training -------------------------------------------------------------
     // Worst accuracy
     double worst = 0;
     // Number of batches in training set
-    unsigned int num_batches = cff_train.size() / batch_size - 1; //921;
-    // number of rows / batch_size - 1
-    //cout << "NUMBER OF BATCHES = " << num_batches <<endl;
+    unsigned int num_batches = cff_train.size() / batch_size - 1;
     // Random indexing
     unsigned int si;
@@ -205,7 +190,6 @@
     // Compute batch start id and size
     int id = order[si] * batch_size;
     unsigned int bsize = std::min((unsigned int) cff_train.size() - id, batch_size);
-    // cout <<"BSIZE = "<<bsize<<endl;
     // Get input batch
     cur_batch = vector<Expression>(bsize);
@@ -232,7 +216,7 @@
     // Update parameters
     trainer.update();
-    //cout<<"SI= "<<si<<endl;
+    // Print progress every tenth of the dataset
     if ((si + 1) % (num_batches / 10) == 0 || si == num_batches - 1)
     {
@@ -255,7 +239,6 @@
     if (si == num_batches)
     {
         double dpos = 0;
-        int nb_errors = 0;
         for (unsigned int i = 0; i < cff_dev.size(); ++i)
         {
             // build graph for this instance
@@ -270,8 +253,6 @@
             // Increment count of positive classification
             if (predicted_idx == cff_dev_labels[i])
                 dpos++;
-            /*else
-                ++nb_errors;*/
             if(epoch+1 == nb_epochs)
                 predicted_file << predicted_idx << endl;
@@ -288,7 +269,6 @@
     // Print information
     cerr << "\n***DEV [epoch=" << epoch+1
         << "] E = " << (dpos / (double) cff_dev.size()) << ' ';
-    //cerr << "Success rate = " << 100-(float)nb_errors*100/cff_dev_labels.size() << "%\n\n";
     // Reinitialize timer
     iteration.reset(new Timer("completed in"));