Commit 540e0902 authored by Marjorie Armando

construct the MLP structure automatically from a file

parent a3f93cb0
@@ -26,7 +26,7 @@ Layer::Layer(unsigned input_dim, unsigned output_dim, Activation activation, flo
  */
 MLP::MLP(ParameterCollection & model)
 {
-    LAYERS = layers.size();
+    LAYERS = 0;
 }
 /**
@@ -36,45 +36,45 @@ MLP::MLP(ParameterCollection & model)
  * \param model : ParameterCollection (to contain parameters)
  * \param filename : file containing the MLP's structure
  */
-MLP::MLP(ParameterCollection& model, char* filename/*vector<Layer> layers*/)
+MLP::MLP(ParameterCollection& model, char* filename)
 {
-    read_struct_mlp(filename);
-    // Verify layers compatibility
-    for (unsigned l = 0; l < layers.size() - 1; ++l)
+    ifstream file(filename, ios::in);
+    if(!file)
     {
-        if (layers[l].output_dim != layers[l + 1].input_dim)
-            throw invalid_argument("Layer dimensions don't match");
+        cerr << "Cannot open file " << filename << endl;
+        exit(EXIT_FAILURE);
     }
+    unsigned int input_dim, output_dim;
+    float dropout;
+    Activation activ_fonction;
+    string fonction;
-    // Register parameters in model
-    for (Layer layer : layers)
+    while(file >> input_dim) // first token of each line: input_dim
     {
-        append(model, layer);
-    }
-}
-/**
- * \brief Append a layer at the end of the network
- *
- * \param model
- * \param layer
- */
-void MLP::append(ParameterCollection& model, Layer layer)
+        if(input_dim == 0) // a leading 0 marks a comment line
         {
-            // Check compatibility
-            if (LAYERS > 0)
-                if (layers[LAYERS - 1].output_dim != layer.input_dim)
-                    throw invalid_argument("Layer dimensions don't match");
+            getline(file, fonction); // skip the rest of the line
+            continue;
         }
+        file >> output_dim;
+        file >> fonction;
+        activ_fonction = activation_fonction(fonction);
+        file >> dropout;
-        // Add to layers
-        layers.push_back(layer);
+        Layer tmp_layer(input_dim, output_dim, activ_fonction, dropout);
+        layers.push_back(tmp_layer);
         LAYERS++;
+        if (LAYERS > 1 && layers[LAYERS-1].input_dim != layers[LAYERS-2].output_dim)
+            throw invalid_argument("Layer dimensions don't match");
         // Register parameters
-    Parameter W = model.add_parameters({layer.output_dim, layer.input_dim});
-    Parameter b = model.add_parameters({layer.output_dim});
+        Parameter W = model.add_parameters({layers[LAYERS-1].output_dim, layers[LAYERS-1].input_dim});
+        Parameter b = model.add_parameters({layers[LAYERS-1].output_dim});
         params.push_back({W, b});
     }
+    file.close();
 }
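For context, the rewritten constructor above replaces the old read_struct_mlp/append pair: it reads one layer description per line (input_dim output_dim activation dropout, a leading 0 marking a comment line) and registers the layer's parameters as it goes. A minimal usage sketch, where mlp.struct is a hypothetical file in that format:

    dynet::ParameterCollection model;
    MLP nn(model, (char*)"mlp.struct"); // builds and registers one Layer per non-comment line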
 /**
  * \brief Run the MLP on an input vector/batch
@@ -212,24 +212,12 @@ inline Expression MLP::activate(Expression h, Activation f)
     }
 }
-void MLP::read_struct_mlp(char* filename)
-{
-    ifstream file(filename, ios::in);
-    if(!file)
-    {
-        cerr << "Cannot open file " << filename << endl;
-        exit(EXIT_FAILURE);
-    }
-    float tmp[4];
-    while(file >> tmp[0]) //input_dim
+Activation MLP::activation_fonction(string fonction)
 {
-    file >> tmp[1]; //output_dim
-    file >> tmp[2]; //activation rate
-    file >> tmp[3]; //dropout
-    Layer tmp_layer(tmp[0], tmp[1], tmp[2], tmp[3]);
-    layers.push_back(tmp_layer);
-    }
+    if(fonction=="LINEAR") return LINEAR;
+    if(fonction=="RELU") return RELU;
+    if(fonction=="SIGMOID") return SIGMOID;
+    if(fonction=="TANH") return TANH;
+    if(fonction=="SOFTMAX") return SOFTMAX;
+    return RELU; // default
 }
@@ -11,7 +11,7 @@
 #include <fstream>
 #include <sstream>
 #include <algorithm>
-#include <vector>
+#include <string>
 
 /**
@@ -57,12 +57,11 @@
     bool dropout_active = true;
 public:
-    void read_struct_mlp(char* filename);
     MLP(dynet::ParameterCollection & model);
-    MLP(dynet::ParameterCollection& model, char* filename/*std::vector<Layer> layers*/);
-    void append(dynet::ParameterCollection& model, Layer layer);
+    MLP(dynet::ParameterCollection& model, char* filename);
+    Activation activation_fonction(std::string fonction);
     dynet::Expression run(dynet::Expression x, dynet::ComputationGraph& cg);
-    dynet::Expression get_nll(dynet::Expression x, std::vector</*dynet::real*/unsigned int> labels, dynet::ComputationGraph& cg);
+    dynet::Expression get_nll(dynet::Expression x, std::vector<unsigned int> labels, dynet::ComputationGraph& cg);
     int predict(dynet::Expression x, dynet::ComputationGraph& cg);
     void enable_dropout();
     void disable_dropout();
/**
USAGE :
./trainCFF train_file dev_file batch_size nb_epochs mlp_struct_file [model_file]
Example file for the MLP structure (a line starting with 0 is a comment):
0 for each layer : input_dim output_dim activation dropout
0 activation : SIGMOID, TANH, RELU, LINEAR, SOFTMAX
5 50 RELU 0.5
50 60 RELU 0.5
60 3 LINEAR 0
**/
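As a sanity check of the format above, here is a self-contained sketch of the same parsing loop the new constructor uses (mlp.struct is a placeholder file name; no DyNet required):

#include <fstream>
#include <iostream>
#include <string>

int main()
{
    std::ifstream file("mlp.struct", std::ios::in); // hypothetical structure file
    if(!file) { std::cerr << "Cannot open file" << std::endl; return 1; }
    unsigned int input_dim, output_dim;
    float dropout;
    std::string activation;
    while(file >> input_dim)
    {
        if(input_dim == 0) { std::getline(file, activation); continue; } // comment line
        file >> output_dim >> activation >> dropout;
        std::cout << input_dim << " -> " << output_dim
                  << " (" << activation << ", dropout " << dropout << ")" << std::endl;
    }
    return 0;
}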
/**
Fix cmake to include Dynet
Read Flo and Seb's file
Init the Layer vector (push_back)
*
One-hot representation
Embedding representation
**/
@@ -16,13 +18,13 @@ Embedding representation
 #include <iostream>
 #include <fstream>
 #include "train_cff.hpp"
-#include "dynet/io.h"
+#include "dynet/io.h"/*
 extern "C"
 {
     #include "feat_model.h"
     #include "cf_file.h"
 }
+*/
 #define NB_FEATS 5
@@ -34,7 +36,7 @@ using namespace dynet;
 /**
  * Gets the input dimension of the network from an fm file
  * */
+/*
 int read_fm(char *filename_fm, char *cff_filename){
     int dim, input_dim = 0;
     feat_model * feat_m = feat_model_read(filename_fm, NULL ,0);
@@ -45,10 +47,7 @@ int read_fm(char *filename_fm, char *cff_filename){
         input_dim += tab[i];
     }
     return input_dim;
-}
+}*/
 /**
  * Puts the features from the train file into the cff_train matrix
@@ -56,7 +55,7 @@ int read_fm(char *filename_fm, char *cff_filename){
  * Puts the labels from the train file into the cff_train_labels vector
  * Puts the labels from the dev file into the cff_dev_labels vector
 **/
-void read_files(char* train_filename, char* dev_filename, vector<vector<float>> &cff_train,
+void init_dataset(char* train_filename, char* dev_filename, vector<vector<float>> &cff_train,
     vector<vector<float>> &cff_dev, vector<unsigned int> &cff_train_labels,
     vector<unsigned int> &cff_dev_labels)
 {
@@ -98,7 +97,7 @@ int main(int argc, char** argv)
     if(argc > 7 || argc < 6)
     {
         cerr << "Usage : " << argv[0]
-            << "train_file<string> dev_file<string> batch_size<int> nb_epochs<int> [model_file<string>]\n";
+            << "train_file<string> dev_file<string> batch_size<int> nb_epochs<int> mlp_struct_file<string> [model_file<string>]\n";
         exit(EXIT_FAILURE);
     }
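For reference, a hypothetical invocation matching the new argument check (all file names are placeholders):

    ./trainCFF train.cff dev.cff 32 10 mlp.struct

The optional sixth file argument (model_file) is still accepted, but note that the weight-loading code for it is commented out in this commit.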
@@ -106,10 +105,8 @@
     auto dyparams = dynet::extract_dynet_params(argc, argv);
     dynet::initialize(dyparams);
     // Fetch program specific parameters (see ../utils/cl-args.h) ------------------------------------
-    /*Params params;
-    get_args(argc, argv, params, TRAIN_SUP);*/ // not useful for us
-    unsigned int batch_size = atoi(argv[3]); // set to 297
     // Init variables --------------------------------------------------------------------------------
+    unsigned int batch_size = atoi(argv[3]);
     int nb_epochs = atoi(argv[4]);
@@ -124,18 +121,13 @@
     // Load Dataset ----------------------------------------------------------------------------------
     vector<vector<float>> cff_train, cff_dev; // feature matrices (data); float to be on the safe side
     vector<unsigned int> cff_train_labels, cff_dev_labels; // target (label) vectors
-    read_files(argv[1], argv[2], cff_train, cff_dev, cff_train_labels, cff_dev_labels);
-    int input_dim = read_fm(argv[5], argv[1]);
+    init_dataset(argv[1], argv[2], cff_train, cff_dev, cff_train_labels, cff_dev_labels);
+    //int input_dim = read_fm(argv[5], argv[1]);
     // ParameterCollection name (for saving) ---------------------------------------------------------
     ostringstream os;
     // Store a bunch of information in the model name
     os << "mlp" << getpid() << ".params";
-    /*<< "_" << 784 << "-" << 512 << "-relu-" << 0.2
-        << "_" << 512 << "-" << 512 << "-relu-" << 0.2
-        << "_" << 512 << "-" << 10 << "-softmax"
-        << "_" << getpid()
-        << ".params";*/
     const string fname = os.str();
     cerr << "Parameters will be written to: " << fname << endl;
@@ -146,29 +138,22 @@
     trainer.clip_threshold *= batch_size;
     // Create model
-    MLP nn(model, vector<Layer>({
-        Layer(/* input_dim (number of features) */ input_dim, /* output_dim */ 50, /* activation */ RELU, /* dropout_rate */ 0.2),
-        Layer(/* input_dim */ 50, /* output_dim */ 100, /* activation */ RELU, /* dropout_rate */ 0.2),
-        Layer(/* input_dim */ 100, /* output_dim */ 150, /* activation */ RELU, /* dropout_rate */ 0.2),
-        Layer(/* input_dim */ 150, /* output_dim (number of possible classes) */ 3, /* activation */ LINEAR, /* dropout_rate */ 0.0)
-    }));
+    MLP nn(model, argv[5]);
     // Load preexisting weights (if provided)
-    if (argv[6] != NULL)
+    /**if (argv[6] != NULL)
     {
         TextFileLoader loader(argv[6]);
         loader.populate(model);
-    }
+    }*/
     // Initialize variables for training -------------------------------------------------------------
     // Worst accuracy
     double worst = 0;
     // Number of batches in training set
-    unsigned int num_batches = cff_train.size() / batch_size - 1; //921;
-    // number of rows / batch_size - 1
-    //cout << "NUMBER OF BATCHES = " << num_batches <<endl;
+    unsigned int num_batches = cff_train.size() / batch_size - 1;
     // Random indexing
     unsigned int si;
@@ -205,7 +190,6 @@
     // Compute batch start id and size
     int id = order[si] * batch_size;
     unsigned int bsize = std::min((unsigned int) cff_train.size() - id, batch_size);
-    // cout <<"BSIZE = "<<bsize<<endl;
     // Get input batch
     cur_batch = vector<Expression>(bsize);
@@ -232,7 +216,7 @@
     // Update parameters
     trainer.update();
-    //cout<<"SI= "<<si<<endl;
+    // Print progress every tenth of the dataset
     if ((si + 1) % (num_batches / 10) == 0 || si == num_batches - 1)
     {
@@ -255,7 +239,6 @@
     if (si == num_batches)
     {
         double dpos = 0;
-        int nb_errors = 0;
         for (unsigned int i = 0; i < cff_dev.size(); ++i)
         {
             // build graph for this instance
@@ -270,8 +253,6 @@
             // Increment count of positive classification
             if (predicted_idx == cff_dev_labels[i])
                 dpos++;
-            /*else
-                ++nb_errors;*/
             if(epoch+1 == nb_epochs)
                 predicted_file << predicted_idx << endl;
@@ -288,7 +269,6 @@
     // Print information
     cerr << "\n***DEV [epoch=" << epoch+1
         << "] E = " << (dpos / (double) cff_dev.size()) << ' ';
-    //cerr << "Success rate = " << 100-(float)nb_errors*100/cff_dev_labels.size() << "%\n\n";
     // Reinitialize timer
     iteration.reset(new Timer("completed in"));