Commit 9b8f2f13 authored by Marjorie Armando
initial commit
parent a4242c24
#include "train_cff.hpp"
using namespace std;
using namespace dynet;
/**
* \brief Build a feed forward layer
*
* \param input_dim : Input dimension
* \param output_dim : Output dimension
* \param activation : Activation function
* \param dropout_rate : Dropout rate
*/
Layer::Layer(unsigned input_dim, unsigned output_dim, Activation activation, float dropout_rate):
input_dim(input_dim),
output_dim(output_dim),
activation(activation),
dropout_rate(dropout_rate)
{
}
/**
* \brief Default constructor
* \details Don't forget to add layers!
*/
MLP::MLP(ParameterCollection & model)
{
LAYERS = 0;
}
/**
* \brief Returns a Multilayer perceptron
* \details Creates a feedforward multilayer perceptron based on a list of layer descriptions
*
* \param model : ParameterCollection (to contain parameters)
* \param layers : Layers description
*/
MLP::MLP(ParameterCollection& model, vector<Layer> layers)
{
// Verify layers compatibility
for (unsigned l = 0; l < layers.size() - 1; ++l)
{
if (layers[l].output_dim != layers[l + 1].input_dim)
throw invalid_argument("Layer dimensions don't match");
}
// Register parameters in model
for (Layer layer : layers)
{
append(model, layer);
}
}
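/**
 * Usage sketch (illustrative dimensions): consecutive layers must chain,
 * i.e. each layer's output_dim equals the next layer's input_dim.
 * \code
 * ParameterCollection model;
 * MLP nn(model, vector<Layer>({
 *     Layer(5, 50, RELU, 0.2),   // 5 inputs -> 50 hidden units
 *     Layer(50, 3, LINEAR, 0.0)  // 50 hidden units -> 3 output classes
 * }));
 * \endcode
 */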
/**
* \brief Append a layer at the end of the network
*
* \param model
* \param layer
*/
void MLP::append(ParameterCollection& model, Layer layer)
{
// Check compatibility
if (LAYERS > 0)
if (layers[LAYERS - 1].output_dim != layer.input_dim)
throw invalid_argument("Layer dimensions don't match");
// Add to layers
layers.push_back(layer);
LAYERS++;
// Register parameters
Parameter W = model.add_parameters({layer.output_dim, layer.input_dim});
Parameter b = model.add_parameters({layer.output_dim});
params.push_back({W, b});
}
/**
* \brief Run the MLP on an input vector/batch
*
* \param x : Input expression (vector or batch)
* \param cg : Computation graph
*
* \return Expression for the output of the last layer
*/
Expression MLP::run(Expression x, ComputationGraph& cg)
{
Expression h_cur = x; // Expression for the current hidden state
for (unsigned l = 0; l < LAYERS; ++l)
{
/* Initialize parameters in computation graph */
Expression W = parameter(cg, params[l][0]);
Expression b = parameter(cg, params[l][1]);
Expression a = affine_transform({b, W, h_cur}); // Apply affine transform
Expression h = activate(a, layers[l].activation); // Apply activation function
Expression h_dropped; // Take care of dropout
if (layers[l].dropout_rate > 0)
{
if (dropout_active)
{
// During training, drop random units
Expression mask = random_bernoulli(cg, {layers[l].output_dim}, 1 - layers[l].dropout_rate);
h_dropped = cmult(h, mask);
}
else
{
h_dropped = h * (1 - layers[l].dropout_rate); // At test time, multiply by the retention rate to scale
}
}
else
h_dropped = h; // If there's no dropout, don't do anything
h_cur = h_dropped; // Set current hidden state
}
return h_cur;
}
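/*
 * Note on the scaling above: each unit is kept with probability
 * r = 1 - dropout_rate, so the expected value of cmult(h, mask) is r * h.
 * Multiplying by r at test time therefore keeps the activations on the same
 * scale as the expected training-time activations.
 */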
/**
* \brief Return the negative log likelihood for the (batched) pair (x,y)
*
* \param x : Input batch
* \param labels : Output labels
* \param cg : Computation graph
*
* \return Expression for the negative log likelihood on the batch
*/
Expression MLP::get_nll(Expression x, vector<unsigned int> labels, ComputationGraph& cg)
{
Expression y = run(x, cg); // compute output
Expression losses = pickneglogsoftmax(y, labels); // Negative log-softmax picked at the gold labels
return sum_batches(losses); // Sum across batches
}
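/**
 * Batched usage sketch (mirrors the training loop in the program below;
 * assumes an MLP `nn`, a ComputationGraph `cg` and 5-dimensional inputs):
 * \code
 * vector<Expression> xs;    // one {5}-dimensional input expression per sample
 * vector<unsigned int> ys;  // one gold label per sample
 * // ... fill xs and ys ...
 * Expression x_batch = reshape(concatenate_cols(xs), Dim({5}, xs.size()));
 * Expression loss_expr = nn.get_nll(x_batch, ys, cg);
 * float loss = as_scalar(cg.forward(loss_expr));
 * \endcode
 */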
/**
* \brief Predict the most probable label
* \details Returns the argmax of the softmax of the network's output
*
* \param x : Input
* \param cg : Computation graph
*
* \return Label index
*/
int MLP::predict(Expression x, ComputationGraph& cg)
{
Expression y = run(x, cg); // run MLP to get class distribution
vector<float> probs = as_vector(cg.forward(y)); // Get values
// Get argmax
unsigned argmax = 0;
for (unsigned i = 1; i < probs.size(); ++i)
{
if (probs[i] > probs[argmax])
argmax = i;
}
return argmax;
}
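/**
 * Single-instance prediction sketch (assumes a trained MLP `nn`, dropout
 * disabled, and 5-dimensional features as in the program below):
 * \code
 * ComputationGraph cg;
 * vector<float> feats = {0.f, 1.f, 2.f, 0.f, 1.f};  // illustrative values
 * Expression x = input(cg, {5}, feats);
 * int label = nn.predict(x, cg);  // index of the most probable class
 * \endcode
 */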
/**
* \brief Enable dropout
* \details Use this during training, or at test time if you want to sample outputs with Monte Carlo dropout
*/
void MLP::enable_dropout()
{
dropout_active = true;
}
/**
* \brief Disable dropout
* \details Do this during testing if you want a deterministic network
*/
void MLP::disable_dropout()
{
dropout_active = false;
}
/**
* \brief Check whether dropout is enabled or not
*
* \return Dropout state
*/
bool MLP::is_dropout_enabled()
{
return dropout_active;
}
inline Expression MLP::activate(Expression h, Activation f)
{
switch (f)
{
case LINEAR:
return h;
case RELU:
return rectify(h);
case SIGMOID:
return logistic(h);
case TANH:
return tanh(h);
case SOFTMAX:
return softmax(h);
default:
throw invalid_argument("Unknown activation function");
}
}
#ifndef TRAIN_CFF
#define TRAIN_CFF
#include "dynet/nodes.h"
#include "dynet/dynet.h"
#include "dynet/training.h"
#include "dynet/timing.h"
#include "dynet/expr.h"
#include <iostream>
#include <fstream>
#include <sstream>
#include <algorithm>
#include <vector>
/**
* Common activation functions used in multilayer perceptrons
*/
enum Activation
{
SIGMOID,
TANH,
RELU,
LINEAR,
SOFTMAX
};
/**
* \ingroup ffbuilders
* \struct Layer
* \brief Layer structure
* \details Contains all parameters defining a layer
*/
struct Layer
{
public:
unsigned int input_dim; /* Input dimension */
unsigned int output_dim; /* Output dimension */
Activation activation = LINEAR; /* Activation function */
float dropout_rate = 0; /* Dropout rate */
Layer(unsigned input_dim, unsigned output_dim, Activation activation, float dropout_rate);
Layer() {};
};
/**
* \struct MLP
* \brief Multilayer perceptron
*/
struct MLP
{
protected:
unsigned int LAYERS = 0; // Number of layers
std::vector<Layer> layers; // Layers
std::vector<std::vector<dynet::Parameter>> params; // Parameters
bool dropout_active = true;
public:
MLP(dynet::ParameterCollection & model);
MLP(dynet::ParameterCollection& model, std::vector<Layer> layers);
void append(dynet::ParameterCollection& model, Layer layer);
dynet::Expression run(dynet::Expression x, dynet::ComputationGraph& cg);
dynet::Expression get_nll(dynet::Expression x, std::vector<unsigned int> labels, dynet::ComputationGraph& cg);
int predict(dynet::Expression x, dynet::ComputationGraph& cg);
void enable_dropout();
void disable_dropout();
bool is_dropout_enabled();
private:
inline dynet::Expression activate(dynet::Expression h, Activation f);
};
#endif
/**
USAGE :
./trainCFF train_file dev_file batch_size nb_epochs fm_file [model_file]
**/
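/*
 * Example invocation (file names and epoch count are illustrative):
 *   ./trainCFF train.cff dev.cff 297 20 model.fm mlp1234.params
 * where model.fm is the feature-model file read by read_fm() and the final
 * argument (pre-trained parameters to load) is optional.
 */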
#include <iostream>
#include <fstream>
#include "train_cff.hpp"
#include "dynet/io.h"
extern "C"
{
#include "feat_model.h"
#include "cf_file.h"
}
#define NB_FEATS 5
using namespace std;
using namespace dynet;
/**
* Reads the network's input dimension from an fm (feature model) file
*/
int read_fm(char *filename_fm, char *cff_filename){
int dim, input_dim = 0;
feat_model * feat_m = feat_model_read(filename_fm, NULL ,0);
dim = feat_m->dim;
int *tab = cff_max_value_per_column(cff_filename, dim);
for(int i = 0; i < dim; i++){
input_dim += tab[i];
}
return input_dim;
}
/**
* Loads the features of the train file into the matrix cff_train
* Loads the features of the dev file into the matrix cff_dev
* Loads the labels of the train file into the vector cff_train_labels
* Loads the labels of the dev file into the vector cff_dev_labels
**/
void read_files(char* train_filename, char* dev_filename, vector<vector<float>> &cff_train,
vector<vector<float>> &cff_dev, vector<unsigned int> &cff_train_labels,
vector<unsigned int> &cff_dev_labels)
{
ifstream train_file(train_filename, ios::in);
ifstream dev_file(dev_filename, ios::in);
if(!train_file)
{
cerr << "Impossible d'ouvrir le fichier " << train_filename << endl;
exit(EXIT_FAILURE);
}
if(!dev_file)
{
cerr << "Impossible d'ouvrir le fichier " << dev_filename << endl;
exit(EXIT_FAILURE);
}
vector<float> tmp_vect(NB_FEATS);
unsigned int tmp;
while(train_file >> tmp)
{
cff_train_labels.push_back(tmp);
for(unsigned int i=0; i<NB_FEATS; ++i)
train_file >> tmp_vect[i];
cff_train.push_back(tmp_vect);
}
while(dev_file >> tmp)
{
cff_dev_labels.push_back(tmp);
for(unsigned int i=0; i<NB_FEATS; ++i)
dev_file >> tmp_vect[i];
cff_dev.push_back(tmp_vect);
}
}
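/*
 * Expected line format for both the train and dev files (as read by the loops
 * above): one integer label followed by NB_FEATS feature values, e.g.
 *   2 0.0 1.0 3.0 0.0 1.0
 */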
int main(int argc, char** argv)
{
if(argc > 7 || argc < 6)
{
cerr << "Usage : " << argv[0]
<< "train_file<string> dev_file<string> batch_size<int> nb_epochs<int> [model_file<string>]\n";
exit(EXIT_FAILURE);
}
// Fetch dynet params ----------------------------------------------------------------------------
auto dyparams = dynet::extract_dynet_params(argc, argv);
dynet::initialize(dyparams);
// Fetch program specific parameters (see ../utils/cl-args.h) ------------------------------------
/*Params params;
get_args(argc, argv, params, TRAIN_SUP);*/ // not needed here
unsigned int batch_size = atoi(argv[3]); // e.g. 297
int nb_epochs = atoi(argv[4]);
// Output file containing the dev file's predictions ---------------------------------------------
fstream predicted_file("predictions.txt", ios::in | ios::out | ios::trunc);
if(!predicted_file)
{
cerr << "Problème avec le fichier predictions.txt \n";
exit(EXIT_FAILURE);
}
// Load Dataset ----------------------------------------------------------------------------------
vector<vector<float>> cff_train, cff_dev; // feature matrices (data); float to be wide enough
vector<unsigned int> cff_train_labels, cff_dev_labels; // target (label) vectors
read_files(argv[1], argv[2], cff_train, cff_dev, cff_train_labels, cff_dev_labels);
int input_dim = read_fm(argv[5], argv[1]); // note: not used below; the network hard-codes 5 input features
// ParameterCollection name (for saving) ---------------------------------------------------------
ostringstream os;
// Store a bunch of information in the model name
os << "mlp" << getpid() << ".params";
/*<< "_" << 784 << "-" << 512 << "-relu-" << 0.2
<< "_" << 512 << "-" << 512 << "-relu-" << 0.2
<< "_" << 512 << "-" << 10 << "-softmax"
<< "_" << getpid()
<< ".params";*/
const string fname = os.str();
cerr << "Parameters will be written to: " << fname << endl;
// Build model -----------------------------------------------------------------------------------
ParameterCollection model;
// Use Adam optimizer
AdamTrainer trainer(model);
trainer.clip_threshold *= batch_size;
// Create model
MLP nn(model, vector<Layer>({
Layer(/* input_dim (number of features) */ 5, /* output_dim */ 50, /* activation */ RELU, /* dropout_rate */ 0.2),
Layer(/* input_dim */ 50, /* output_dim */ 100, /* activation */ RELU, /* dropout_rate */ 0.2),
Layer(/* input_dim */ 100, /* output_dim */ 150, /* activation */ RELU, /* dropout_rate */ 0.2),
Layer(/* input_dim */ 150, /* output_dim (number of possible classes) */ 3, /* activation */ LINEAR, /* dropout_rate */ 0.0)
}));
// Load preexisting weights (if provided)
if (argv[6] != NULL)
{
TextFileLoader loader(argv[6]);
loader.populate(model);
}
// Initialize variables for training -------------------------------------------------------------
// Best number of correct dev predictions seen so far
double best = 0;
// Number of batches in training set
unsigned int num_batches = cff_train.size() / batch_size - 1; // number of training samples / batch_size - 1
// Random indexing
unsigned int si;
vector<unsigned int> order(num_batches);
for (unsigned int i = 0; i < num_batches; ++i)
order[i] = i;
int epoch = 0;
vector<Expression> cur_batch;
vector<unsigned int> cur_labels;
// Run for the given number of epochs (or indefinitely if nb_epochs is negative)
while (epoch < nb_epochs || nb_epochs < 0)
{
// Reshuffle the dataset
cerr << "**SHUFFLE\n";
random_shuffle(order.begin(), order.end());
// Initialize loss and number of samples processed (to average loss)
double loss = 0;
double num_samples = 0;
// Start timer
std::unique_ptr<Timer> iteration(new Timer("completed in"));
// Activate dropout
nn.enable_dropout();
for (si = 0; si < num_batches; ++si)
{
// build graph for this instance
ComputationGraph cg;
// Compute batch start id and size
int id = order[si] * batch_size;
unsigned int bsize = std::min((unsigned int) cff_train.size() - id, batch_size);
// cout <<"BSIZE = "<<bsize<<endl;
// Get input batch
cur_batch = vector<Expression>(bsize);
cur_labels = vector<unsigned int>(bsize);
for (unsigned int idx = 0; idx < bsize; ++idx)
{
cur_batch[idx] = input(cg, {NB_FEATS}, cff_train[id + idx]);
cur_labels[idx] = cff_train_labels[id + idx];
}
// Concatenate the inputs as columns and reshape into one batched expression
Expression x_batch = reshape(concatenate_cols(cur_batch), Dim({NB_FEATS}, bsize));
// Get negative log likelihood on batch
Expression loss_expr = nn.get_nll(x_batch, cur_labels, cg);
// Get scalar error for monitoring
loss += as_scalar(cg.forward(loss_expr));
// Increment number of samples processed
num_samples += bsize;
// Compute gradient with backward pass
cg.backward(loss_expr);
// Update parameters
trainer.update();
//cout<<"SI= "<<si<<endl;
// Print progress every tenth of the dataset
if ((si + 1) % (num_batches / 10) == 0 || si == num_batches - 1)
{
// Print informations
trainer.status();
cerr << " E = " << (loss / num_samples) << ' ';
// Reinitialize timer
iteration.reset(new Timer("completed in"));
// Reinitialize loss
loss = 0;
num_samples = 0;
}
}
// Disable dropout for dev testing
nn.disable_dropout();
// Show score on dev data
if (si == num_batches)
{
double dpos = 0;
int nb_errors = 0;
for (unsigned int i = 0; i < cff_dev.size(); ++i)
{
// build graph for this instance
ComputationGraph cg;
// Get input expression
Expression x = input(cg, {NB_FEATS}, cff_dev[i]);
// Predict the most probable label
int predicted_idx = nn.predict(x, cg);
// Increment count of correct classifications
if (predicted_idx == (int) cff_dev_labels[i])
dpos++;
/*else
++nb_errors;*/
if(epoch+1 == nb_epochs)
predicted_file << predicted_idx << endl;
}
// If the dev accuracy is higher than the best so far, save the model
if (dpos > best)
{
best = dpos;
TextFileSaver saver(fname);
saver.save(model);
}
// Print informations
cerr << "\n***DEV [epoch=" << epoch+1
<< "] E = " << (dpos / (double) cff_dev.size()) << ' ';
//cerr << "Success rate = " << 100-(float)nb_errors*100/cff_dev_labels.size() << "%\n\n";
// Reinitialize timer
iteration.reset(new Timer("completed in"));
}
// Increment epoch
++epoch;
}
return 0;
}