diff --git a/maca_trans_parser/src/train_cff.cpp b/maca_trans_parser/src/train_cff.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..67f91e1a6c9a64d937adb8dc1b001c1ba5150632
--- /dev/null
+++ b/maca_trans_parser/src/train_cff.cpp
@@ -0,0 +1,212 @@
+#include "train_cff.hpp"
+
+using namespace std;
+using namespace dynet;
+
+/**
+ * \brief Build a feed-forward layer
+ *
+ * \param input_dim : Input dimension
+ * \param output_dim : Output dimension
+ * \param activation : Activation function
+ * \param dropout_rate : Dropout rate
+*/
+Layer::Layer(unsigned input_dim, unsigned output_dim, Activation activation, float dropout_rate):
+    input_dim(input_dim),
+    output_dim(output_dim),
+    activation(activation),
+    dropout_rate(dropout_rate)
+{
+
+}
+
+/**
+ * \brief Default constructor
+ * \details Don't forget to add layers!
+*/
+MLP::MLP(ParameterCollection & model)
+{
+    LAYERS = 0;
+}
+
+/**
+ * \brief Build a multilayer perceptron
+ * \details Creates a feed-forward multilayer perceptron from a list of layer descriptions
+ *
+ * \param model : ParameterCollection (to contain parameters)
+ * \param layers : Layer descriptions
+*/
+MLP::MLP(ParameterCollection& model, vector<Layer> layers)
+{
+    // Verify layers compatibility
+    for (unsigned l = 0; l < layers.size() - 1; ++l)
+    {
+        if (layers[l].output_dim != layers[l + 1].input_dim)
+            throw invalid_argument("Layer dimensions don't match");
+    }
+
+    // Register parameters in model
+    for (Layer layer : layers)
+    {
+        append(model, layer);
+    }
+}
+
+/**
+ * \brief Append a layer at the end of the network
+ *
+ * \param model : ParameterCollection to register the layer's parameters in
+ * \param layer : Layer to append
+*/
+void MLP::append(ParameterCollection& model, Layer layer)
+{
+    // Check compatibility
+    if (LAYERS > 0)
+        if (layers[LAYERS - 1].output_dim != layer.input_dim)
+            throw invalid_argument("Layer dimensions don't match");
+
+    // Add to layers
+    layers.push_back(layer);
+    LAYERS++;
+
+    // Register parameters
+    Parameter W = model.add_parameters({layer.output_dim, layer.input_dim});
+    Parameter b = model.add_parameters({layer.output_dim});
+    params.push_back({W, b});
+}
+
+/**
+ * \brief Run the MLP on an input vector/batch
+ *
+ * \param x : Input expression (vector or batch)
+ * \param cg : Computation graph
+ *
+ * \return Expression for the output of the last layer
+*/
+Expression MLP::run(Expression x, ComputationGraph& cg)
+{
+    Expression h_cur = x; // Expression for the current hidden state
+    for (unsigned l = 0; l < LAYERS; ++l)
+    {
+        /* Initialize parameters in computation graph */
+        Expression W = parameter(cg, params[l][0]);
+        Expression b = parameter(cg, params[l][1]);
+
+        Expression a = affine_transform({b, W, h_cur}); // Apply affine transform
+        Expression h = activate(a, layers[l].activation); // Apply activation function
+        Expression h_dropped; // Take care of dropout
+        if (layers[l].dropout_rate > 0)
+        {
+            if (dropout_active)
+            {
+                // During training, drop random units
+                Expression mask = random_bernoulli(cg, {layers[l].output_dim}, 1 - layers[l].dropout_rate);
+                h_dropped = cmult(h, mask);
+            }
+            else
+            {
+                h_dropped = h * (1 - layers[l].dropout_rate); // At test time, multiply by the retention rate to scale
+            }
+        }
+        else
+            h_dropped = h; // If there's no dropout, don't do anything
+        h_cur = h_dropped; // Set current hidden state
+    }
+
+    return h_cur;
+}
+
+/**
+ * \brief Return the negative log likelihood for the (batched) pair (x,y)
+ *
+ * \param x : Input batch
+ * \param labels : Output labels
+ * \param cg : Computation graph
+ *
+ * \return Expression for the negative log likelihood on the batch
+*/
+Expression MLP::get_nll(Expression x, vector<unsigned int> labels, ComputationGraph& cg)
+{
+    Expression y = run(x, cg); // Compute output
+    Expression losses = pickneglogsoftmax(y, labels); // Negative log softmax for each element of the batch
+
+    return sum_batches(losses); // Sum across batches
+}
+
+/**
+ * \brief Predict the most probable label
+ * \details Returns the argmax of the softmax of the network's output
+ *
+ * \param x : Input
+ * \param cg : Computation graph
+ *
+ * \return Label index
+*/
+int MLP::predict(Expression x, ComputationGraph& cg)
+{
+    Expression y = run(x, cg); // Run MLP to get class distribution
+    vector<float> probs = as_vector(cg.forward(y)); // Get values
+
+    // Get argmax
+    unsigned argmax = 0;
+    for (unsigned i = 1; i < probs.size(); ++i)
+    {
+        if (probs[i] > probs[argmax])
+            argmax = i;
+    }
+
+    return argmax;
+}
+
+/**
+ * \brief Enable dropout
+ * \details Use this during training, or during testing if you want to sample outputs with Monte Carlo dropout
+*/
+void MLP::enable_dropout()
+{
+    dropout_active = true;
+}
+
+/**
+ * \brief Disable dropout
+ * \details Do this during testing if you want a deterministic network
+*/
+void MLP::disable_dropout()
+{
+    dropout_active = false;
+}
+
+/**
+ * \brief Check whether dropout is enabled
+ *
+ * \return Dropout state
+*/
+bool MLP::is_dropout_enabled()
+{
+    return dropout_active;
+}
+
+inline Expression MLP::activate(Expression h, Activation f)
+{
+    switch (f)
+    {
+        case LINEAR:
+            return h;
+            break;
+        case RELU:
+            return rectify(h);
+            break;
+        case SIGMOID:
+            return logistic(h);
+            break;
+        case TANH:
+            return tanh(h);
+            break;
+        case SOFTMAX:
+            return softmax(h);
+            break;
+        default:
+            throw invalid_argument("Unknown activation function");
+            break;
+    }
+}
diff --git a/maca_trans_parser/src/train_cff.hpp b/maca_trans_parser/src/train_cff.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e697aed0d792de74ee63a6518265e35f2061d0cf
--- /dev/null
+++ b/maca_trans_parser/src/train_cff.hpp
@@ -0,0 +1,74 @@
+#ifndef TRAIN_CFF
+#define TRAIN_CFF
+
+    #include "dynet/nodes.h"
+    #include "dynet/dynet.h"
+    #include "dynet/training.h"
+    #include "dynet/timing.h"
+    #include "dynet/expr.h"
+
+    #include <iostream>
+    #include <fstream>
+    #include <sstream>
+    #include <algorithm>
+    #include <vector>
+
+
+    /**
+     * Common activation functions used in multilayer perceptrons
+     */
+    enum Activation
+    {
+        SIGMOID,
+        TANH,
+        RELU,
+        LINEAR,
+        SOFTMAX
+    };
+
+    /**
+     * \ingroup ffbuilders
+     * \struct Layer
+     * \brief Layer structure
+     * \details Contains all parameters defining a layer
+     */
+    struct Layer
+    {
+        public:
+            unsigned int input_dim; /* Input dimension */
+            unsigned int output_dim; /* Output dimension */
+            Activation activation = LINEAR; /* Activation function */
+            float dropout_rate = 0; /* Dropout rate */
+
+            Layer(unsigned input_dim, unsigned output_dim, Activation activation, float dropout_rate);
+            Layer() {}
+    };
+
+    /**
+     * \struct MLP
+     * \brief Multilayer perceptron
+     */
+    struct MLP
+    {
+        protected:
+            unsigned int LAYERS = 0; // Number of layers
+            std::vector<Layer> layers; // Layers
+            std::vector<std::vector<dynet::Parameter>> params; // Parameters
+            bool dropout_active = true; // Whether dropout is applied when running the network
+
+        public:
+            MLP(dynet::ParameterCollection & model);
+            MLP(dynet::ParameterCollection& model, std::vector<Layer> layers);
+            void append(dynet::ParameterCollection& model, Layer layer);
+            dynet::Expression run(dynet::Expression x, dynet::ComputationGraph& cg);
+            dynet::Expression get_nll(dynet::Expression x, std::vector<unsigned int> labels, dynet::ComputationGraph& cg);
+            int predict(dynet::Expression x, dynet::ComputationGraph& cg);
+            void enable_dropout();
+            void disable_dropout();
+            bool is_dropout_enabled();
+
+        private:
+            inline dynet::Expression activate(dynet::Expression h, Activation f);
+    };
+
+#endif
diff --git a/maca_trans_parser/src/train_dynet.cpp b/maca_trans_parser/src/train_dynet.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..957e7150be2b0a2df5d415de4165ac19826a2b6d
--- /dev/null
+++ b/maca_trans_parser/src/train_dynet.cpp
@@ -0,0 +1,292 @@
+/**
+    USAGE:
+    ./trainCFF train_file dev_file batch_size nb_epochs fm_file [model_file]
+**/
+
+#include <iostream>
+#include <fstream>
+#include <cstdlib>   /* atoi, exit */
+#include <unistd.h>  /* getpid */
+#include "train_cff.hpp"
+#include "dynet/io.h"
+extern "C"
+{
+    #include "feat_model.h"
+    #include "cf_file.h"
+}
+
+#define NB_FEATS 5
+
+using namespace std;
+using namespace dynet;
+
+
+
+/**
+ * Reads the feature model (.fm) file and returns the total input dimension of the network
+ * */
+
+int read_fm(char *filename_fm, char *cff_filename){
+    int dim, input_dim = 0;
+    feat_model * feat_m = feat_model_read(filename_fm, NULL ,0);
+    dim = feat_m->dim;
+
+    int *tab = cff_max_value_per_column(cff_filename, dim);
+    for(int i = 0; i < dim; i++){
+        input_dim += tab[i];
+    }
+    return input_dim;
+}
+
+
+
+
+/**
+ * Loads the features of the train file into the cff_train matrix
+ * Loads the features of the dev file into the cff_dev matrix
+ * Loads the labels of the train file into the cff_train_labels vector
+ * Loads the labels of the dev file into the cff_dev_labels vector
+**/
+void read_files(char* train_filename, char* dev_filename, vector<vector<float>> &cff_train,
+                vector<vector<float>> &cff_dev, vector<unsigned int> &cff_train_labels,
+                vector<unsigned int> &cff_dev_labels)
+{
+    ifstream train_file(train_filename, ios::in);
+    ifstream dev_file(dev_filename, ios::in);
+    if(!train_file)
+    {
+        cerr << "Unable to open file " << train_filename << endl;
+        exit(EXIT_FAILURE);
+    }
+    if(!dev_file)
+    {
+        cerr << "Unable to open file " << dev_filename << endl;
+        exit(EXIT_FAILURE);
+    }
+
+    vector<float> tmp_vect(NB_FEATS);
+    unsigned int tmp;
+
+    while(train_file >> tmp)
+    {
+        cff_train_labels.push_back(tmp);
+        for(unsigned int i=0; i<NB_FEATS; ++i)
+            train_file >> tmp_vect[i];
+        cff_train.push_back(tmp_vect);
+    }
+
+    while(dev_file >> tmp)
+    {
+        cff_dev_labels.push_back(tmp);
+        for(unsigned int i=0; i<NB_FEATS; ++i)
+            dev_file >> tmp_vect[i];
+        cff_dev.push_back(tmp_vect);
+    }
+}
+
+int main(int argc, char** argv)
+{
+    if(argc > 7 || argc < 6)
+    {
+        cerr << "Usage : " << argv[0]
+             << " train_file<string> dev_file<string> batch_size<int> nb_epochs<int> fm_file<string> [model_file<string>]\n";
+        exit(EXIT_FAILURE);
+    }
+
+    // Fetch dynet params ----------------------------------------------------------------------------
+    auto dyparams = dynet::extract_dynet_params(argc, argv);
+    dynet::initialize(dyparams);
+
+    // Fetch program specific parameters (see ../utils/cl-args.h) ------------------------------------
+    /*Params params;
+    get_args(argc, argv, params, TRAIN_SUP);*/ // not needed here
+    unsigned int batch_size = atoi(argv[3]); // e.g. 297
+    int nb_epochs = atoi(argv[4]);
+
+
+    // Output file containing the dev file's predictions ---------------------------------------------
+    fstream predicted_file("predictions.txt", ios::in | ios::out | ios::trunc);
+    if(!predicted_file)
+    {
+        cerr << "Unable to open predictions.txt \n";
+        exit(EXIT_FAILURE);
+    }
+
+    // Load Dataset ----------------------------------------------------------------------------------
+    vector<vector<float>> cff_train, cff_dev; // feature matrices (data); float to stay generic
+    vector<unsigned int> cff_train_labels, cff_dev_labels; // target (label) vectors
+    read_files(argv[1], argv[2], cff_train, cff_dev, cff_train_labels, cff_dev_labels);
+    int input_dim = read_fm(argv[5], argv[1]); // total input dimension from the feature model (currently unused: the MLP below reads the NB_FEATS raw features)
+
+    // ParameterCollection name (for saving) ---------------------------------------------------------
+    ostringstream os;
+    // Store a bunch of information in the model name
+    os << "mlp" << getpid() << ".params";
+    /*<< "_" << 784 << "-" << 512 << "-relu-" << 0.2
+      << "_" << 512 << "-" << 512 << "-relu-" << 0.2
+      << "_" << 512 << "-" << 10 << "-softmax"
+      << "_" << getpid()
+      << ".params";*/
+    const string fname = os.str();
+    cerr << "Parameters will be written to: " << fname << endl;
+
+    // Build model -----------------------------------------------------------------------------------
+    ParameterCollection model;
+    // Use Adam optimizer
+    AdamTrainer trainer(model);
+    trainer.clip_threshold *= batch_size;
+
+    // Create model
+    MLP nn(model, vector<Layer>({
+        Layer(/* input_dim (number of features) */ NB_FEATS, /* output_dim */ 50, /* activation */ RELU, /* dropout_rate */ 0.2),
+        Layer(/* input_dim */ 50, /* output_dim */ 100, /* activation */ RELU, /* dropout_rate */ 0.2),
+        Layer(/* input_dim */ 100, /* output_dim */ 150, /* activation */ RELU, /* dropout_rate */ 0.2),
+        Layer(/* input_dim */ 150, /* output_dim (number of classes) */ 3, /* activation */ LINEAR, /* dropout_rate */ 0.0)
+    }));
+
+
+    // Load preexisting weights (if provided)
+    if (argc == 7)
+    {
+        TextFileLoader loader(argv[6]);
+        loader.populate(model);
+    }
+
+    // Initialize variables for training -------------------------------------------------------------
+    // Best number of correct dev predictions so far
+    double best = 0;
+
+    // Number of batches in training set
+    unsigned int num_batches = cff_train.size() / batch_size - 1;
+    // i.e. number of training rows / batch_size - 1 (the last, possibly partial, batch is skipped)
+    //cout << "NUMBER OF BATCHES = " << num_batches <<endl;
+
+    // Random indexing
+    unsigned int si;
+    vector<unsigned int> order(num_batches);
+    for (unsigned int i = 0; i < num_batches; ++i)
+        order[i] = i;
+
+    int epoch = 0;
+    vector<Expression> cur_batch;
+    vector<unsigned int> cur_labels;
+
+    // Run for the given number of epochs (or indefinitely if nb_epochs is negative)
+    while (epoch < nb_epochs || nb_epochs < 0)
+    {
+        // Reshuffle the dataset
+        cerr << "**SHUFFLE\n";
+        random_shuffle(order.begin(), order.end());
+
+        // Initialize loss and number of samples processed (to average loss)
+        double loss = 0;
+        double num_samples = 0;
+
+        // Start timer
+        std::unique_ptr<Timer> iteration(new Timer("completed in"));
+
+        // Activate dropout
+        nn.enable_dropout();
+
+        for (si = 0; si < num_batches; ++si)
+        {
+            // Build graph for this instance
+            ComputationGraph cg;
+
+            // Compute batch start id and size
+            int id = order[si] * batch_size;
+            unsigned int bsize = std::min((unsigned int) cff_train.size() - id, batch_size);
+            // cout <<"BSIZE = "<<bsize<<endl;
+
+            // Get input batch
+            cur_batch = vector<Expression>(bsize);
+            cur_labels = vector<unsigned int>(bsize);
+            for (unsigned int idx = 0; idx < bsize; ++idx)
+            {
+                cur_batch[idx] = input(cg, {NB_FEATS}, cff_train[id + idx]);
+                cur_labels[idx] = cff_train_labels[id + idx];
+            }
+            // Reshape as batch (not very intuitive yet)
+            Expression x_batch = reshape(concatenate_cols(cur_batch), Dim({NB_FEATS}, bsize));
+
+            // Get negative log likelihood on batch
+            Expression loss_expr = nn.get_nll(x_batch, cur_labels, cg);
+
+            // Get scalar error for monitoring
+            loss += as_scalar(cg.forward(loss_expr));
+
+            // Increment number of samples processed
+            num_samples += bsize;
+
+            // Compute gradient with backward pass
+            cg.backward(loss_expr);
+
+            // Update parameters
+            trainer.update();
+            //cout<<"SI= "<<si<<endl;
+            // Print progress every tenth of the dataset
+            if (si == num_batches - 1 || (num_batches >= 10 && (si + 1) % (num_batches / 10) == 0))
+            {
+                // Print progress information
+                trainer.status();
+                cerr << " E = " << (loss / num_samples) << ' ';
+
+                // Reinitialize timer
+                iteration.reset(new Timer("completed in"));
+
+                // Reinitialize loss
+                loss = 0;
+                num_samples = 0;
+            }
+        }
+        // Disable dropout for dev testing
+        nn.disable_dropout();
+
+        // Show score on dev data
+        if (si == num_batches)
+        {
+            double dpos = 0;
+            int nb_errors = 0;
+            for (unsigned int i = 0; i < cff_dev.size(); ++i)
+            {
+                // Build graph for this instance
+                ComputationGraph cg;
+
+                // Get input expression
+                Expression x = input(cg, {NB_FEATS}, cff_dev[i]);
+
+                // Get the predicted label
+                int predicted_idx = nn.predict(x, cg);
+
+                // Increment count of correct classifications
+                if ((unsigned int) predicted_idx == cff_dev_labels[i])
+                    dpos++;
+                /*else
+                    ++nb_errors;*/
+
+                if(epoch+1 == nb_epochs)
+                    predicted_file << predicted_idx << endl;
+            }
+
+            // If the dev accuracy is the best seen so far, save the model
+            if (dpos > best)
+            {
+                best = dpos;
+                TextFileSaver saver(fname);
+                saver.save(model);
+            }
+
+            // Print dev information
+            cerr << "\n***DEV [epoch=" << epoch+1
+                 << "] E = " << (dpos / (double) cff_dev.size()) << ' ';
+            //cerr << "Success rate = " << 100-(float)nb_errors*100/cff_dev_labels.size() << "%\n\n";
+
+            // Reinitialize timer
+            iteration.reset(new Timer("completed in"));
+        }
+        // Increment epoch
+        ++epoch;
+    }
+
+    return 0;
+}
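Not part of the patch: a minimal, self-contained sketch of how the MLP class from train_cff.hpp might be exercised on its own, e.g. as a quick smoke test. The 4-feature / 2-class dimensions, the toy input values and the file name smoke_test.cpp are illustrative assumptions; only the Layer/MLP API and the standard dynet calls come from the code above.

// smoke_test.cpp (hypothetical): one training step and one prediction with the MLP above
#include "train_cff.hpp"
#include <iostream>
#include <vector>

using namespace dynet;

int main(int argc, char** argv)
{
    dynet::initialize(argc, argv);

    ParameterCollection model;
    AdamTrainer trainer(model);

    // Illustrative dimensions: 4 input features, one hidden layer, 2 classes
    MLP nn(model, std::vector<Layer>({
        Layer(4, 16, RELU, /* dropout_rate */ 0.0),
        Layer(16, 2, LINEAR, 0.0)
    }));

    // One toy example: a feature vector and its gold label
    std::vector<float> feats = {0.f, 1.f, 2.f, 0.f};
    std::vector<unsigned int> labels = {1};

    // A single training step
    nn.enable_dropout();
    {
        ComputationGraph cg;
        Expression x = input(cg, {4}, feats);
        Expression loss = nn.get_nll(x, labels, cg);
        std::cout << "loss = " << as_scalar(cg.forward(loss)) << std::endl;
        cg.backward(loss);
        trainer.update();
    }

    // Prediction with dropout disabled (deterministic network)
    nn.disable_dropout();
    {
        ComputationGraph cg;
        Expression x = input(cg, {4}, feats);
        std::cout << "predicted class = " << nn.predict(x, cg) << std::endl;
    }

    return 0;
}

Building such a test would require linking against dynet, the same as for train_dynet.cpp.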