diff --git a/maca_trans_parser/src/train_cff.cpp b/maca_trans_parser/src/train_cff.cpp
index 92bb3161edf9db1ede23fa221234bbc30e9d6e95..95692cb57d98a171a84f3849c0c496b675917bc7 100644
--- a/maca_trans_parser/src/train_cff.cpp
+++ b/maca_trans_parser/src/train_cff.cpp
@@ -26,7 +26,7 @@ Layer::Layer(unsigned input_dim, unsigned output_dim, Activation activation, flo
 */
 MLP::MLP(ParameterCollection & model)
 {
-	LAYERS = layers.size();
+	LAYERS = 0;
 }
 
 /**
@@ -36,44 +36,44 @@ MLP::MLP(ParameterCollection & model)
  * \param model : ParameterCollection (to contain parameters)
  * \param filename : file containing the MLP's structure
 */
-MLP::MLP(ParameterCollection& model, char* filename/*vector<Layer> layers*/)
+MLP::MLP(ParameterCollection& model, char* filename)
 {
-	read_struct_mlp(filename);
-	// Verify layers compatibility
-	for (unsigned l = 0; l < layers.size() - 1; ++l)
-	{
-		if (layers[l].output_dim != layers[l + 1].input_dim)
-			throw invalid_argument("Layer dimensions don't match");
+	ifstream file(filename, ios::in);
+	if(!file)
+	{
+		cerr << "Cannot open file " << filename << endl;
+		exit(EXIT_FAILURE);
 	}
-
-	// Register parameters in model
-	for (Layer layer : layers)
+	unsigned int input_dim, output_dim;
+	float dropout;
+	Activation activ_fonction;
+	string fonction;
+
+	while(file >> input_dim) // first token of each line: input_dim
 	{
-		append(model, layer);
-	}
-}
-
-/**
- * \brief Append a layer at the end of the network
- *
- * \param model
- * \param layer
-*/
-void MLP::append(ParameterCollection& model, Layer layer)
-{
-	// Check compatibility
-	if (LAYERS > 0)
-		if (layers[LAYERS - 1].output_dim != layer.input_dim)
+		if(input_dim == 0) // skip comment lines (marked by a leading 0)
+		{
+			getline(file, fonction);
+			continue;
+		}
+		file >> output_dim;
+		file >> fonction;
+		activ_fonction = activation_fonction(fonction);
+		file >> dropout;
+
+		Layer tmp_layer(input_dim, output_dim, activ_fonction, dropout);
+		layers.push_back(tmp_layer);
+		LAYERS++;
+
+		if (LAYERS > 1 && layers[LAYERS-1].input_dim != layers[LAYERS-2].output_dim) // dimensions must chain
 			throw invalid_argument("Layer dimensions don't match");
-
-	// Add to layers
-	layers.push_back(layer);
-	LAYERS++;
-	// Register parameters
-	Parameter W = model.add_parameters({layer.output_dim, layer.input_dim});
-	Parameter b = model.add_parameters({layer.output_dim});
-	params.push_back({W, b});
+		// Register parameters
+		Parameter W = model.add_parameters({layers[LAYERS-1].output_dim, layers[LAYERS-1].input_dim});
+		Parameter b = model.add_parameters({layers[LAYERS-1].output_dim});
+		params.push_back({W, b});
+	}
+	file.close();
 }
 
 /**
@@ -212,24 +212,12 @@ inline Expression MLP::activate(Expression h, Activation f)
 	}
 }
 
-void MLP::read_struct_mlp(char* filename)
+Activation MLP::activation_fonction(string fonction)
 {
-	ifstream file(filename, ios::in);
-	if(!file)
-	{
-		cerr << "Cannot open file " << filename << endl;
-		exit(EXIT_FAILURE);
-	}
-	float tmp[4];
-
-	while(file >> tmp[0]) //input_dim
-	{
-		file >> tmp[1]; //output_dim
-		file >> tmp[2]; //activation rate
-		file >> tmp[3]; //dropout
-
-		Layer tmp_layer(tmp[0], tmp[1], tmp[2], tmp[3]);
-		layers.push_back(tmp_layer);
-	}
-}
-
+	if(fonction=="LINEAR") return LINEAR;
+	if(fonction=="RELU") return RELU;
+	if(fonction=="SIGMOID") return SIGMOID;
+	if(fonction=="TANH") return TANH;
+	if(fonction=="SOFTMAX") return SOFTMAX;
+	return RELU; // default when the name is not recognized
+}
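Note: the new constructor above consumes a plain whitespace-separated structure file (the same format documented at the top of train_dynet.cpp below). As a sketch, the topology that used to be hard-coded in train_dynet.cpp would be written as follows, assuming an input dimension of 5 purely for illustration:

	0 for each layer : input_dim output_dim activation dropout
	5 50 RELU 0.2
	50 100 RELU 0.2
	100 150 RELU 0.2
	150 3 LINEAR 0

Any line whose first token is 0 is skipped as a comment, and consecutive layers must chain (each output_dim equal to the next input_dim), otherwise the constructor throws invalid_argument.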
diff --git a/maca_trans_parser/src/train_cff.hpp b/maca_trans_parser/src/train_cff.hpp
index 3869270f204a04f5e0bc4d0bab968518f7ce507d..d1d57f62270c0eca9bb31b885e57d6823ff48dcd 100644
--- a/maca_trans_parser/src/train_cff.hpp
+++ b/maca_trans_parser/src/train_cff.hpp
@@ -11,7 +11,8 @@
 	#include <fstream>
 	#include <sstream>
 	#include <algorithm>
 	#include <vector>
+	#include <string>
 
 
 /**
@@ -57,12 +58,11 @@
 		bool dropout_active = true;
 
 	public:
-		void read_struct_mlp(char* filename);
 		MLP(dynet::ParameterCollection & model);
-		MLP(dynet::ParameterCollection& model, char* filename/*std::vector<Layer> layers*/);
-		void append(dynet::ParameterCollection& model, Layer layer);
+		MLP(dynet::ParameterCollection& model, char* filename);
+		Activation activation_fonction(std::string fonction);
 		dynet::Expression run(dynet::Expression x, dynet::ComputationGraph& cg);
-		dynet::Expression get_nll(dynet::Expression x, std::vector</*dynet::real*/unsigned int> labels, dynet::ComputationGraph& cg);
+		dynet::Expression get_nll(dynet::Expression x, std::vector<unsigned int> labels, dynet::ComputationGraph& cg);
 		int predict(dynet::Expression x, dynet::ComputationGraph& cg);
 		void enable_dropout();
 		void disable_dropout();
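A minimal smoke test for the new file-based constructor could look like this sketch (not part of the patch; it assumes dynet is initialized first and that argv[1] names a structure file):

	#include "dynet/init.h"
	#include "train_cff.hpp"

	int main(int argc, char** argv)
	{
		// Let dynet strip its own command-line options, then initialize it
		auto dyparams = dynet::extract_dynet_params(argc, argv);
		dynet::initialize(dyparams);

		dynet::ParameterCollection model;
		// The constructor parses the file, checks that consecutive layer
		// dimensions chain, and registers one weight matrix and one bias
		// vector per layer in `model`.
		MLP nn(model, argv[1]);
		return 0;
	}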
diff --git a/maca_trans_parser/src/train_dynet.cpp b/maca_trans_parser/src/train_dynet.cpp
index d99165b6596fe7d3a77c7ffe17b6a0886b7480c3..a9df04bbd94ce9825406e4700a534282022fc76e 100644
--- a/maca_trans_parser/src/train_dynet.cpp
+++ b/maca_trans_parser/src/train_dynet.cpp
@@ -1,14 +1,16 @@
 /**
-	USAGE :
-	./trainCFF train_file dev_file batch_size nb_epochs
+	Example of a file describing the MLP structure (a leading 0 marks a comment line):
+
+	0 for each layer : input_dim output_dim activation dropout
+	0 activation : SIGMOID, TANH, RELU, LINEAR, SOFTMAX
+	5 50 RELU 0.5
+	50 60 RELU 0.5
+	60 3 LINEAR 0
 **/
 
 /**
 Fix cmake to include Dynet
-Read Flo and Seb's file
-init the vector of Layer (push_back)
-*
 One-hot representation
 Embedding representation
 **/
@@ -16,13 +18,13 @@ Embedding representation
 
 #include <iostream>
 #include <fstream>
 #include "train_cff.hpp"
-#include "dynet/io.h"
+#include "dynet/io.h"/*
 extern "C"
 {
 	#include "feat_model.h"
 	#include "cf_file.h"
 }
-
+*/
 #define NB_FEATS 5
@@ -34,7 +36,7 @@ using namespace dynet;
 /**
  * Reads the input dimension of the network from an fm file
  * */
-
+	/*
 int read_fm(char *filename_fm, char *cff_filename){
 	int dim, input_dim = 0;
 	feat_model * feat_m = feat_model_read(filename_fm, NULL ,0);
@@ -45,10 +47,7 @@ int read_fm(char *filename_fm, char *cff_filename){
 		input_dim += tab[i];
 	}
 	return input_dim;
-}
-
-
-
+}*/
 
 /**
  * Puts the features of the train file into the cff_train matrix
@@ -56,7 +55,7 @@ int read_fm(char *filename_fm, char *cff_filename){
  * Puts the labels of the train file into the cff_train_labels vector
  * Puts the labels of the dev file into the cff_dev_labels vector
 **/
-void read_files(char* train_filename, char* dev_filename, vector<vector<float>> &cff_train,
+void init_dataset(char* train_filename, char* dev_filename, vector<vector<float>> &cff_train,
 				vector<vector<float>> &cff_dev, vector<unsigned int> &cff_train_labels,
 				vector<unsigned int> &cff_dev_labels)
 {
@@ -98,7 +97,7 @@ int main(int argc, char** argv)
 	if(argc > 7 || argc < 6)
 	{
 		cerr << "Usage : " << argv[0]
-			<< "train_file<string> dev_file<string> batch_size<int> nb_epochs<int> [model_file<string>]\n";
+			<< " train_file<string> dev_file<string> batch_size<int> nb_epochs<int> mlp_struct_file<string> [model_file<string>]\n";
 		exit(EXIT_FAILURE);
 	}
@@ -106,10 +105,8 @@
 	auto dyparams = dynet::extract_dynet_params(argc, argv);
 	dynet::initialize(dyparams);
 
-	// Fetch program specific parameters (see ../utils/cl-args.h) ------------------------------------
-	/*Params params;
-	get_args(argc, argv, params, TRAIN_SUP);*/ // not useful for us
-	unsigned int batch_size = atoi(argv[3]); // use 297
+	// Init variables --------------------------------------------------------------------------------
+	unsigned int batch_size = atoi(argv[3]);
 
 	int nb_epochs = atoi(argv[4]);
@@ -124,18 +121,13 @@
 	// Load Dataset ----------------------------------------------------------------------------------
 	vector<vector<float>> cff_train, cff_dev; // feature matrices (data); float keeps the type wide enough
 	vector<unsigned int> cff_train_labels, cff_dev_labels; // target (label) vectors
-	read_files(argv[1], argv[2], cff_train, cff_dev, cff_train_labels, cff_dev_labels);
-	int input_dim = read_fm(argv[5], argv[1]);
+	init_dataset(argv[1], argv[2], cff_train, cff_dev, cff_train_labels, cff_dev_labels);
+	//int input_dim = read_fm(argv[5], argv[1]);
 
 	// ParameterCollection name (for saving) ---------------------------------------------------------
 	ostringstream os;
 	// Store a bunch of information in the model name
 	os << "mlp" << getpid() << ".params";
-	/*<< "_" << 784 << "-" << 512 << "-relu-" << 0.2
-	<< "_" << 512 << "-" << 512 << "-relu-" << 0.2
-	<< "_" << 512 << "-" << 10 << "-softmax"
-	<< "_" << getpid()
-	<< ".params";*/
 	const string fname = os.str();
 	cerr << "Parameters will be written to: " << fname << endl;
@@ -146,29 +138,22 @@
 	trainer.clip_threshold *= batch_size;
 
 	// Create model
-	MLP nn(model, vector<Layer>({
-		Layer(/* input_dim (number of features) */ input_dim, /* output_dim */ 50, /* activation */ RELU, /* dropout_rate */ 0.2),
-		Layer(/* input_dim */ 50, /* output_dim */ 100, /* activation */ RELU, /* dropout_rate */ 0.2),
-		Layer(/* input_dim */ 100, /* output_dim */ 150, /* activation */ RELU, /* dropout_rate */ 0.2),
-		Layer(/* input_dim */ 150, /* output_dim (number of possible classes) */ 3, /* activation */ LINEAR, /* dropout_rate */ 0.0)
-	}));
+	MLP nn(model, argv[5]);
 
 	// Load preexisting weights (if provided)
-	if (argv[6] != NULL)
+	/*if (argv[6] != NULL)
 	{
 		TextFileLoader loader(argv[6]);
 		loader.populate(model);
-	}
+	}*/
 
 	// Initialize variables for training -------------------------------------------------------------
 	// Worst accuracy
 	double worst = 0;
 
 	// Number of batches in training set
-	unsigned int num_batches = cff_train.size() / batch_size - 1; //921;
-	//number of lines / batch size - 1
-	//cout << "NUMBER OF BATCHES = " << num_batches <<endl;
+	unsigned int num_batches = cff_train.size() / batch_size - 1;
 
 	// Random indexing
 	unsigned int si;
@@ -205,7 +190,6 @@
 			// Compute batch start id and size
 			int id = order[si] * batch_size;
 			unsigned int bsize = std::min((unsigned int) cff_train.size() - id, batch_size);
-			// cout <<"BSIZE = "<<bsize<<endl;
 
 			// Get input batch
 			cur_batch = vector<Expression>(bsize);
@@ -232,7 +216,7 @@
 			// Update parameters
 			trainer.update();
 
-			//cout<<"SI= "<<si<<endl;
+			// Print progress every tenth of the dataset
 			if ((si + 1) % (num_batches / 10) == 0 || si == num_batches - 1)
 			{
@@ -255,7 +239,6 @@
 		if (si == num_batches)
 		{
 			double dpos = 0;
-			int nb_errors = 0;
 			for (unsigned int i = 0; i < cff_dev.size(); ++i)
 			{
 				// build graph for this instance
@@ -270,8 +253,6 @@
 				// Increment count of positive classification
 				if (predicted_idx == cff_dev_labels[i])
 					dpos++;
-				/*else
-					++nb_errors;*/
 
 				if(epoch+1 == nb_epochs)
 					predicted_file << predicted_idx << endl;
@@ -288,7 +269,6 @@
 			// Print information
 			cerr << "\n***DEV [epoch=" << epoch+1 << "] E = " << (dpos / (double) cff_dev.size()) << ' ';
-			//cerr << "Success rate = " << 100-(float)nb_errors*100/cff_dev_labels.size() << "%\n\n";
 
 			// Reinitialize timer
 			iteration.reset(new Timer("completed in"));
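With the revised argument list, a training run would be launched roughly as follows (file names are illustrative):

	./trainCFF train.cff dev.cff 64 20 mlp.struct

The optional sixth argument (a file with preexisting weights) is still accepted by the argc check, but it currently has no effect because the TextFileLoader block above is commented out.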