diff --git a/MLP/include/MLP.hpp b/MLP/include/MLP.hpp
index 995f03268bda520949a297f69f1e68dc0ee7efaa..82a97c49defd2391481d611dbdd995223da760f4 100644
--- a/MLP/include/MLP.hpp
+++ b/MLP/include/MLP.hpp
@@ -1,3 +1,8 @@
+/// @file MLP.hpp
+/// @author Franck Dary
+/// @version 1.0
+/// @date 2018-08-03
+
 #ifndef MLP__H
 #define MLP__H
 
@@ -8,12 +13,16 @@
 #include <dynet/expr.h>
 #include "FeatureModel.hpp"
 
+/// @brief Multi Layer Perceptron.
+/// It is capable of training itself given a batch of examples.\n
+/// Once trained, it can also be used to predict the class of a given input.
 class MLP
 {
   public :
 
   using Examples = std::pair< std::vector<int>, std::vector<std::pair<int, FeatureModel::FeatureDescription> > >;
 
+  /// @brief Activation function for an MLP Layer.
   enum Activation
   {
     SIGMOID,
@@ -26,61 +35,175 @@ class MLP
     SOFTMAX
   };
 
+  /// @brief Get the string corresponding to an Activation.
+  ///
+  /// @param a The Activation.
+  ///
+  /// @return The string corresponding to a.
   static std::string activation2str(Activation a);
+  /// @brief Get the Activation corresponding to a string.
+  ///
+  /// @param s The string.
+  ///
+  /// @return The Activation corresponding to s. If s is unknown, the program aborts.
   static Activation str2activation(std::string s);
+  /// @brief A simple struct that represents an MLP Layer.
   struct Layer
   {
+    /// @brief Number of input neurons of this Layer.
     int input_dim;
+    /// @brief Number of output neurons of this Layer.
     int output_dim;
+    /// @brief The dropout rate to apply to this Layer when training.
     float dropout_rate;
+    /// @brief The activation function of this Layer.
     Activation activation;
+    /// @brief Construct a new Layer.
+    ///
+    /// @param input_dim Number of input neurons of the Layer.
+    /// @param output_dim Number of output neurons of the Layer.
+    /// @param dropout_rate Dropout rate applied to the Layer during training.
+    /// @param activation Activation function of the Layer.
     Layer(int input_dim, int output_dim, float dropout_rate, Activation activation);
+    /// @brief Print a description of this Layer.
+    ///
+    /// @param file Where to print the output.
     void print(FILE * file);
   };
 
   private :
 
+  /// @brief The maximum number of entries in a lookup parameter (used to store Dict values).
   static const unsigned int MAXLOOKUPSIZE = 200000;
 
+  /// @brief The Layers of the MLP.
   std::vector<Layer> layers;
+  /// @brief The parameters corresponding to the layers of the MLP.
   std::vector< std::vector<dynet::Parameter> > parameters;
+  /// @brief The parameters corresponding to Dict values.
   std::map< Dict*, std::pair<dynet::LookupParameter, std::map<void*, unsigned int> > > lookupParameters;
 
+  /// @brief The dynet model containing the parameters to be trained.
   dynet::ParameterCollection model;
+  /// @brief The training algorithm that will be used.
   std::unique_ptr<dynet::AmsgradTrainer> trainer;
 
+  /// @brief Whether the program is in train mode or not (parameters are only updated in train mode).
   bool trainMode;
+  /// @brief Whether the Layer dropout rates are applied during computation. Usually this is only the case during training.
   bool dropoutActive;
 
   private :
 
+  /// @brief Add the parameters of a layer into the dynet model.
+  ///
+  /// @param layer The layer to add.
   void addLayerToModel(Layer & layer);
+  /// @brief Abort the program if the layers are not compatible.
   void checkLayersCompatibility();
+  /// @brief Set dynet and srand() seeds.
+  ///
+  /// @return The DynetParams containing the set seed.
   dynet::DynetParams & getDefaultParams();
+  /// @brief Convert a FeatureValue to a dynet Expression that will be used as an input of the Multi Layer Perceptron.
+  ///
+  /// @param cg The current Computation Graph.
+  /// @param fv The FeatureValue that will be converted.
+  ///
+  /// @return A dynet Expression of value fv that can be used as an input of the Multi Layer Perceptron.
   dynet::Expression featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv);
+  /// @brief Compute the image of input x by the Multi Layer Perceptron.
+  ///
+  /// @param cg The current computation graph.
+  /// @param x The input of the Multi Layer Perceptron.
+  ///
+  /// @return The result (values of the output Layer) of the computation of x by the Multi Layer Perceptron.
   dynet::Expression run(dynet::ComputationGraph & cg, dynet::Expression x);
+  /// @brief Compute the image of an expression by an activation function.
+  ///
+  /// @param h The expression we want the image of.
+  /// @param f The activation function.
+  ///
+  /// @return f(h).
   inline dynet::Expression activate(dynet::Expression h, Activation f);
+  /// @brief Print the parameters.
+  ///
+  /// @param output Where the parameters will be printed to.
   void printParameters(FILE * output);
+  /// @brief Save the structure of the MLP (all the Layers) to a file.
+  /// The goal is to store the structure of the MLP into a file so that
+  /// it can be loaded and used again later.
+  /// @param filename The file in which the structure will be saved.
   void saveStruct(const std::string & filename);
+  /// @brief Save the learned parameters of the MLP to a file.
+  /// Only the parameters of the Layers will be saved by this function.\n
+  /// The parameters that correspond to values inside a Dict will be saved by their owner,
+  /// the Dict object.
+  /// @param filename The file in which the parameters will be saved.
   void saveParameters(const std::string & filename);
+  /// @brief Load and construct all the Layers from a file.
+  /// The file must have been written by the function saveStruct.
+  /// @param filename The file from which the structure will be read.
   void loadStruct(const std::string & filename);
+  /// @brief Load and populate the model with parameters from a file.
+  /// The file must have been written by the function saveParameters.
+  /// @param filename The file from which the parameters will be read.
   void loadParameters(const std::string & filename);
+  /// @brief Load an MLP from a file.
+  /// This function will use loadStruct and loadParameters.
+  /// @param filename The file from which the MLP will be loaded.
   void load(const std::string & filename);
+  /// @brief Initialize the dynet library.
+  /// Must be called only once, and before any call to dynet functions.
   void initDynet();
 
   public :
 
-  MLP(std::vector<Layer> layers);
+  /// @brief Construct a new untrained MLP from a desired topology.
+  /// Topology example for 2 hidden layers: (150,RELU,0.3)(50,ELU,0.2)\n
+  /// meaning hidden layers of sizes 150 and 50, with activation functions RELU and ELU,
+  /// and dropout rates of 0.3 and 0.2.
+  /// @param nbInputs The size of the input layer of the MLP.
+  /// @param topology Description of each hidden Layer of the MLP.
+  /// @param nbOutputs The size of the output layer of the MLP.
   MLP(int nbInputs, const std::string & topology, int nbOutputs);
+  /// @brief Read and construct a trained MLP from a file.
+  /// The file must have been written by save.
+  /// @param filename The file to read the MLP from.
   MLP(const std::string & filename);
+  /// @brief Give a score to each possible class, given an input.
+  ///
+  /// @param fd The input to use.
+  ///
+  /// @return A vector containing one score per possible class.
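+  ///
+  /// Example usage (illustrative sketch; the model filename and the FeatureDescription fd
+  /// are placeholders to be provided by the caller, and std::max_element requires <algorithm>):
+  /// @code
+  /// MLP mlp("path/to/mlp");                      // read a trained MLP from a file
+  /// std::vector<float> scores = mlp.predict(fd); // one score per possible class
+  /// int best = std::max_element(scores.begin(), scores.end()) - scores.begin();
+  /// @endcode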
   std::vector<float> predict(FeatureModel::FeatureDescription & fd);
+  /// @brief Train the MLP on a batch of training examples.
+  /// The parameters will be updated by this function.
+  /// @param examples A set of training examples.
+  /// @param start The index of the first element of the batch.
+  /// @param end The index of the last element of the batch.
+  ///
+  /// @return The number of examples in the batch for which the class was correctly predicted by the MLP.
   int trainOnBatch(Examples & examples, int start, int end);
+  /// @brief Get the score of the MLP on a batch of training examples.
+  /// The parameters will not be updated by this function.
+  /// @param examples A set of training examples.
+  /// @param start The index of the first element of the batch.
+  /// @param end The index of the last element of the batch.
+  ///
+  /// @return The number of examples in the batch for which the class was correctly predicted by the MLP.
   int getScoreOnBatch(Examples & examples, int start, int end);
+  /// @brief Save the MLP to a file.
+  ///
+  /// @param filename The file to write the MLP to.
   void save(const std::string & filename);
+  /// @brief Print the topology (Layers) of the MLP.
+  ///
+  /// @param output Where the topology will be printed.
   void printTopology(FILE * output);
 };
diff --git a/MLP/src/MLP.cpp b/MLP/src/MLP.cpp
index 3bc91819a61e1e9b0b5a6f991b2cda82dab0760f..609484a141c5d11f8ffe161afac742d136645eac 100644
--- a/MLP/src/MLP.cpp
+++ b/MLP/src/MLP.cpp
@@ -120,22 +120,6 @@ MLP::MLP(int nbInputs, const std::string & topology, int nbOutputs)
     addLayerToModel(layer);
 }
 
-MLP::MLP(std::vector<Layer> layers)
-: layers(layers)
-{
-  trainer.reset(new dynet::AmsgradTrainer(model, 0.001, 0.9, 0.999, 1e-8));
-
-  initDynet();
-
-  trainMode = true;
-  dropoutActive = true;
-
-  checkLayersCompatibility();
-
-  for(Layer layer : layers)
-    addLayerToModel(layer);
-}
-
 void MLP::addLayerToModel(Layer & layer)
 {
   dynet::Parameter W = model.add_parameters({(unsigned)layer.output_dim, (unsigned)layer.input_dim});