diff --git a/neural_network/include/MLP.hpp b/neural_network/include/MLP.hpp index 5c73975f9081601946cd42ab8e2ceb43af75c1d8..ab7b3c8bcbf0246f7f1a83a4de7ff15121be95eb 100644 --- a/neural_network/include/MLP.hpp +++ b/neural_network/include/MLP.hpp @@ -7,6 +7,7 @@ #define MLP__H #include "NeuralNetwork.hpp" +#include "MLPBase.hpp" #include "ProgramParameters.hpp" /// @brief Multi Layer Perceptron. @@ -17,77 +18,10 @@ class MLP : public NeuralNetwork { private : - /// @brief The Layers of the MLP. - std::vector<Layer> layers; - /// @brief The parameters corresponding to the layers of the MLP. - std::vector< std::vector<dynet::Parameter> > parameters; - + /// @brief The mlp that will be trained. + MLPBase mlp; /// @brief The training algorithm that will be used. std::unique_ptr<dynet::Trainer> trainer; - /// @brief Must the Layer dropout rate be taken into account during the computations ? Usually it is only during the training step. - bool dropoutActive; - - /// @brief The current minibatch. - std::vector<FeatureModel::FeatureDescription> fds; - /// @brief gold classes of the current minibatch. - std::vector<unsigned int> golds; - - private : - - /// @brief Add the parameters of a layer into the dynet model. - /// - /// @param layer The layer to add. - void addLayerToModel(Layer & layer); - /// @brief Abort the program if the layers are not compatible. - void checkLayersCompatibility(); - /// @brief Compute the image of input x by the Multi Layer Perceptron. - /// - /// @param cg The current computation graph. - /// @param x The input of the Multi Layer Perceptron. - /// - /// @return The result (values of the output Layer) of the computation of x by the Multi Layer Perceptron. - dynet::Expression run(dynet::ComputationGraph & cg, dynet::Expression x); - /// @brief Print the parameters. - /// - /// @param output Where the parameters will be printed to. - void printParameters(FILE * output); - /// @brief Save the structure of the MLP (all the Layer) to a file. - /// - /// The goal is to store the structure of the MLP into a file so that - /// we can load it and use it another time. - /// @param filename The file in which the structure will be saved. - void saveStruct(const std::string & filename); - /// @brief Save the learned parameters of the MLP to a file. - /// - /// Only the parameters of the Layers will be saved by this function.\n - /// The parameters that are values inside of Dict, will be saved by their owner, - /// the Dict object. - /// @param filename The file in which the parameters will be saved. - void saveParameters(const std::string & filename); - /// @brief Load and construt all the Layer from a file. - /// - /// The file must have been written by the function saveStruct. - /// @param filename The file from which the structure will be read. - void loadStruct(const std::string & filename); - /// @brief Load and populate the model with parameters from a file. - /// - /// The file must have been written by the function saveParameters. - /// @param filename The file from which the parameters will be read. - void loadParameters(const std::string & filename); - /// @brief Load a MLP from a file. - /// - /// This function will use loadStruct and loadParameters. - /// @param filename The file from which the MLP will be loaded. 
-  void load(const std::string & filename);
-  /// @brief Get the loss expression
-  ///
-  /// @param output Output from the neural network
-  /// @param oneHotGolds Indexes of gold classes (batched form)
-  ///
-  /// @return The loss expression
-  dynet::Expression weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds);
-
-  dynet::Expression errorCorrectionLoss(dynet::ComputationGraph & cg, dynet::Expression & output, std::vector<unsigned int> & oneHotGolds);
 
   public :
 
@@ -113,7 +47,6 @@ class MLP : public NeuralNetwork
   ///
   /// @return A vector containing one score per possible class.
   std::vector<float> predict(FeatureModel::FeatureDescription & fd) override;
-
   /// @brief Update the parameters according to the given gold class.
   ///
   /// @param fd The input to use.
@@ -121,7 +54,6 @@ class MLP : public NeuralNetwork
   ///
   /// @return The loss.
   float update(FeatureModel::FeatureDescription & fd, int gold) override;
-
   /// @brief Save the MLP to a file.
   ///
   /// @param filename The file to write the MLP to.
diff --git a/neural_network/include/MLPBase.hpp b/neural_network/include/MLPBase.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0411a45da1a0326aa86d125ad4b3193a30152306
--- /dev/null
+++ b/neural_network/include/MLPBase.hpp
@@ -0,0 +1,122 @@
+/// @file MLPBase.hpp
+/// @author Franck Dary
+/// @version 1.0
+/// @date 2019-01-13
+
+#ifndef MLPBASE__H
+#define MLPBASE__H
+
+#include <vector>
+#include <map>
+#include <string>
+#include "ProgramParameters.hpp"
+#include "NeuralNetwork.hpp"
+
+/// @brief Multi Layer Perceptron.
+///
+/// Once trained, it can also be used to predict the class of a certain input.
+class MLPBase
+{
+  private :
+
+  using Layer = NeuralNetwork::Layer;
+  /// @brief The Layers of the MLP.
+  std::vector<Layer> layers;
+  /// @brief The parameters corresponding to the layers of the MLP.
+  std::vector< std::vector<dynet::Parameter> > parameters;
+
+  /// @brief Must the Layer dropout rate be taken into account during the computations? Usually this is the case only during the training step.
+  bool dropoutActive;
+
+  /// @brief The current minibatch.
+  std::vector<FeatureModel::FeatureDescription> fds;
+  /// @brief Gold classes of the current minibatch.
+  std::vector<unsigned int> golds;
+
+  public :
+
+  /// @brief Add the parameters of a layer into the dynet model.
+  ///
+  /// @param model The dynet model that will contain the parameters of the layer.
+  /// @param layer The layer to add.
+  void addLayerToModel(dynet::ParameterCollection & model, Layer & layer);
+  /// @brief Abort the program if the layers are not compatible.
+  void checkLayersCompatibility();
+  /// @brief Compute the image of input x by the Multi Layer Perceptron.
+  ///
+  /// @param cg The current computation graph.
+  /// @param x The input of the Multi Layer Perceptron.
+  ///
+  /// @return The result (values of the output Layer) of the computation of x by the Multi Layer Perceptron.
+  dynet::Expression run(dynet::ComputationGraph & cg, dynet::Expression x);
+  /// @brief Print the parameters.
+  ///
+  /// @param output Where the parameters will be printed to.
+  void printParameters(FILE * output);
+  /// @brief Save the structure of the MLP (all the Layers) to a file.
+  ///
+  /// The goal is to store the structure of the MLP into a file so that
+  /// we can load it and use it another time.
+  /// @param filename The file in which the structure will be saved.
+  void saveStruct(const std::string & filename);
+  /// @brief Save the learned parameters of the MLP to a file.
+  ///
+  /// Only the parameters of the Layers will be saved by this function.\n
+  /// The parameters that are values inside of Dict will be saved by their owner,
+  /// the Dict object.
+  /// @param filename The file in which the parameters will be saved.
+  void saveParameters(const std::string & filename);
+  /// @brief Load and construct all the Layers from a file.
+  ///
+  /// The file must have been written by the function saveStruct.
+  /// @param model The dynet model that will contain the loaded parameters.
+  /// @param filename The file from which the structure will be read.
+  void loadStruct(dynet::ParameterCollection & model, const std::string & filename);
+  /// @brief Load and populate the model with parameters from a file.
+  ///
+  /// The file must have been written by the function saveParameters.
+  /// @param model The dynet model that will contain the loaded parameters.
+  /// @param filename The file from which the parameters will be read.
+  void loadParameters(dynet::ParameterCollection & model, const std::string & filename);
+  /// @brief Get the loss expression.
+  ///
+  /// @param output Output from the neural network
+  /// @param oneHotGolds Indexes of gold classes (batched form)
+  ///
+  /// @return The loss expression
+  dynet::Expression weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds);
+
+  dynet::Expression errorCorrectionLoss(dynet::ComputationGraph & cg, dynet::Expression & output, std::vector<unsigned int> & oneHotGolds);
+
+  /// @brief Initialize a new untrained MLP from a desired topology.
+  ///
+  /// Topology example for 2 hidden layers : (150,RELU,0.3)(50,ELU,0.2)\n
+  /// Of sizes 150 and 50, activation functions RELU and ELU, and dropout rates
+  /// of 0.3 and 0.2.
+  /// @param model The dynet model that will contain all the MLP parameters.
+  /// @param nbInputs The size of the input layer of the MLP.
+  /// @param topology Description of each hidden Layer of the MLP.
+  /// @param nbOutputs The size of the output layer of the MLP.
+  void init(dynet::ParameterCollection & model, int nbInputs, const std::string & topology, int nbOutputs);
+  /// @brief Construct a new MLP for training.
+  MLPBase();
+  /// @brief Give a score to each possible class, given an input.
+  ///
+  /// @param fd The input to use.
+  ///
+  /// @return A vector containing one score per possible class.
+  std::vector<float> predict(FeatureModel::FeatureDescription & fd);
+  /// @brief Update the parameters according to the given gold class.
+  ///
+  /// @param fd The input to use.
+  /// @param gold The gold class of this input.
+  ///
+  /// @return The loss.
+  float update(FeatureModel::FeatureDescription & fd, int gold);
+  /// @brief Print the topology (Layers) of the MLP.
+  ///
+  /// @param output Where the topology will be printed.
+  void printTopology(FILE * output);
+};
+
+#endif
diff --git a/neural_network/include/NeuralNetwork.hpp b/neural_network/include/NeuralNetwork.hpp
index 9f6a6df43b90b70127a301dec0c0555f2d5e0734..28c2ef1ef5fe894103c30135ed908456cb272ddd 100644
--- a/neural_network/include/NeuralNetwork.hpp
+++ b/neural_network/include/NeuralNetwork.hpp
@@ -12,7 +12,7 @@ class NeuralNetwork
 {
-  protected :
+  public :
 
   /// @brief Activation function for a Layer.
   enum Activation
@@ -68,6 +68,22 @@ void print(FILE * file);
   };
 
+  /// @brief Convert a FeatureValue to a dynet Expression that will be used as an input of the NeuralNetwork.
+  ///
+  /// @param cg The current Computation Graph.
+  /// @param fv The FeatureValue that will be converted.
+ /// + /// @return A dynet Expression of value fv that can be used as an input in the NeuralNetwork + static dynet::Expression featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv); + + /// @brief Compute the image of an expression by an activation function. + /// + /// @param h The expression we want the image of. + /// @param f The activation function. + /// + /// @return f(h) + static dynet::Expression activate(dynet::Expression h, Activation f); + protected : /// @brief The seed that will be used by RNG (srand and dynet) @@ -81,13 +97,6 @@ class NeuralNetwork protected : - /// @brief Convert a FeatureValue to a dynet Expression that will be used as an input of the NeuralNetwork. - /// - /// @param cg The current Computation Graph. - /// @param fv The FeatureValue that will be converted. - /// - /// @return A dynet Expression of value fv that can be used as an input in the NeuralNetwork - dynet::Expression featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv); /// @brief Set dynet and srand() seeds. /// /// @return The DynetParams containing the set seed. @@ -96,13 +105,6 @@ class NeuralNetwork /// /// Must be called only once, and before any call to dynet functions. void initDynet(); - /// @brief Compute the image of an expression by an activation function. - /// - /// @param h The expression we want the image of. - /// @param f The activation function. - /// - /// @return f(h) - dynet::Expression activate(dynet::Expression h, Activation f); public : diff --git a/neural_network/src/MLP.cpp b/neural_network/src/MLP.cpp index 39f6ecb49bebbe3b07733ddfc5c54a48c1d95b03..74e1113e9e975b2b5faef8ee9c8c24457f450b04 100644 --- a/neural_network/src/MLP.cpp +++ b/neural_network/src/MLP.cpp @@ -3,44 +3,23 @@ MLP::MLP() { randomSeed = ProgramParameters::seed; - dropoutActive = true; trainer.reset(createTrainer()); initDynet(); } -void MLP::init(int nbInputs, const std::string & topology, int nbOutputs) +MLP::MLP(const std::string & filename) { - std::string topo = topology; - std::replace(topo.begin(), topo.end(), '(', ' '); - std::replace(topo.begin(), topo.end(), ')', ' '); - - auto groups = split(topo); - for (auto group : groups) - { - if(group.empty()) - continue; - - std::replace(group.begin(), group.end(), ',', ' '); - auto layer = split(group); - - if (layer.size() != 3) - { - fprintf(stderr, "ERROR (%s) : invalid topology \'%s\'. Aborting.\n", ERRINFO, topology.c_str()); - exit(1); - } - - int input = layers.empty() ? 
nbInputs : layers.back().output_dim; - int output = std::stoi(layer[0]); - float dropout = std::stof(layer[2]); - layers.emplace_back(input, output, dropout, str2activation(layer[1])); - } - - layers.emplace_back(layers.back().output_dim, nbOutputs, 0.0, Activation::LINEAR); + randomSeed = ProgramParameters::seed; + trainer.reset(createTrainer()); + initDynet(); - checkLayersCompatibility(); + mlp.loadStruct(model, filename); + mlp.loadParameters(model, filename); +} - for(Layer layer : layers) - addLayerToModel(layer); +void MLP::init(int nbInputs, const std::string & topology, int nbOutputs) +{ + mlp.init(model, nbInputs, topology, nbOutputs); } dynet::Trainer * MLP::createTrainer() @@ -63,325 +42,28 @@ dynet::Trainer * MLP::createTrainer() return nullptr; } -void MLP::addLayerToModel(Layer & layer) -{ - dynet::Parameter W = model.add_parameters({(unsigned)layer.output_dim, (unsigned)layer.input_dim}); - dynet::Parameter b = model.add_parameters({(unsigned)layer.output_dim}); - if (!ProgramParameters::randomParameters) - { - W.set_value(std::vector<float>((unsigned)layer.output_dim * (unsigned)layer.input_dim, 1.0)); - b.set_value(std::vector<float>((unsigned)layer.output_dim, 1.0)); - } - parameters.push_back({W,b}); -} - -void MLP::checkLayersCompatibility() -{ - if(layers.empty()) - { - fprintf(stderr, "ERROR (%s) : constructed mlp with 0 layers. Aborting.\n", ERRINFO); - exit(1); - } - - for(unsigned int i = 0; i < layers.size()-1; i++) - if(layers[i].output_dim != layers[i+1].input_dim) - { - fprintf(stderr, "ERROR (%s) : constructed mlp with incompatible layers. Aborting.\n", ERRINFO); - exit(1); - } -} - std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd) { - bool currentDropoutActive = dropoutActive; - dropoutActive = false; - dynet::ComputationGraph cg; - - std::vector<dynet::Expression> expressions; - - for (auto & featValue : fd.values) - expressions.emplace_back(featValue2Expression(cg, featValue)); - - dynet::Expression input = dynet::concatenate(expressions); - - dynet::Expression output = run(cg, input); - - dropoutActive = currentDropoutActive; - - return as_vector(cg.forward(output)); + return mlp.predict(fd); } float MLP::update(FeatureModel::FeatureDescription & fd, int gold) { - fds.emplace_back(fd); - golds.emplace_back(gold); - - if ((int)fds.size() < ProgramParameters::batchSize) - return 0.0; - - std::vector<dynet::Expression> inputs; - dynet::ComputationGraph cg; + float loss = mlp.update(fd, gold); - for (auto & example : fds) - { - std::vector<dynet::Expression> expressions; - - for (auto & featValue : example.values) - expressions.emplace_back(featValue2Expression(cg, featValue)); - - dynet::Expression input = dynet::concatenate(expressions); - inputs.emplace_back(input); - } - - dynet::Expression batchedInput = dynet::concatenate_to_batch(inputs); - dynet::Expression output = run(cg, batchedInput); - dynet::Expression batchedLoss; - - if (ProgramParameters::loss == "neglogsoftmax") - { - batchedLoss = dynet::sum_batches(pickneglogsoftmax(output, golds)); - } - else if (ProgramParameters::loss == "weighted") - { - batchedLoss = weightedLoss(output, golds); - } - else if (ProgramParameters::loss == "errorCorrection") - { - batchedLoss = errorCorrectionLoss(cg,output, golds); - } - else - { - fprintf(stderr, "ERROR (%s) : Unknown loss function \'%s\'. 
Aborting.\n", ERRINFO, ProgramParameters::loss.c_str()); - exit(1); - } - - cg.backward(batchedLoss); trainer->update(); - fds.clear(); - golds.clear(); - - return as_scalar(batchedLoss.value()); -} - -dynet::Expression MLP::weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds) -{ - std::vector<dynet::Expression> lossExpr; - for (unsigned int i = 0; i < output.dim().batch_elems(); i++) - { - lossExpr.emplace_back(dynet::pickneglogsoftmax(dynet::pick_batch_elem(output, i), oneHotGolds[i])); - auto outputVect = dynet::as_vector(dynet::pick_batch_elem(output,i).value()); - int prediction = 0; - for (unsigned int j = 1; j < outputVect.size(); j++) - if(outputVect[j] > outputVect[prediction]) - prediction = j; - int gold = oneHotGolds[i]; - if (prediction == 1 && gold == 0) - { - lossExpr.back() = lossExpr.back() * 100.0; - } - } - - return dynet::sum(lossExpr); -} - -dynet::Expression MLP::errorCorrectionLoss(dynet::ComputationGraph & cg, dynet::Expression & output, std::vector<unsigned int> & oneHotGolds) -{ - std::vector<dynet::Expression> lossExpr; - for (unsigned int i = 0; i < output.dim().batch_elems(); i++) - { - unsigned int u = 0; - dynet::Expression c = dynet::pick(dynet::one_hot(cg, layers.back().output_dim, oneHotGolds[i]),u); - dynet::Expression a = dynet::pick(dynet::softmax(dynet::pick_batch_elem(output,i)),u); - lossExpr.emplace_back(dynet::pickneglogsoftmax(dynet::pick_batch_elem(output, i),oneHotGolds[i])+2-c-a*c+(dynet::acos(a-1)*(c-1))); - if (ProgramParameters::debug) - { - cg.forward(lossExpr.back()); - fprintf(stderr, "a=%.2f c=%.2f loss=%.2f\n", dynet::as_scalar(a.value()),dynet::as_scalar(c.value()),dynet::as_scalar(lossExpr.back().value())); - } - } - - return dynet::sum(lossExpr); -} - -dynet::Expression MLP::run(dynet::ComputationGraph & cg, dynet::Expression x) -{ - static std::vector< std::pair<std::string,dynet::Expression> > exprForDebug; - - // Expression for the current hidden state - dynet::Expression h_cur = x; - - if (ProgramParameters::showFeatureRepresentation) - { - if (ProgramParameters::showFeatureRepresentation == 1) - for (unsigned int i = 0; i < 81; i++) - fprintf(stderr, "%s", i == 80 ? 
"\n" : "-"); - exprForDebug.clear(); - if (ProgramParameters::showFeatureRepresentation == 1) - exprForDebug.emplace_back("Input layer", h_cur); - if (ProgramParameters::showFeatureRepresentation >= 2) - exprForDebug.emplace_back("", h_cur); - } - - for(unsigned int l = 0; l < layers.size(); l++) - { - // Initialize parameters in computation graph - dynet::Expression W = parameter(cg, parameters[l][0]); - dynet::Expression b = parameter(cg, parameters[l][1]); - // Apply affine transform - dynet::Expression a = dynet::affine_transform({b, W, h_cur}); - // Apply activation function - dynet::Expression h = activate(a, layers[l].activation); - // Take care of dropout - dynet::Expression h_dropped; - if(layers[l].dropout_rate > 0){ - if(dropoutActive){ - dynet::Expression mask = random_bernoulli(cg, - {(unsigned int)layers[l].output_dim}, 1 - layers[l].dropout_rate); - h_dropped = cmult(h, mask); - } - else{ - h_dropped = h * (1 - layers[l].dropout_rate); - } - } - else{ - h_dropped = h; - } - - if (ProgramParameters::showFeatureRepresentation) - { - if (ProgramParameters::showFeatureRepresentation == 1) - { - exprForDebug.emplace_back("Result of h = h*W_" + std::to_string(l) + " + b_" + std::to_string(l), a); - exprForDebug.emplace_back("Result of h = a_" + std::to_string(l) + "(h)", h); - exprForDebug.emplace_back("Result of h = dropout_" + std::to_string(l) + "(h)", h_dropped); - } - else if (ProgramParameters::showFeatureRepresentation >= 2) - { - exprForDebug.emplace_back("", a); - exprForDebug.emplace_back("", h); - } - } - - h_cur = h_dropped; - } - - if (ProgramParameters::showFeatureRepresentation) - { - cg.forward(h_cur); - - if (ProgramParameters::showFeatureRepresentation == 1) - { - for (auto & it : exprForDebug) - fprintf(stderr, "%s (dimension=%lu) :\n%s\n", it.first.c_str(), dynet::as_vector(it.second.value()).size(), expression2str(it.second).c_str()); - for (unsigned int i = 0; i < 81; i++) - fprintf(stderr, "%s", i == 80 ? 
"\n" : "-"); - } - else if (ProgramParameters::showFeatureRepresentation >= 2) - { - for (auto & it : exprForDebug) - fprintf(stderr, "| %s |", expression2str(it.second).c_str()); - fprintf(stderr, "\n"); - } - } - - return h_cur; -} - -void MLP::printParameters(FILE * output) -{ - fprintf(output, "Parameters : NOT IMPLEMENTED\n"); + return loss; } void MLP::save(const std::string & filename) { - saveStruct(filename); - saveParameters(filename); -} - -void MLP::saveStruct(const std::string & filename) -{ - File file(filename, "w"); - FILE * fd = file.getDescriptor(); - - for (auto & layer : layers) - { - fprintf(fd, "Layer : %d %d %s %.2f\n", layer.input_dim, layer.output_dim, activation2str(layer.activation).c_str(), layer.dropout_rate); - } -} - -void MLP::saveParameters(const std::string & filename) -{ - dynet::TextFileSaver s(filename, true); - std::string prefix("Layer_"); - - for(unsigned int i = 0; i < parameters.size(); i++) - { - s.save(parameters[i][0], prefix + std::to_string(i) + "_W"); - s.save(parameters[i][1], prefix + std::to_string(i) + "_b"); - } -} - -void MLP::load(const std::string & filename) -{ - loadStruct(filename); - loadParameters(filename); -} - -void MLP::loadStruct(const std::string & filename) -{ - File file(filename, "r"); - FILE * fd = file.getDescriptor(); - - char activation[1024]; - int input; - int output; - float dropout; - - while (fscanf(fd, "Layer : %d %d %s %f\n", &input, &output, activation, &dropout) == 4) - layers.emplace_back(input, output, dropout, str2activation(activation)); - - checkLayersCompatibility(); - - for (auto & layer : layers) - addLayerToModel(layer); -} - -void MLP::loadParameters(const std::string & filename) -{ - dynet::TextFileLoader loader(filename); - std::string prefix("Layer_"); - - for(unsigned int i = 0; i < parameters.size(); i++) - { - parameters[i][0] = loader.load_param(model, prefix + std::to_string(i) + "_W"); - parameters[i][1] = loader.load_param(model, prefix + std::to_string(i) + "_b"); - } -} - -MLP::MLP(const std::string & filename) -{ - dropoutActive = true; - - randomSeed = ProgramParameters::seed; - trainer.reset(createTrainer()); - initDynet(); - - load(filename); + mlp.saveStruct(filename); + mlp.saveParameters(filename); } void MLP::printTopology(FILE * output) { - fprintf(output, "("); - for(unsigned int i = 0; i < layers.size(); i++) - { - auto & layer = layers[i]; - - if(i == 0) - fprintf(output, "%d", layer.input_dim); - fprintf(output, "->%d", layer.output_dim); - } - - fprintf(output, ")\n"); + mlp.printTopology(output); } diff --git a/neural_network/src/MLPBase.cpp b/neural_network/src/MLPBase.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5b9355b7e2f92c1edcb9cb4cec9cc9a24a9d9b85 --- /dev/null +++ b/neural_network/src/MLPBase.cpp @@ -0,0 +1,340 @@ +#include "MLPBase.hpp" + +MLPBase::MLPBase() +{ + dropoutActive = true; +} + +void MLPBase::init(dynet::ParameterCollection & model, int nbInputs, const std::string & topology, int nbOutputs) +{ + std::string topo = topology; + std::replace(topo.begin(), topo.end(), '(', ' '); + std::replace(topo.begin(), topo.end(), ')', ' '); + + auto groups = split(topo); + for (auto group : groups) + { + if(group.empty()) + continue; + + std::replace(group.begin(), group.end(), ',', ' '); + auto layer = split(group); + + if (layer.size() != 3) + { + fprintf(stderr, "ERROR (%s) : invalid topology \'%s\'. Aborting.\n", ERRINFO, topology.c_str()); + exit(1); + } + + int input = layers.empty() ? 
nbInputs : layers.back().output_dim; + int output = std::stoi(layer[0]); + float dropout = std::stof(layer[2]); + layers.emplace_back(input, output, dropout, NeuralNetwork::str2activation(layer[1])); + } + + layers.emplace_back(layers.back().output_dim, nbOutputs, 0.0, NeuralNetwork::Activation::LINEAR); + + checkLayersCompatibility(); + + for(Layer layer : layers) + addLayerToModel(model, layer); +} + +void MLPBase::addLayerToModel(dynet::ParameterCollection & model, Layer & layer) +{ + dynet::Parameter W = model.add_parameters({(unsigned)layer.output_dim, (unsigned)layer.input_dim}); + dynet::Parameter b = model.add_parameters({(unsigned)layer.output_dim}); + if (!ProgramParameters::randomParameters) + { + W.set_value(std::vector<float>((unsigned)layer.output_dim * (unsigned)layer.input_dim, 1.0)); + b.set_value(std::vector<float>((unsigned)layer.output_dim, 1.0)); + } + parameters.push_back({W,b}); +} + +void MLPBase::checkLayersCompatibility() +{ + if(layers.empty()) + { + fprintf(stderr, "ERROR (%s) : constructed mlp with 0 layers. Aborting.\n", ERRINFO); + exit(1); + } + + for(unsigned int i = 0; i < layers.size()-1; i++) + if(layers[i].output_dim != layers[i+1].input_dim) + { + fprintf(stderr, "ERROR (%s) : constructed mlp with incompatible layers. Aborting.\n", ERRINFO); + exit(1); + } +} + +std::vector<float> MLPBase::predict(FeatureModel::FeatureDescription & fd) +{ + bool currentDropoutActive = dropoutActive; + dropoutActive = false; + dynet::ComputationGraph cg; + + std::vector<dynet::Expression> expressions; + + for (auto & featValue : fd.values) + expressions.emplace_back(NeuralNetwork::featValue2Expression(cg, featValue)); + + dynet::Expression input = dynet::concatenate(expressions); + + dynet::Expression output = run(cg, input); + + dropoutActive = currentDropoutActive; + + return as_vector(cg.forward(output)); +} + +float MLPBase::update(FeatureModel::FeatureDescription & fd, int gold) +{ + fds.emplace_back(fd); + golds.emplace_back(gold); + + if ((int)fds.size() < ProgramParameters::batchSize) + return 0.0; + + std::vector<dynet::Expression> inputs; + dynet::ComputationGraph cg; + + for (auto & example : fds) + { + std::vector<dynet::Expression> expressions; + + for (auto & featValue : example.values) + expressions.emplace_back(NeuralNetwork::featValue2Expression(cg, featValue)); + + dynet::Expression input = dynet::concatenate(expressions); + inputs.emplace_back(input); + } + + dynet::Expression batchedInput = dynet::concatenate_to_batch(inputs); + dynet::Expression output = run(cg, batchedInput); + dynet::Expression batchedLoss; + + if (ProgramParameters::loss == "neglogsoftmax") + { + batchedLoss = dynet::sum_batches(pickneglogsoftmax(output, golds)); + } + else if (ProgramParameters::loss == "weighted") + { + batchedLoss = weightedLoss(output, golds); + } + else if (ProgramParameters::loss == "errorCorrection") + { + batchedLoss = errorCorrectionLoss(cg, output, golds); + } + else + { + fprintf(stderr, "ERROR (%s) : Unknown loss function \'%s\'. 
Aborting.\n", ERRINFO, ProgramParameters::loss.c_str()); + exit(1); + } + + cg.backward(batchedLoss); + + fds.clear(); + golds.clear(); + + return as_scalar(batchedLoss.value()); +} + +dynet::Expression MLPBase::weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds) +{ + std::vector<dynet::Expression> lossExpr; + for (unsigned int i = 0; i < output.dim().batch_elems(); i++) + { + lossExpr.emplace_back(dynet::pickneglogsoftmax(dynet::pick_batch_elem(output, i), oneHotGolds[i])); + auto outputVect = dynet::as_vector(dynet::pick_batch_elem(output,i).value()); + int prediction = 0; + for (unsigned int j = 1; j < outputVect.size(); j++) + if(outputVect[j] > outputVect[prediction]) + prediction = j; + int gold = oneHotGolds[i]; + if (prediction == 1 && gold == 0) + { + lossExpr.back() = lossExpr.back() * 100.0; + } + } + + return dynet::sum(lossExpr); +} + +dynet::Expression MLPBase::errorCorrectionLoss(dynet::ComputationGraph & cg, dynet::Expression & output, std::vector<unsigned int> & oneHotGolds) +{ + std::vector<dynet::Expression> lossExpr; + for (unsigned int i = 0; i < output.dim().batch_elems(); i++) + { + unsigned int u = 0; + dynet::Expression c = dynet::pick(dynet::one_hot(cg, layers.back().output_dim, oneHotGolds[i]),u); + dynet::Expression a = dynet::pick(dynet::softmax(dynet::pick_batch_elem(output,i)),u); + lossExpr.emplace_back(dynet::pickneglogsoftmax(dynet::pick_batch_elem(output, i),oneHotGolds[i])+2-c-a*c+(dynet::acos(a-1)*(c-1))); + if (ProgramParameters::debug) + { + cg.forward(lossExpr.back()); + fprintf(stderr, "a=%.2f c=%.2f loss=%.2f\n", dynet::as_scalar(a.value()),dynet::as_scalar(c.value()),dynet::as_scalar(lossExpr.back().value())); + } + } + + return dynet::sum(lossExpr); +} + +dynet::Expression MLPBase::run(dynet::ComputationGraph & cg, dynet::Expression x) +{ + static std::vector< std::pair<std::string,dynet::Expression> > exprForDebug; + + // Expression for the current hidden state + dynet::Expression h_cur = x; + + if (ProgramParameters::showFeatureRepresentation) + { + if (ProgramParameters::showFeatureRepresentation == 1) + for (unsigned int i = 0; i < 81; i++) + fprintf(stderr, "%s", i == 80 ? 
"\n" : "-"); + exprForDebug.clear(); + if (ProgramParameters::showFeatureRepresentation == 1) + exprForDebug.emplace_back("Input layer", h_cur); + if (ProgramParameters::showFeatureRepresentation >= 2) + exprForDebug.emplace_back("", h_cur); + } + + for(unsigned int l = 0; l < layers.size(); l++) + { + // Initialize parameters in computation graph + dynet::Expression W = parameter(cg, parameters[l][0]); + dynet::Expression b = parameter(cg, parameters[l][1]); + // Apply affine transform + dynet::Expression a = dynet::affine_transform({b, W, h_cur}); + // Apply activation function + dynet::Expression h = NeuralNetwork::activate(a, layers[l].activation); + // Take care of dropout + dynet::Expression h_dropped; + if(layers[l].dropout_rate > 0){ + if(dropoutActive){ + dynet::Expression mask = random_bernoulli(cg, + {(unsigned int)layers[l].output_dim}, 1 - layers[l].dropout_rate); + h_dropped = cmult(h, mask); + } + else{ + h_dropped = h * (1 - layers[l].dropout_rate); + } + } + else{ + h_dropped = h; + } + + if (ProgramParameters::showFeatureRepresentation) + { + if (ProgramParameters::showFeatureRepresentation == 1) + { + exprForDebug.emplace_back("Result of h = h*W_" + std::to_string(l) + " + b_" + std::to_string(l), a); + exprForDebug.emplace_back("Result of h = a_" + std::to_string(l) + "(h)", h); + exprForDebug.emplace_back("Result of h = dropout_" + std::to_string(l) + "(h)", h_dropped); + } + else if (ProgramParameters::showFeatureRepresentation >= 2) + { + exprForDebug.emplace_back("", a); + exprForDebug.emplace_back("", h); + } + } + + h_cur = h_dropped; + } + + if (ProgramParameters::showFeatureRepresentation) + { + cg.forward(h_cur); + + if (ProgramParameters::showFeatureRepresentation == 1) + { + for (auto & it : exprForDebug) + fprintf(stderr, "%s (dimension=%lu) :\n%s\n", it.first.c_str(), dynet::as_vector(it.second.value()).size(), NeuralNetwork::expression2str(it.second).c_str()); + for (unsigned int i = 0; i < 81; i++) + fprintf(stderr, "%s", i == 80 ? 
"\n" : "-"); + } + else if (ProgramParameters::showFeatureRepresentation >= 2) + { + for (auto & it : exprForDebug) + fprintf(stderr, "| %s |", NeuralNetwork::expression2str(it.second).c_str()); + fprintf(stderr, "\n"); + } + } + + return h_cur; +} + +void MLPBase::printParameters(FILE * output) +{ + fprintf(output, "Parameters : NOT IMPLEMENTED\n"); +} + +void MLPBase::saveStruct(const std::string & filename) +{ + File file(filename, "w"); + FILE * fd = file.getDescriptor(); + + for (auto & layer : layers) + { + fprintf(fd, "Layer : %d %d %s %.2f\n", layer.input_dim, layer.output_dim, NeuralNetwork::activation2str(layer.activation).c_str(), layer.dropout_rate); + } +} + +void MLPBase::saveParameters(const std::string & filename) +{ + dynet::TextFileSaver s(filename, true); + std::string prefix("Layer_"); + + for(unsigned int i = 0; i < parameters.size(); i++) + { + s.save(parameters[i][0], prefix + std::to_string(i) + "_W"); + s.save(parameters[i][1], prefix + std::to_string(i) + "_b"); + } +} + +void MLPBase::loadStruct(dynet::ParameterCollection & model, const std::string & filename) +{ + File file(filename, "r"); + FILE * fd = file.getDescriptor(); + + char activation[1024]; + int input; + int output; + float dropout; + + while (fscanf(fd, "Layer : %d %d %s %f\n", &input, &output, activation, &dropout) == 4) + layers.emplace_back(input, output, dropout, NeuralNetwork::str2activation(activation)); + + checkLayersCompatibility(); + + for (auto & layer : layers) + addLayerToModel(model, layer); +} + +void MLPBase::loadParameters(dynet::ParameterCollection & model, const std::string & filename) +{ + dynet::TextFileLoader loader(filename); + std::string prefix("Layer_"); + + for(unsigned int i = 0; i < parameters.size(); i++) + { + parameters[i][0] = loader.load_param(model, prefix + std::to_string(i) + "_W"); + parameters[i][1] = loader.load_param(model, prefix + std::to_string(i) + "_b"); + } +} + +void MLPBase::printTopology(FILE * output) +{ + fprintf(output, "("); + for(unsigned int i = 0; i < layers.size(); i++) + { + auto & layer = layers[i]; + + if(i == 0) + fprintf(output, "%d", layer.input_dim); + fprintf(output, "->%d", layer.output_dim); + } + + fprintf(output, ")\n"); +} +