diff --git a/CMakeLists.txt b/CMakeLists.txt index ec5e2a61198dcf3d4f293cfd909e50a5116acffb..c9c940540f8216ced054cfb8bf755a6117a76603 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,6 @@ include_directories(transition_machine/include) include_directories(trainer/include) include_directories(decoder/include) include_directories(neural_network/include) -include_directories(MLP/include) include_directories(error_correction/include) add_subdirectory(maca_common) @@ -36,13 +35,12 @@ add_subdirectory(transition_machine) add_subdirectory(trainer) add_subdirectory(decoder) add_subdirectory(neural_network) -add_subdirectory(MLP) add_subdirectory(error_correction) set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/docs/config) -add_custom_target( doc_doxygen ALL - COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_IN} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "Generating documentation with Doxygen" - VERBATIM ) +#add_custom_target( doc_doxygen ALL +# COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_IN} +# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} +# COMMENT "Generating documentation with Doxygen" +# VERBATIM ) diff --git a/MLP/CMakeLists.txt b/MLP/CMakeLists.txt deleted file mode 100644 index 0d6f0be647741ce34e5e65bc814e0c3e9a2164c7..0000000000000000000000000000000000000000 --- a/MLP/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB SOURCES src/*.cpp) - -#compiling library -add_library(MLP STATIC ${SOURCES}) -target_link_libraries(MLP dynet) -target_link_libraries(MLP transition_machine) diff --git a/MLP/include/MLP.hpp b/MLP/include/MLP.hpp deleted file mode 100644 index d40715a6ce0e21c4c69da32fe01b4abd35f86b57..0000000000000000000000000000000000000000 --- a/MLP/include/MLP.hpp +++ /dev/null @@ -1,237 +0,0 @@ -/// @file MLP.hpp -/// @author Franck Dary -/// @version 1.0 -/// @date 2018-08-03 - -#ifndef MLP__H -#define MLP__H - -#include <dynet/nodes.h> -#include <dynet/dynet.h> -#include <dynet/training.h> -#include <dynet/timing.h> -#include <dynet/expr.h> -#include "FeatureModel.hpp" - -/// @brief Multi Layer Perceptron. -/// -/// It is capable of training itself given a batch of examples.\n -/// Once trained, it can also be used to predict the class of a certain input. -class MLP -{ - public : - - /// @brief Activation function for a MLP Layer. - enum Activation - { - SIGMOID, - TANH, - RELU, - ELU, - LINEAR, - SPARSEMAX, - CUBE, - SOFTMAX - }; - - /// @brief The seed that will be used by RNG (srand and dynet) - static int randomSeed; - - static bool dynetIsInit; - - /// @brief Get the string corresponding to an Activation. - /// - /// @param a The activation. - /// - /// @return The string corresponding to a. - static std::string activation2str(Activation a); - /// @brief Get the Activation corresponding to a string. - /// - /// @param s The string. - /// - /// @return The Activation corresponding to s. If s is unknown, the program abort. - static Activation str2activation(std::string s); - - /// @brief A simple struct that represents a MLP Layer. - struct Layer - { - /// @brief Number of input neurons of this Layer. - int input_dim; - /// @brief Number of output neurons of this Layer. - int output_dim; - - /// @brief The dropout rate to apply to this Layer when training. - float dropout_rate; - /// @brief The activation function for this Layer. 
- Activation activation; - - /// @brief Construct a new Layer - /// - /// @param input_dim - /// @param output_dim - /// @param dropout_rate - /// @param activation - Layer(int input_dim, int output_dim, - float dropout_rate, Activation activation); - /// @brief Print a description of this Layer. - /// - /// @param file Where to print the output. - void print(FILE * file); - }; - - private : - - /// @brief The Layers of the MLP. - std::vector<Layer> layers; - /// @brief The parameters corresponding to the layers of the MLP. - std::vector< std::vector<dynet::Parameter> > parameters; - - /// @brief The dynet model containing the parameters to be trained. - dynet::ParameterCollection model; - /// @brief The training algorithm that will be used. - std::unique_ptr<dynet::Trainer> trainer; - /// @brief Must the Layer dropout rate be taken into account during the computations ? Usually it is only during the training step. - bool dropoutActive; - - /// @brief The current minibatch. - std::vector<FeatureModel::FeatureDescription> fds; - /// @brief gold classes of the current minibatch. - std::vector<unsigned int> golds; - - private : - - /// @brief Add the parameters of a layer into the dynet model. - /// - /// @param layer The layer to add. - void addLayerToModel(Layer & layer); - /// @brief Abort the program if the layers are not compatible. - void checkLayersCompatibility(); - /// @brief Set dynet and srand() seeds. - /// - /// @return The DynetParams containing the set seed. - dynet::DynetParams & getDefaultParams(); - /// @brief Convert a FeatureValue to a dynet Expression that will be used as an input of the Multi Layer Perceptron. - /// - /// @param cg The current Computation Graph. - /// @param fv The FeatureValue that will be converted. - /// - /// @return A dynet Expression of value fv that can be used as an input in the Multi Layer Perceptron - dynet::Expression featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv); - /// @brief Compute the image of input x by the Multi Layer Perceptron. - /// - /// @param cg The current computation graph. - /// @param x The input of the Multi Layer Perceptron. - /// - /// @return The result (values of the output Layer) of the computation of x by the Multi Layer Perceptron. - dynet::Expression run(dynet::ComputationGraph & cg, dynet::Expression x); - /// @brief Compute the image of an expression by an activation function. - /// - /// @param h The expression we want the image of. - /// @param f The activation function. - /// - /// @return f(h) - inline dynet::Expression activate(dynet::Expression h, Activation f); - /// @brief Print the parameters. - /// - /// @param output Where the parameters will be printed to. - void printParameters(FILE * output); - /// @brief Save the structure of the MLP (all the Layer) to a file. - /// - /// The goal is to store the structure of the MLP into a file so that - /// we can load it and use it another time. - /// @param filename The file in which the structure will be saved. - void saveStruct(const std::string & filename); - /// @brief Save the learned parameters of the MLP to a file. - /// - /// Only the parameters of the Layers will be saved by this function.\n - /// The parameters that are values inside of Dict, will be saved by their owner, - /// the Dict object. - /// @param filename The file in which the parameters will be saved. - void saveParameters(const std::string & filename); - /// @brief Load and construt all the Layer from a file. 
- /// - /// The file must have been written by the function saveStruct. - /// @param filename The file from which the structure will be read. - void loadStruct(const std::string & filename); - /// @brief Load and populate the model with parameters from a file. - /// - /// The file must have been written by the function saveParameters. - /// @param filename The file from which the parameters will be read. - void loadParameters(const std::string & filename); - /// @brief Load a MLP from a file. - /// - /// This function will use loadStruct and loadParameters. - /// @param filename The file from which the MLP will be loaded. - void load(const std::string & filename); - /// @brief Initialize the dynet library. - /// - /// Must be called only once, and before any call to dynet functions. - void initDynet(); - /// @brief Get the loss expression - /// - /// @param output Output from the neural network - /// @param oneHotGolds Indexes of gold classes (batched form) - /// - /// @return The loss expression - dynet::Expression weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds); - - dynet::Expression errorCorrectionLoss(dynet::ComputationGraph & cg, dynet::Expression & output, std::vector<unsigned int> & oneHotGolds); - - public : - - /// @brief Convert a dynet expression to a string (usefull for debug purposes) - /// - /// @param expr The expression to convert. - /// - /// @return A string representing the expression. - static std::string expression2str(dynet::Expression & expr); - /// @brief initialize a new untrained MLP from a desired topology. - /// - /// topology example for 2 hidden layers : (150,RELU,0.3)(50,ELU,0.2)\n - /// Of sizes 150 and 50, activation functions RELU and ELU, and dropout rates - /// of 0.3 and 0.2. - /// @param nbInputs The size of the input layer of the MLP. - /// @param topology Description of each hidden Layer of the MLP. - /// @param nbOutputs The size of the output layer of the MLP. - void init(int nbInputs, const std::string & topology, int nbOutputs); - /// @brief Construct a new MLP for training. - MLP(); - /// @brief Read and construct a trained MLP from a file. - /// - /// The file must have been written by save. - /// @param filename The file to read the MLP from. - MLP(const std::string & filename); - /// @brief Give a score to each possible class, given an input. - /// - /// @param fd The input to use. - /// - /// @return A vector containing one score per possible class. - std::vector<float> predict(FeatureModel::FeatureDescription & fd); - - /// @brief Update the parameters according to the given gold class. - /// - /// @param fd The input to use. - /// @param gold The gold class of this input. - /// - /// @return The loss. - float update(FeatureModel::FeatureDescription & fd, int gold); - - /// @brief Save the MLP to a file. - /// - /// @param filename The file to write the MLP to. - void save(const std::string & filename); - /// @brief Print the topology (Layers) of the MLP. - /// - /// @param output Where the topology will be printed. - void printTopology(FILE * output); - /// @brief Allocate the correct trainer type depending on the program parameters. - /// - /// @return A pointer to the newly allocated trainer. - dynet::Trainer * createTrainer(); - /// @brief Return the model. - /// - /// @return The model of this MLP. 
- dynet::ParameterCollection & getModel(); -}; - -#endif diff --git a/MLP/src/MLP.cpp b/MLP/src/MLP.cpp deleted file mode 100644 index 01ee7b3bf37d44a4fcaf88cdd71cf55e31984d74..0000000000000000000000000000000000000000 --- a/MLP/src/MLP.cpp +++ /dev/null @@ -1,559 +0,0 @@ -#include "MLP.hpp" -#include "File.hpp" -#include "util.hpp" -#include "ProgramParameters.hpp" - -#include <dynet/param-init.h> -#include <dynet/io.h> - -int MLP::randomSeed = 0; -bool MLP::dynetIsInit = false; - -std::string MLP::activation2str(Activation a) -{ - switch(a) - { - case LINEAR : - return "LINEAR"; - break; - case RELU : - return "RELU"; - break; - case ELU : - return "ELU"; - break; - case CUBE : - return "CUBE"; - break; - case SIGMOID : - return "SIGMOID"; - break; - case TANH : - return "TANH"; - break; - case SOFTMAX : - return "SOFTMAX"; - break; - case SPARSEMAX : - return "SPARSEMAX"; - break; - default : - break; - } - - return "UNKNOWN"; -} - -MLP::Activation MLP::str2activation(std::string s) -{ - if(s == "LINEAR") - return LINEAR; - else if(s == "RELU") - return RELU; - else if(s == "ELU") - return ELU; - else if(s == "CUBE") - return CUBE; - else if(s == "SIGMOID") - return SIGMOID; - else if(s == "TANH") - return TANH; - else if(s == "SOFTMAX") - return SOFTMAX; - else if(s == "SPARSEMAX") - return SPARSEMAX; - else - { - fprintf(stderr, "ERROR (%s) : invalid activation \'%s\'. Aborting\n",ERRINFO, s.c_str()); - exit(1); - } - - return LINEAR; -} - -void MLP::initDynet() -{ - if(dynetIsInit) - return; - - dynetIsInit = true; - dynet::initialize(getDefaultParams()); -} - -MLP::MLP() -{ - randomSeed = ProgramParameters::seed; - dropoutActive = true; - trainer.reset(createTrainer()); - initDynet(); -} - -void MLP::init(int nbInputs, const std::string & topology, int nbOutputs) -{ - std::string topo = topology; - std::replace(topo.begin(), topo.end(), '(', ' '); - std::replace(topo.begin(), topo.end(), ')', ' '); - - auto groups = split(topo); - for (auto group : groups) - { - if(group.empty()) - continue; - - std::replace(group.begin(), group.end(), ',', ' '); - auto layer = split(group); - - if (layer.size() != 3) - { - fprintf(stderr, "ERROR (%s) : invalid topology \'%s\'. Aborting.\n", ERRINFO, topology.c_str()); - exit(1); - } - - int input = layers.empty() ? nbInputs : layers.back().output_dim; - int output = std::stoi(layer[0]); - float dropout = std::stof(layer[2]); - layers.emplace_back(input, output, dropout, str2activation(layer[1])); - } - - layers.emplace_back(layers.back().output_dim, nbOutputs, 0.0, Activation::LINEAR); - - checkLayersCompatibility(); - - for(Layer layer : layers) - addLayerToModel(layer); -} - -dynet::Trainer * MLP::createTrainer() -{ - auto optimizer = noAccentLower(ProgramParameters::optimizer); - - if (optimizer == "amsgrad") - return new dynet::AmsgradTrainer(model, ProgramParameters::learningRate, ProgramParameters::beta1, ProgramParameters::beta2, ProgramParameters::bias); - else if (optimizer == "adam") - return new dynet::AdamTrainer(model, ProgramParameters::learningRate, ProgramParameters::beta1, ProgramParameters::beta2, ProgramParameters::bias); - else if (optimizer == "sgd") - return new dynet::SimpleSGDTrainer(model, ProgramParameters::learningRate); - else if (optimizer == "none") - return nullptr; - - fprintf(stderr, "ERROR (%s) : unknown optimizer \'%s\'. 
Aborting.\n", ERRINFO, optimizer.c_str()); - - exit(1); - - return nullptr; -} - -void MLP::addLayerToModel(Layer & layer) -{ - dynet::Parameter W = model.add_parameters({(unsigned)layer.output_dim, (unsigned)layer.input_dim}); - dynet::Parameter b = model.add_parameters({(unsigned)layer.output_dim}); - if (!ProgramParameters::randomParameters) - { - W.set_value(std::vector<float>((unsigned)layer.output_dim * (unsigned)layer.input_dim, 1.0)); - b.set_value(std::vector<float>((unsigned)layer.output_dim, 1.0)); - } - parameters.push_back({W,b}); -} - -void MLP::checkLayersCompatibility() -{ - if(layers.empty()) - { - fprintf(stderr, "ERROR (%s) : constructed mlp with 0 layers. Aborting.\n", ERRINFO); - exit(1); - } - - for(unsigned int i = 0; i < layers.size()-1; i++) - if(layers[i].output_dim != layers[i+1].input_dim) - { - fprintf(stderr, "ERROR (%s) : constructed mlp with incompatible layers. Aborting.\n", ERRINFO); - exit(1); - } -} - -MLP::Layer::Layer(int input_dim, int output_dim, - float dropout_rate, Activation activation) -{ - this->input_dim = input_dim; - this->output_dim = output_dim; - this->dropout_rate = dropout_rate; - this->activation = activation; -} - -std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd) -{ - bool currentDropoutActive = dropoutActive; - dropoutActive = false; - dynet::ComputationGraph cg; - - std::vector<dynet::Expression> expressions; - - for (auto & featValue : fd.values) - expressions.emplace_back(featValue2Expression(cg, featValue)); - - dynet::Expression input = dynet::concatenate(expressions); - - dynet::Expression output = run(cg, input); - - dropoutActive = currentDropoutActive; - - return as_vector(cg.forward(output)); -} - -float MLP::update(FeatureModel::FeatureDescription & fd, int gold) -{ - fds.emplace_back(fd); - golds.emplace_back(gold); - - if ((int)fds.size() < ProgramParameters::batchSize) - return 0.0; - - std::vector<dynet::Expression> inputs; - dynet::ComputationGraph cg; - - for (auto & example : fds) - { - std::vector<dynet::Expression> expressions; - - for (auto & featValue : example.values) - expressions.emplace_back(featValue2Expression(cg, featValue)); - - dynet::Expression input = dynet::concatenate(expressions); - inputs.emplace_back(input); - } - - dynet::Expression batchedInput = dynet::concatenate_to_batch(inputs); - dynet::Expression output = run(cg, batchedInput); - dynet::Expression batchedLoss; - - if (ProgramParameters::loss == "neglogsoftmax") - { - batchedLoss = dynet::sum_batches(pickneglogsoftmax(output, golds)); - } - else if (ProgramParameters::loss == "weighted") - { - batchedLoss = weightedLoss(output, golds); - } - else if (ProgramParameters::loss == "errorCorrection") - { - batchedLoss = errorCorrectionLoss(cg,output, golds); - } - else - { - fprintf(stderr, "ERROR (%s) : Unknown loss function \'%s\'. 
Aborting.\n", ERRINFO, ProgramParameters::loss.c_str()); - exit(1); - } - - cg.backward(batchedLoss); - trainer->update(); - - fds.clear(); - golds.clear(); - - return as_scalar(batchedLoss.value()); -} - -dynet::Expression MLP::weightedLoss(dynet::Expression & output, std::vector<unsigned int> & oneHotGolds) -{ - std::vector<dynet::Expression> lossExpr; - for (unsigned int i = 0; i < output.dim().batch_elems(); i++) - { - lossExpr.emplace_back(dynet::pickneglogsoftmax(dynet::pick_batch_elem(output, i), oneHotGolds[i])); - auto outputVect = dynet::as_vector(dynet::pick_batch_elem(output,i).value()); - int prediction = 0; - for (unsigned int j = 1; j < outputVect.size(); j++) - if(outputVect[j] > outputVect[prediction]) - prediction = j; - int gold = oneHotGolds[i]; - if (prediction == 1 && gold == 0) - { - lossExpr.back() = lossExpr.back() * 100.0; - } - } - - return dynet::sum(lossExpr); -} - -dynet::Expression MLP::errorCorrectionLoss(dynet::ComputationGraph & cg, dynet::Expression & output, std::vector<unsigned int> & oneHotGolds) -{ - std::vector<dynet::Expression> lossExpr; - for (unsigned int i = 0; i < output.dim().batch_elems(); i++) - { - unsigned int u = 0; - dynet::Expression c = dynet::pick(dynet::one_hot(cg, layers.back().output_dim, oneHotGolds[i]),u); - dynet::Expression a = dynet::pick(dynet::softmax(dynet::pick_batch_elem(output,i)),u); - lossExpr.emplace_back(dynet::pickneglogsoftmax(dynet::pick_batch_elem(output, i),oneHotGolds[i])+2-c-a*c+(dynet::acos(a-1)*(c-1))); - if (ProgramParameters::debug) - { - cg.forward(lossExpr.back()); - fprintf(stderr, "a=%.2f c=%.2f loss=%.2f\n", dynet::as_scalar(a.value()),dynet::as_scalar(c.value()),dynet::as_scalar(lossExpr.back().value())); - } - } - - return dynet::sum(lossExpr); -} - -dynet::DynetParams & MLP::getDefaultParams() -{ - static dynet::DynetParams params; - params.random_seed = randomSeed; - - std::srand(params.random_seed); - - return params; -} - -dynet::Expression MLP::featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv) -{ - std::vector<dynet::Expression> expressions; - - for (unsigned int i = 0; i < fv.dicts.size(); i++) - { - Dict * dict = fv.dicts[i]; - bool isConst = (fv.policies[i] == FeatureModel::Policy::Final) || (dict->mode == Dict::Mode::OneHot); - - auto & lu = dict->getLookupParameter(); - unsigned int index = dict->getValue(fv.values[i]); - - if(isConst) - expressions.emplace_back(dynet::const_lookup(cg, lu, index)); - else - expressions.emplace_back(dynet::lookup(cg, lu, index)); - } - - return dynet::concatenate(expressions); -} - -dynet::Expression MLP::run(dynet::ComputationGraph & cg, dynet::Expression x) -{ - static std::vector< std::pair<std::string,dynet::Expression> > exprForDebug; - - // Expression for the current hidden state - dynet::Expression h_cur = x; - - if (ProgramParameters::showFeatureRepresentation) - { - if (ProgramParameters::showFeatureRepresentation == 1) - for (unsigned int i = 0; i < 81; i++) - fprintf(stderr, "%s", i == 80 ? 
"\n" : "-"); - exprForDebug.clear(); - if (ProgramParameters::showFeatureRepresentation == 1) - exprForDebug.emplace_back("Input layer", h_cur); - if (ProgramParameters::showFeatureRepresentation >= 2) - exprForDebug.emplace_back("", h_cur); - } - - for(unsigned int l = 0; l < layers.size(); l++) - { - // Initialize parameters in computation graph - dynet::Expression W = parameter(cg, parameters[l][0]); - dynet::Expression b = parameter(cg, parameters[l][1]); - // Apply affine transform - dynet::Expression a = dynet::affine_transform({b, W, h_cur}); - // Apply activation function - dynet::Expression h = activate(a, layers[l].activation); - // Take care of dropout - dynet::Expression h_dropped; - if(layers[l].dropout_rate > 0){ - if(dropoutActive){ - dynet::Expression mask = random_bernoulli(cg, - {(unsigned int)layers[l].output_dim}, 1 - layers[l].dropout_rate); - h_dropped = cmult(h, mask); - } - else{ - h_dropped = h * (1 - layers[l].dropout_rate); - } - } - else{ - h_dropped = h; - } - - if (ProgramParameters::showFeatureRepresentation) - { - if (ProgramParameters::showFeatureRepresentation == 1) - { - exprForDebug.emplace_back("Result of h = h*W_" + std::to_string(l) + " + b_" + std::to_string(l), a); - exprForDebug.emplace_back("Result of h = a_" + std::to_string(l) + "(h)", h); - exprForDebug.emplace_back("Result of h = dropout_" + std::to_string(l) + "(h)", h_dropped); - } - else if (ProgramParameters::showFeatureRepresentation >= 2) - { - exprForDebug.emplace_back("", a); - exprForDebug.emplace_back("", h); - } - } - - h_cur = h_dropped; - } - - if (ProgramParameters::showFeatureRepresentation) - { - cg.forward(h_cur); - - if (ProgramParameters::showFeatureRepresentation == 1) - { - for (auto & it : exprForDebug) - fprintf(stderr, "%s (dimension=%lu) :\n%s\n", it.first.c_str(), dynet::as_vector(it.second.value()).size(), expression2str(it.second).c_str()); - for (unsigned int i = 0; i < 81; i++) - fprintf(stderr, "%s", i == 80 ? "\n" : "-"); - } - else if (ProgramParameters::showFeatureRepresentation >= 2) - { - for (auto & it : exprForDebug) - fprintf(stderr, "| %s |", expression2str(it.second).c_str()); - fprintf(stderr, "\n"); - } - } - - return h_cur; -} - -inline dynet::Expression MLP::activate(dynet::Expression h, Activation f) -{ - switch(f) - { - case LINEAR : - return h; - break; - case RELU : - return rectify(h); - break; - case ELU : - return elu(h); - break; - case SIGMOID : - return logistic(h); - break; - case TANH : - return tanh(h); - break; - case SOFTMAX : - return softmax(h); - break; - default : - break; - } - - fprintf(stderr, "ERROR (%s) : Activation not implemented \'%s\'. 
Aborting.\n", ERRINFO, activation2str(f).c_str()); - exit(1); - - return h; -} - -void MLP::printParameters(FILE * output) -{ - fprintf(output, "Parameters : NOT IMPLEMENTED\n"); -} - -void MLP::save(const std::string & filename) -{ - saveStruct(filename); - saveParameters(filename); -} - -void MLP::saveStruct(const std::string & filename) -{ - File file(filename, "w"); - FILE * fd = file.getDescriptor(); - - for (auto & layer : layers) - { - fprintf(fd, "Layer : %d %d %s %.2f\n", layer.input_dim, layer.output_dim, activation2str(layer.activation).c_str(), layer.dropout_rate); - } -} - -void MLP::saveParameters(const std::string & filename) -{ - dynet::TextFileSaver s(filename, true); - std::string prefix("Layer_"); - - for(unsigned int i = 0; i < parameters.size(); i++) - { - s.save(parameters[i][0], prefix + std::to_string(i) + "_W"); - s.save(parameters[i][1], prefix + std::to_string(i) + "_b"); - } -} - -void MLP::load(const std::string & filename) -{ - loadStruct(filename); - loadParameters(filename); -} - -void MLP::loadStruct(const std::string & filename) -{ - File file(filename, "r"); - FILE * fd = file.getDescriptor(); - - char activation[1024]; - int input; - int output; - float dropout; - - while (fscanf(fd, "Layer : %d %d %s %f\n", &input, &output, activation, &dropout) == 4) - layers.emplace_back(input, output, dropout, str2activation(activation)); - - checkLayersCompatibility(); - - for (auto & layer : layers) - addLayerToModel(layer); -} - -void MLP::loadParameters(const std::string & filename) -{ - dynet::TextFileLoader loader(filename); - std::string prefix("Layer_"); - - for(unsigned int i = 0; i < parameters.size(); i++) - { - parameters[i][0] = loader.load_param(model, prefix + std::to_string(i) + "_W"); - parameters[i][1] = loader.load_param(model, prefix + std::to_string(i) + "_b"); - } -} - -MLP::MLP(const std::string & filename) -{ - dropoutActive = true; - - randomSeed = ProgramParameters::seed; - trainer.reset(createTrainer()); - initDynet(); - - load(filename); -} - -void MLP::printTopology(FILE * output) -{ - fprintf(output, "("); - for(unsigned int i = 0; i < layers.size(); i++) - { - auto & layer = layers[i]; - - if(i == 0) - fprintf(output, "%d", layer.input_dim); - fprintf(output, "->%d", layer.output_dim); - } - - fprintf(output, ")\n"); -} - -dynet::ParameterCollection & MLP::getModel() -{ - return model; -} - -std::string MLP::expression2str(dynet::Expression & expr) -{ - std::string result = ""; - - auto elem = dynet::as_vector(expr.value()); - - for (auto & f : elem) - result += float2str(f, "%f") + " "; - - if (!result.empty()) - result.pop_back(); - - return result; -} - diff --git a/neural_network/CMakeLists.txt b/neural_network/CMakeLists.txt index a4237344cb6202fdeb49d6e8fc1e8c1f7462b9c2..7974b571ac643726935d21f5c407b333c48dc22b 100644 --- a/neural_network/CMakeLists.txt +++ b/neural_network/CMakeLists.txt @@ -2,3 +2,4 @@ FILE(GLOB SOURCES src/*.cpp) #compiling library add_library(neural_network STATIC ${SOURCES}) +target_link_libraries(neural_network dynet) diff --git a/neural_network/include/MLP.hpp b/neural_network/include/MLP.hpp index b927afbc9705bc9c68b3e679c9ba6e90057842c6..fab20cf35774e9096da34bc15dd6f7208f304a07 100644 --- a/neural_network/include/MLP.hpp +++ b/neural_network/include/MLP.hpp @@ -7,12 +7,13 @@ #define MLP__H #include "NeuralNetwork.hpp" +#include "ProgramParameters.hpp" /// @brief Multi Layer Perceptron. 
/// /// It is capable of training itself given a batch of examples.\n /// Once trained, it can also be used to predict the class of a certain input. -class MLP : NeuralNetwork +class MLP : public NeuralNetwork { public : @@ -98,14 +99,7 @@ class MLP : NeuralNetwork /// @brief Set dynet and srand() seeds. /// /// @return The DynetParams containing the set seed. - dynet::DynetParams & getDefaultParams() override; - /// @brief Convert a FeatureValue to a dynet Expression that will be used as an input of the Multi Layer Perceptron. - /// - /// @param cg The current Computation Graph. - /// @param fv The FeatureValue that will be converted. - /// - /// @return A dynet Expression of value fv that can be used as an input in the Multi Layer Perceptron - dynet::Expression featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv); + dynet::DynetParams & getDefaultParams(); /// @brief Compute the image of input x by the Multi Layer Perceptron. /// /// @param cg The current computation graph. @@ -168,12 +162,6 @@ class MLP : NeuralNetwork public : - /// @brief Convert a dynet expression to a string (usefull for debug purposes) - /// - /// @param expr The expression to convert. - /// - /// @return A string representing the expression. - static std::string expression2str(dynet::Expression & expr); /// @brief initialize a new untrained MLP from a desired topology. /// /// topology example for 2 hidden layers : (150,RELU,0.3)(50,ELU,0.2)\n @@ -195,7 +183,7 @@ class MLP : NeuralNetwork /// @param fd The input to use. /// /// @return A vector containing one score per possible class. - std::vector<float> predict(FeatureModel::FeatureDescription & fd); + std::vector<float> predict(FeatureModel::FeatureDescription & fd) override; /// @brief Update the parameters according to the given gold class. /// @@ -203,24 +191,20 @@ class MLP : NeuralNetwork /// @param gold The gold class of this input. /// /// @return The loss. - float update(FeatureModel::FeatureDescription & fd, int gold); + float update(FeatureModel::FeatureDescription & fd, int gold) override; /// @brief Save the MLP to a file. /// /// @param filename The file to write the MLP to. - void save(const std::string & filename); + void save(const std::string & filename) override; /// @brief Print the topology (Layers) of the MLP. /// /// @param output Where the topology will be printed. - void printTopology(FILE * output); + void printTopology(FILE * output) override; /// @brief Allocate the correct trainer type depending on the program parameters. /// /// @return A pointer to the newly allocated trainer. dynet::Trainer * createTrainer(); - /// @brief Return the model. - /// - /// @return The model of this MLP. - dynet::ParameterCollection & getModel(); }; #endif diff --git a/neural_network/include/NeuralNetwork.hpp b/neural_network/include/NeuralNetwork.hpp index 52c81cb686747a872ddf0d55cfa07067ad515a1c..00e2fd12442fde7948436d2e93dbd014d93ba863 100644 --- a/neural_network/include/NeuralNetwork.hpp +++ b/neural_network/include/NeuralNetwork.hpp @@ -6,11 +6,13 @@ #include <dynet/training.h> #include <dynet/timing.h> #include <dynet/expr.h> +#include <dynet/io.h> +#include <string> #include "FeatureModel.hpp" class NeuralNetwork { - private : + protected : /// @brief The seed that will be used by RNG (srand and dynet) static int randomSeed; @@ -21,6 +23,16 @@ class NeuralNetwork /// @brief The dynet model containing the parameters to be trained. 
dynet::ParameterCollection model; + protected : + + /// @brief Convert a FeatureValue to a dynet Expression that will be used as an input of the NeuralNetwork. + /// + /// @param cg The current Computation Graph. + /// @param fv The FeatureValue that will be converted. + /// + /// @return A dynet Expression of value fv that can be used as an input in the NeuralNetwork + dynet::Expression featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv); + public : /// @brief Convert a dynet expression to a string (usefull for debug purposes) diff --git a/neural_network/src/MLP.cpp b/neural_network/src/MLP.cpp index 54f0d73464805df55b096a2769937d9c5bacab5b..add22c627c9dfd06b9c0f49de23408a9272e9077 100644 --- a/neural_network/src/MLP.cpp +++ b/neural_network/src/MLP.cpp @@ -1,3 +1,5 @@ +#include "MLP.hpp" + std::string MLP::activation2str(Activation a) { switch(a) @@ -292,27 +294,6 @@ dynet::DynetParams & MLP::getDefaultParams() return params; } -dynet::Expression MLP::featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv) -{ - std::vector<dynet::Expression> expressions; - - for (unsigned int i = 0; i < fv.dicts.size(); i++) - { - Dict * dict = fv.dicts[i]; - bool isConst = (fv.policies[i] == FeatureModel::Policy::Final) || (dict->mode == Dict::Mode::OneHot); - - auto & lu = dict->getLookupParameter(); - unsigned int index = dict->getValue(fv.values[i]); - - if(isConst) - expressions.emplace_back(dynet::const_lookup(cg, lu, index)); - else - expressions.emplace_back(dynet::lookup(cg, lu, index)); - } - - return dynet::concatenate(expressions); -} - dynet::Expression MLP::run(dynet::ComputationGraph & cg, dynet::Expression x) { static std::vector< std::pair<std::string,dynet::Expression> > exprForDebug; @@ -526,23 +507,3 @@ void MLP::printTopology(FILE * output) fprintf(output, ")\n"); } -dynet::ParameterCollection & MLP::getModel() -{ - return model; -} - -std::string MLP::expression2str(dynet::Expression & expr) -{ - std::string result = ""; - - auto elem = dynet::as_vector(expr.value()); - - for (auto & f : elem) - result += float2str(f, "%f") + " "; - - if (!result.empty()) - result.pop_back(); - - return result; -} - diff --git a/neural_network/src/NeuralNetwork.cpp b/neural_network/src/NeuralNetwork.cpp index 894e8494b0af4e22690d3f00ae633f006c8cda02..d12adfc303a583dff2b5a79401775cf9fa37a1b5 100644 --- a/neural_network/src/NeuralNetwork.cpp +++ b/neural_network/src/NeuralNetwork.cpp @@ -23,3 +23,24 @@ dynet::ParameterCollection & NeuralNetwork::getModel() return model; } +dynet::Expression NeuralNetwork::featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv) +{ + std::vector<dynet::Expression> expressions; + + for (unsigned int i = 0; i < fv.dicts.size(); i++) + { + Dict * dict = fv.dicts[i]; + bool isConst = (fv.policies[i] == FeatureModel::Policy::Final) || (dict->mode == Dict::Mode::OneHot); + + auto & lu = dict->getLookupParameter(); + unsigned int index = dict->getValue(fv.values[i]); + + if(isConst) + expressions.emplace_back(dynet::const_lookup(cg, lu, index)); + else + expressions.emplace_back(dynet::lookup(cg, lu, index)); + } + + return dynet::concatenate(expressions); +} + diff --git a/transition_machine/CMakeLists.txt b/transition_machine/CMakeLists.txt index 56c1cff5fc2063caa46268adc0ca9907f87100b9..9ee9092d558a2e0698734620d3a6a55d2953b943 100644 --- a/transition_machine/CMakeLists.txt +++ b/transition_machine/CMakeLists.txt @@ -3,4 +3,4 @@ FILE(GLOB SOURCES src/*.cpp) #compiling 
library add_library(transition_machine STATIC ${SOURCES}) target_link_libraries(transition_machine maca_common) -target_link_libraries(transition_machine MLP) +target_link_libraries(transition_machine neural_network) diff --git a/transition_machine/include/Classifier.hpp b/transition_machine/include/Classifier.hpp index b7f7dfc7f7ef455a3876c0b0b0924ab655048d27..bfffadf35a6cceb46d12a0ffa1eb8d0c3e7b8838 100644 --- a/transition_machine/include/Classifier.hpp +++ b/transition_machine/include/Classifier.hpp @@ -11,6 +11,7 @@ #include "FeatureModel.hpp" #include "ActionSet.hpp" #include "Oracle.hpp" +#include "NeuralNetwork.hpp" #include "MLP.hpp" #include "ProgramParameters.hpp" @@ -51,7 +52,7 @@ class Classifier std::unique_ptr<ActionSet> as; /// @brief The neural network used by this Classifier. /// The neural network is only used for Classifier of type Prediction. - std::unique_ptr<MLP> mlp; + std::unique_ptr<NeuralNetwork> nn; /// @brief A string describing the topology of the underlying neural network. std::string topology; /// @brief The oracle being used by this Classifier. diff --git a/transition_machine/src/Classifier.cpp b/transition_machine/src/Classifier.cpp index e422c7de7ac42284a6b9810bc3b7bc6113809c47..7dc2ee21e07bb37f45ec20af6191281c46599540 100644 --- a/transition_machine/src/Classifier.cpp +++ b/transition_machine/src/Classifier.cpp @@ -97,7 +97,7 @@ Classifier::WeightedActions Classifier::weightActions(Config & config) initClassifier(config); auto fd = fm->getFeatureDescription(config); - auto scores = mlp->predict(fd); + auto scores = nn->predict(fd); if (ProgramParameters::showFeatureRepresentation == 1) fd.printForDebug(stderr); @@ -122,20 +122,20 @@ void Classifier::initClassifier(Config & config) if(type != Type::Prediction) return; - if(mlp.get()) + if(nn.get()) return; std::string modelFilename = ProgramParameters::expPath + name + ".model"; if (fileExists(modelFilename)) { - mlp.reset(new MLP(modelFilename)); - Dict::initDicts(mlp->getModel(), name); + nn.reset(new MLP(modelFilename)); + Dict::initDicts(nn->getModel(), name); return; } - mlp.reset(new MLP()); + nn.reset(new MLP()); - Dict::initDicts(mlp->getModel(), name); + Dict::initDicts(nn->getModel(), name); auto fd = fm->getFeatureDescription(config); @@ -146,7 +146,7 @@ void Classifier::initClassifier(Config & config) for (auto dict : feat.dicts) nbInputs += dict->getDimension(); - mlp->init(nbInputs, topology, nbOutputs); + nn->init(nbInputs, topology, nbOutputs); } FeatureModel::FeatureDescription Classifier::getFeatureDescription(Config & config) @@ -218,7 +218,7 @@ void Classifier::save(const std::string & filename) exit(1); } - mlp->save(filename); + nn->save(filename); } Action * Classifier::getAction(const std::string & name) @@ -234,7 +234,7 @@ bool Classifier::needsTrain() void Classifier::printTopology(FILE * output) { fprintf(output, "%s topology : ", name.c_str()); - mlp->printTopology(output); + nn->printTopology(output); } int Classifier::getActionCost(Config & config, const std::string & action) @@ -259,7 +259,7 @@ std::vector<std::string> Classifier::getZeroCostActions(Config & config) float Classifier::trainOnExample(Config & config, int gold) { auto fd = fm->getFeatureDescription(config); - return mlp->update(fd, gold); + return nn->update(fd, gold); } void Classifier::explainCostOfActions(FILE * output, Config & config)
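
For reference, a minimal sketch (not part of the patch) of how calling code is expected to use the refactored hierarchy: the owning pointer is typed on the abstract NeuralNetwork base while MLP stays the concrete implementation, mirroring the pattern in Classifier::initClassifier above. The helper function makeNetwork and its parameters are hypothetical; MLP's constructors and the virtual interface (init, predict, update, save) are taken from the diff.

    // Sketch only, assuming the interface implied by the diff above.
    #include <memory>
    #include <string>
    #include "NeuralNetwork.hpp"
    #include "MLP.hpp"

    std::unique_ptr<NeuralNetwork> makeNetwork(const std::string & modelFilename,
                                               int nbInputs,
                                               const std::string & topology,
                                               int nbOutputs)
    {
      std::unique_ptr<NeuralNetwork> nn;   // abstract base type, as in Classifier

      if (!modelFilename.empty())
        nn.reset(new MLP(modelFilename));  // reload a previously trained MLP from disk
      else
      {
        nn.reset(new MLP());               // fresh network, topology e.g. "(150,RELU,0.3)(50,ELU,0.2)"
        nn->init(nbInputs, topology, nbOutputs);
      }

      return nn;
    }

Because Classifier now only depends on the NeuralNetwork interface, adding a second network type later should only require another concrete subclass and a change to the construction site, not to the rest of the transition machine.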