MLP.cpp

    #include "MLP.hpp"
    #include "util.hpp"
    
    #include <dynet/param-init.h>
    #include <dynet/io.h>
    
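    // Printable name of an Activation value (inverse of str2activation below).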
    std::string MLP::activation2str(Activation a)
    {
      switch(a)
      {
        case LINEAR :
          return "LINEAR";
        case RELU :
          return "RELU";
        case CUBE :
          return "CUBE";
        case SIGMOID :
          return "SIGMOID";
        case TANH :
          return "TANH";
        case SOFTMAX :
          return "SOFTMAX";
        case SPARSEMAX :
          return "SPARSEMAX";
        default :
          break;
      }
    
      return "UNKNOWN";
    }
    
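    // Parse an activation name back into the Activation enum; aborts on an unknown name.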
    MLP::Activation MLP::str2activation(std::string s)
    {
      if(s == "LINEAR")
        return LINEAR;
      else if(s == "RELU")
        return RELU;
      else if(s == "CUBE")
        return CUBE;
      else if(s == "SIGMOID")
        return SIGMOID;
      else if(s == "TANH")
        return TANH;
      else if(s == "SOFTMAX")
        return SOFTMAX;
      else if(s == "SPARSEMAX")
        return SPARSEMAX;
      else
      {
        fprintf(stderr, "ERROR (%s) : invalid activation \'%s\'. Aborting\n",ERRINFO, s.c_str());
        exit(1);
      }
    
      return LINEAR;
    }
    
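    // Build the network: initialise dynet, check that consecutive layers have
    // matching dimensions, then allocate the parameters of every layer in the
    // dynet model. The trainer member (declared in MLP.hpp) is constructed with
    // a learning rate of 0.001 and the Adam-style hyperparameters 0.9, 0.999, 1e-8.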
    MLP::MLP(std::vector<Layer> layers)
    : layers(layers), trainer(model, 0.001, 0.9, 0.999, 1e-8)
    {
      dynet::initialize(getDefaultParams());
    
      trainMode = true;
    
      checkLayersCompatibility();
    
      for(Layer & layer : layers)
        addLayerToModel(layer);
    }
    
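    // Register one layer in the dynet model: a weight matrix W of shape
    // (output_dim x input_dim) and a bias vector b of size output_dim.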
    void MLP::addLayerToModel(Layer & layer)
    {
      dynet::Parameter W = model.add_parameters({(unsigned)layer.output_dim, (unsigned)layer.input_dim});
      dynet::Parameter b = model.add_parameters({(unsigned)layer.output_dim});
      parameters.push_back({W,b});
    }
    
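    // Abort if the network has no layer or if the output dimension of a layer
    // does not match the input dimension of the next one.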
    void MLP::checkLayersCompatibility()
    {
      if(layers.empty())
      {
        fprintf(stderr, "ERROR (%s) : constructed mlp with 0 layers. Aborting.\n", ERRINFO);
        exit(1);
      }
    
      for(unsigned int i = 0; i < layers.size()-1; i++)
        if(layers[i].output_dim != layers[i+1].input_dim)
        {
          fprintf(stderr, "ERROR (%s) : constructed mlp with incompatible layers. Aborting.\n", ERRINFO);
          exit(1);
        }
    }
    
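    // A Layer bundles the dimensions, dropout rate and activation function of
    // one affine transform (dropout is currently disabled, see run()).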
    MLP::Layer::Layer(int input_dim, int output_dim,
                      float dropout_rate, Activation activation)
    {
      this->input_dim = input_dim;
      this->output_dim = output_dim;
      this->dropout_rate = dropout_rate;
      this->activation = activation;
    }
    
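    // Build a computation graph for a single example: every feature value is
    // turned into a (possibly constant) dynet parameter and the results are
    // concatenated into the input vector. In train mode, one gradient step is
    // taken on the negative log-softmax loss of goldClass; the scores returned
    // are those computed before the update.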
    std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd, int goldClass)
    {
      dynet::ComputationGraph cg;
    
      std::vector<dynet::Expression> expressions;
    
      for (auto & featValue : fd.values)
      {
        if(featValue.policy == FeatureModel::Policy::Final)
          expressions.emplace_back(dynet::const_parameter(cg, featValue2parameter(featValue)));
        else
          expressions.emplace_back(dynet::parameter(cg, featValue2parameter(featValue)));
      }
    
      dynet::Expression input = dynet::concatenate(expressions);
    
      dynet::Expression output = run(cg, input);
    
      if(trainMode)
      {
        cg.backward(pickneglogsoftmax(output, goldClass));
        trainer.update();
      }
    
      return as_vector(cg.forward(output));
    }
    
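    // dynet initialisation parameters shared by every MLP: a fixed random seed,
    // also used to seed std::rand, so that runs are reproducible.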
    dynet::DynetParams & MLP::getDefaultParams()
    {
      static dynet::DynetParams params;
      params.random_seed = 100;
    
      std::srand(params.random_seed);
    
      return params;
    }
    
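    // Wrap a feature vector into a dynet Parameter, creating it on first use.
    // The parameter's storage pointer is redirected to the vector's own memory,
    // so dynet reads (and, for trainable features, updates) the values in place.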
    dynet::Parameter & MLP::featValue2parameter(const FeatureModel::FeatureValue & fv)
    {
      auto it = ptr2parameter.find(fv.vec);
    
      if(it != ptr2parameter.end())
        return it->second;
    
      ptr2parameter[fv.vec] = model.add_parameters({(unsigned)fv.vec->size(),1});
      it = ptr2parameter.find(fv.vec);
    
      it->second.values()->v = fv.vec->data();
    
      return it->second;
    }
    
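    // Forward pass: apply each layer's affine transform followed by its
    // activation function. The dropout code below is kept but disabled.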
    dynet::Expression MLP::run(dynet::ComputationGraph & cg, dynet::Expression x)
    {
      // Expression for the current hidden state
      dynet::Expression h_cur = x;
    
      for(unsigned int l = 0; l < layers.size(); l++)
      {
        // Initialize parameters in computation graph
        dynet::Expression W = parameter(cg, parameters[l][0]);
        dynet::Expression b = parameter(cg, parameters[l][1]);
        // Apply affine transform
        dynet::Expression a = dynet::affine_transform({b, W, h_cur});
        // Apply activation function
        dynet::Expression h = activate(a, layers[l].activation);
        h_cur = h;
        // Take care of dropout
        /*
        dynet::Expression h_dropped;
        if(layers[l].dropout_rate > 0){
          if(dropout_active){
            dynet::Expression mask = random_bernoulli(cg, 
             {layers[l].output_dim}, 1 - layers[l].dropout_rate);
            h_dropped = cmult(h, mask);
          }
          else{
            h_dropped = h * (1 - layers[l].dropout_rate);
          }
        }
        else{
          h_dropped = h;
        }
    
        h_cur = h_dropped;
      */
      }
    
      return h_cur;
    }
    
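    // Apply the requested activation function to an expression; LINEAR (and any
    // unhandled value) returns the expression unchanged.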
    inline dynet::Expression MLP::activate(dynet::Expression h, Activation f)
    {
      switch(f)
      {
        case LINEAR :
          return h;
        case RELU :
          return rectify(h);
        case CUBE :
          return cube(h);
        case SIGMOID :
          return logistic(h);
        case TANH :
          return tanh(h);
        case SOFTMAX :
          return softmax(h);
        case SPARSEMAX :
          return sparsemax(h);
        default :
          break;
      }
    
      return h;
    }
    
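    // Debug helper: dump the values of every feature parameter created by
    // featValue2parameter to the given output stream.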
    void MLP::printParameters(FILE * output)
    {
      for(auto & it : ptr2parameter)
      {
        auto & param = it.second;
        dynet::Tensor * tensor = param.values();
        float * value = tensor->v;
        int dim = tensor->d.size();
        fprintf(output, "Param : ");
        for(int i = 0; i < dim; i++)
          fprintf(output, "%.2f ", value[i]);
        fprintf(output, "\n");
      }
    }
    
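    // Train/evaluate on the examples in [start, end): build one batched
    // computation graph, take a gradient step on the summed negative
    // log-softmax loss when in train mode, and return the number of examples
    // whose argmax score matches the gold class.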
    int MLP::trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & start, std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & end)
    {
      dynet::ComputationGraph cg;
      std::vector<dynet::Expression> inputs;
      std::vector<unsigned int> goldClasses;
      int inputDim = 0;
      int outputDim = layers.back().output_dim;
    
      for(auto it = start; it != end; it++)
      {
        std::vector<dynet::Expression> expressions;
    
        for (auto & featValue : it->second.values)
        {
          if(featValue.policy == FeatureModel::Policy::Final)
            expressions.emplace_back(dynet::const_parameter(cg, featValue2parameter(featValue)));
          else
            expressions.emplace_back(dynet::parameter(cg, featValue2parameter(featValue)));
        }
    
        inputs.emplace_back(dynet::concatenate(expressions));
        inputDim = inputs.back().dim().rows();
        goldClasses.emplace_back((unsigned)it->first);
      }
    
      dynet::Expression concatenation = dynet::concatenate(inputs);
      int batchSize = end - start;
    
      dynet::Expression batchedInput = reshape((concatenation),
        dynet::Dim({(unsigned)inputDim}, batchSize));
    
      dynet::Expression output = run(cg, batchedInput);
    
      if(trainMode)
      {
        dynet::Expression batchedLoss = pickneglogsoftmax(output, goldClasses);
        dynet::Expression loss = sum_batches(batchedLoss);
        cg.backward(loss);
        trainer.update();
      }
    
      int nbCorrect = 0;
      std::vector<float> predictions = as_vector(output.value());
      for (int i = 0; i < batchSize; i++)
      {
        int prediction = 0;
    
        for (int j = 0; j < outputDim; j++)
          if(predictions[i*outputDim+j] > predictions[i*outputDim+prediction])
            prediction = j;
    
        if(prediction == (int)goldClasses[i])
          nbCorrect++;
      }
    
      return nbCorrect;
    }
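
A minimal usage sketch (not part of MLP.cpp): it assumes MLP.hpp declares the MLP class, the nested Layer type and the Activation enum exactly as they are used above. The layer dimensions below are illustrative only, and real prediction or training additionally needs FeatureDescription objects produced by the project's FeatureModel.

    #include "MLP.hpp"
    
    int main()
    {
      // Illustrative dimensions: 100 input features -> 50 hidden units -> 10 classes.
      std::vector<MLP::Layer> layers = {
        MLP::Layer(100, 50, 0.0, MLP::RELU),
        MLP::Layer(50, 10, 0.0, MLP::SOFTMAX)
      };
    
      // Initialises dynet and allocates the W/b parameters of both layers.
      MLP mlp(layers);
    
      // Prediction and training then go through mlp.predict(...) and
      // mlp.trainOnBatch(...), with FeatureDescription inputs built elsewhere.
      return 0;
    }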