MLP.cpp
    #include "MLP.hpp"
    #include "File.hpp"
    #include "util.hpp"
    
    #include <cstdio>
    #include <cstdlib>

    #include <dynet/param-init.h>
    #include <dynet/io.h>
    
    std::string MLP::activation2str(Activation a)
    {
      switch(a)
      {
        case LINEAR :
          return "LINEAR";
          break;
        case RELU :
          return "RELU";
          break;
        case ELU :
          return "ELU";
          break;
        case CUBE :
          return "CUBE";
          break;
        case SIGMOID :
          return "SIGMOID";
          break;
        case TANH :
          return "TANH";
          break;
        case SOFTMAX :
          return "SOFTMAX";
          break;
        case SPARSEMAX :
          return "SPARSEMAX";
          break;
        default :
          break;
      }
    
      return "UNKNOWN";
    }
    
    MLP::Activation MLP::str2activation(std::string s)
    {
      if(s == "LINEAR")
        return LINEAR;
      else if(s == "RELU")
        return RELU;
      else if(s == "ELU")
        return ELU;
      else if(s == "CUBE")
        return CUBE;
      else if(s == "SIGMOID")
        return SIGMOID;
      else if(s == "TANH")
        return TANH;
      else if(s == "SOFTMAX")
        return SOFTMAX;
      else if(s == "SPARSEMAX")
        return SPARSEMAX;
      else
      {
        fprintf(stderr, "ERROR (%s) : invalid activation '%s'. Aborting.\n", ERRINFO, s.c_str());
        exit(1);
      }
    
      return LINEAR;
    }
    
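    // Builds an MLP from an explicit list of layers : initializes dynet with a
    // fixed random seed, enables training mode and dropout, checks that
    // consecutive layer dimensions match and registers each layer's parameters.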
    MLP::MLP(std::vector<Layer> layers)
    : layers(layers), trainer(model, 0.001, 0.9, 0.999, 1e-8)
    {
      dynet::initialize(getDefaultParams());
    
      trainMode = true;
      dropoutActive = true;
    
      checkLayersCompatibility();
    
      for(Layer layer : layers)
        addLayerToModel(layer);
    }
    
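    // Registers the parameters of one layer in the dynet model :
    // a weight matrix W of shape (output_dim x input_dim) and a bias vector b.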
    void MLP::addLayerToModel(Layer & layer)
    {
      dynet::Parameter W = model.add_parameters({(unsigned)layer.output_dim, (unsigned)layer.input_dim});
      dynet::Parameter b = model.add_parameters({(unsigned)layer.output_dim});
      parameters.push_back({W,b});
    }
    
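    // Aborts if the MLP has no layer, or if the output dimension of a layer
    // does not match the input dimension of the next one.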
    void MLP::checkLayersCompatibility()
    {
      if(layers.empty())
      {
        fprintf(stderr, "ERROR (%s) : constructed mlp with 0 layers. Aborting.\n", ERRINFO);
        exit(1);
      }
    
      for(unsigned int i = 0; i < layers.size()-1; i++)
        if(layers[i].output_dim != layers[i+1].input_dim)
        {
          fprintf(stderr, "ERROR (%s) : constructed mlp with incompatible layers. Aborting.\n", ERRINFO);
          exit(1);
        }
    }
    
    MLP::Layer::Layer(int input_dim, int output_dim,
                      float dropout_rate, Activation activation)
    {
      this->input_dim = input_dim;
      this->output_dim = output_dim;
      this->dropout_rate = dropout_rate;
      this->activation = activation;
    }
    
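    // Computes the network output for a single FeatureDescription, with
    // dropout temporarily disabled. Returns the values of the last layer.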
    std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd)
    {
      bool currentDropoutActive = dropoutActive;
      dropoutActive = false;
      dynet::ComputationGraph cg;
    
      std::vector<dynet::Expression> expressions;
    
      for (auto & featValue : fd.values)
        expressions.emplace_back(featValue2Expression(cg, featValue));
    
      dynet::Expression input = dynet::concatenate(expressions);
    
      dynet::Expression output = run(cg, input);
    
      dropoutActive = currentDropoutActive;
    
      return as_vector(cg.forward(output));
    }
    
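    // Returns the dynet initialization parameters, using a fixed random seed
    // so that successive runs are reproducible. Also seeds std::rand.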
    dynet::DynetParams & MLP::getDefaultParams()
    {
      static dynet::DynetParams params;
      params.random_seed = 100;
    
      std::srand(params.random_seed);
    
      return params;
    }
    
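    // Turns a FeatureValue into a dynet Expression. Each Dict gets its own
    // lazily created lookup table of MAXLOOKUPSIZE entries; every distinct
    // value pointer is mapped to the next free index of that table, and the
    // corresponding tensor is made to point directly at the Dict data (CPU
    // only). Values that must not be updated (Final policy or OneHot dicts)
    // are returned as const_lookup expressions.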
    dynet::Expression MLP::featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv)
    {
      Dict * dict = fv.dict;
    
      auto entry = lookupParameters.find(dict);
    
      if(entry == lookupParameters.end())
      {
        lookupParameters[dict].first = model.add_lookup_parameters(MAXLOOKUPSIZE, {(unsigned)dict->getDimension(),1});
      }
    
      auto & ptr2index = lookupParameters[dict].second;
      auto & lu = lookupParameters[dict].first;
    
      bool isConst = (fv.policy == FeatureModel::Policy::Final) || (dict->mode == Dict::Mode::OneHot);
    
      auto it = ptr2index.find(fv.vec);
    
      if(it != ptr2index.end())
      {
        if(isConst)
          return dynet::const_lookup(cg, lu, it->second);
        else
          return dynet::lookup(cg, lu, it->second);
      }
    
      // Compute the new index before inserting it : writing
      // "ptr2index[fv.vec] = ptr2index.size()" has unspecified evaluation
      // order before C++17 and can yield an off-by-one index.
      unsigned int newIndex = (unsigned int)ptr2index.size();
      ptr2index[fv.vec] = newIndex;
      it = ptr2index.find(fv.vec);

      unsigned int lookupSize = (unsigned int)(*lu.values()).size();
      if(it->second >= lookupSize)
      {
        fprintf(stderr, "ERROR (%s) : MAXLOOKUPSIZE (%d) is too small. Aborting.\n", ERRINFO, MAXLOOKUPSIZE);
        exit(1);
      }
    
      // Horrible trick : directly set Dict data as Tensor values
      // Works only on CPU
      (*lu.values())[it->second].v = fv.vec->data();
    
      if(isConst)
        return dynet::const_lookup(cg, lu, it->second);
      else
        return dynet::lookup(cg, lu, it->second);
    }
    
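    // Forward pass : for each layer, applies the affine transform W*h + b,
    // the layer's activation function, and dropout (a Bernoulli mask when
    // dropout is active, a scaling by the keep probability otherwise).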
    dynet::Expression MLP::run(dynet::ComputationGraph & cg, dynet::Expression x)
    {
      // Expression for the current hidden state
      dynet::Expression h_cur = x;
    
      for(unsigned int l = 0; l < layers.size(); l++)
      {
        // Initialize parameters in computation graph
        dynet::Expression W = parameter(cg, parameters[l][0]);
        dynet::Expression b = parameter(cg, parameters[l][1]);
        // Apply affine transform
        dynet::Expression a = dynet::affine_transform({b, W, h_cur});
        // Apply activation function
        dynet::Expression h = activate(a, layers[l].activation);
        // Take care of dropout
        dynet::Expression h_dropped;
        if(layers[l].dropout_rate > 0)
        {
          if(dropoutActive)
          {
            // At training time, drop each unit with probability dropout_rate
            dynet::Expression mask = random_bernoulli(cg,
              {(unsigned int)layers[l].output_dim}, 1 - layers[l].dropout_rate);
            h_dropped = cmult(h, mask);
          }
          else
          {
            // At test time, scale activations by the keep probability
            h_dropped = h * (1 - layers[l].dropout_rate);
          }
        }
        else
        {
          h_dropped = h;
        }
    
        h_cur = h_dropped;
      }
    
      return h_cur;
    }
    
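    // Applies the requested activation function to an expression.
    // LINEAR is the identity.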
    inline dynet::Expression MLP::activate(dynet::Expression h, Activation f)
    {
      switch(f)
      {
        case LINEAR :
          return h;
          break;
        case RELU :
          return rectify(h);
          break;
        case ELU :
          return elu(h);
          break;
        case CUBE :
          return cube(h);
          break;
        case SIGMOID :
          return logistic(h);
          break;
        case TANH :
          return tanh(h);
          break;
        case SOFTMAX :
          return softmax(h);
          break;
        case SPARSEMAX :
          return sparsemax(h);
          break;
        default :
          break;
      }
    
      return h;
    }
    
    void MLP::printParameters(FILE * output)
    {
      fprintf(output, "Parameters : NOT IMPLEMENTED\n");
    }
    
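    // Builds a single computation graph for the examples in [start, end)
    // (following the order stored in examples.first), batches their inputs,
    // and, in train mode, minimizes the summed negative log-softmax of the
    // gold classes and updates the parameters.
    // Returns the number of correctly classified examples in the batch.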
    int MLP::trainOnBatch(Examples & examples, int start, int end)
    {
      dynet::ComputationGraph cg;
      std::vector<dynet::Expression> inputs;
      std::vector<unsigned int> goldClasses;
      int inputDim = 0;
      int outputDim = layers.back().output_dim;
    
      for(int i = start; i < end; i++)
      {
        auto & order = examples.first;
        int exampleIndex = order[i];
        auto & example = examples.second[exampleIndex];
    
        std::vector<dynet::Expression> expressions;
    
        for (auto & featValue : example.second.values)
          expressions.emplace_back(featValue2Expression(cg, featValue));
    
        inputs.emplace_back(dynet::concatenate(expressions));
        inputDim = inputs.back().dim().rows();
        goldClasses.emplace_back((unsigned)example.first);
      }
    
      dynet::Expression concatenation = dynet::concatenate(inputs);
      int batchSize = end - start;
    
      dynet::Expression batchedInput = reshape((concatenation),
        dynet::Dim({(unsigned)inputDim}, batchSize));
    
      dynet::Expression output = run(cg, batchedInput);
    
      if(trainMode)
      {
        dynet::Expression batchedLoss = pickneglogsoftmax(output, goldClasses);
        dynet::Expression loss = sum_batches(batchedLoss);
        cg.backward(loss);
        trainer.update();
      }
    
      int nbCorrect = 0;
      std::vector<float> predictions = as_vector(output.value());
      for (unsigned int i = 0; (int)i < batchSize; i++)
      {
        int prediction = 0;
    
        for (unsigned int j = 0; (int)j < outputDim; j++)
          if(predictions[i*outputDim+j] > predictions[i*outputDim+prediction])
            prediction = (int)j;
    
        if(prediction == (int)goldClasses[i])
          nbCorrect++;
      }
    
      return nbCorrect;
    }
    
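    // Same batched forward pass as trainOnBatch, but with dropout disabled
    // and without any parameter update.
    // Returns the number of correctly classified examples in the batch.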
    int MLP::getScoreOnBatch(Examples & examples, int start, int end)
    {
      bool currentDropoutActive = dropoutActive;
      dropoutActive = false;
    
      dynet::ComputationGraph cg;
      std::vector<dynet::Expression> inputs;
      std::vector<unsigned int> goldClasses;
      int inputDim = 0;
      int outputDim = layers.back().output_dim;
    
      for(int i = start; i < end; i++)
      {
        auto & order = examples.first;
        int exampleIndex = order[i];
        auto & example = examples.second[exampleIndex];
    
        std::vector<dynet::Expression> expressions;
    
        for (auto & featValue : example.second.values)
          expressions.emplace_back(featValue2Expression(cg, featValue));
    
        inputs.emplace_back(dynet::concatenate(expressions));
        inputDim = inputs.back().dim().rows();
        goldClasses.emplace_back((unsigned)example.first);
      }
    
      dynet::Expression concatenation = dynet::concatenate(inputs);
      int batchSize = end - start;
    
      dynet::Expression batchedInput = reshape((concatenation),
        dynet::Dim({(unsigned)inputDim}, batchSize));
    
      dynet::Expression output = run(cg, batchedInput);
    
      int nbCorrect = 0;
      std::vector<float> predictions = as_vector(output.value());
      for (unsigned int i = 0; (int)i < batchSize; i++)
      {
        int prediction = 0;
    
        for (unsigned int j = 0; (int)j < outputDim; j++)
          if(predictions[i*outputDim+j] > predictions[i*outputDim+prediction])
            prediction = (int)j;
    
        if(prediction == (int)goldClasses[i])
          nbCorrect++;
      }
    
      dropoutActive = currentDropoutActive;
    
      return nbCorrect;
    }
    
    void MLP::save(const std::string & filename)
    {
      saveStruct(filename);
      saveParameters(filename);
    }
    
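    // Writes one human readable line per layer (input dim, output dim,
    // activation name, dropout rate) to the given file.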
    void MLP::saveStruct(const std::string & filename)
    {
      File file(filename, "w");
      FILE * fd = file.getDescriptor();
    
      for (auto & layer : layers)
      {
        fprintf(fd, "Layer : %d %d %s %.2f\n", layer.input_dim, layer.output_dim, activation2str(layer.activation).c_str(), layer.dropout_rate);
      }
    }
    
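    // Appends the dynet parameters of every layer to the same file,
    // under the names Layer_i_W and Layer_i_b.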
    void MLP::saveParameters(const std::string & filename)
    {
      dynet::TextFileSaver s(filename, true);
      std::string prefix("Layer_");
    
      for(unsigned int i = 0; i < parameters.size(); i++)
      {
        s.save(parameters[i][0], prefix + std::to_string(i) + "_W");
        s.save(parameters[i][1], prefix + std::to_string(i) + "_b");
      }
    }
    
    void MLP::load(const std::string & filename)
    {
      loadStruct(filename);
      loadParameters(filename);
    }
    
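    // Parses the layer description lines written by saveStruct, then rebuilds
    // and registers the corresponding layers.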
    void MLP::loadStruct(const std::string & filename)
    {
      File file(filename, "r");
      FILE * fd = file.getDescriptor();
    
      char activation[1024];
      int input;
      int output;
      float dropout;
    
      while (fscanf(fd, "Layer : %d %d %1023s %f\n", &input, &output, activation, &dropout) == 4)
        layers.emplace_back(input, output, dropout, str2activation(activation));
    
      checkLayersCompatibility();
    
      for (auto & layer : layers)
        addLayerToModel(layer);
    }
    
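    // Reads back the Layer_i_W and Layer_i_b parameters written by
    // saveParameters, in the same order.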
    void MLP::loadParameters(const std::string & filename)
    {
      dynet::TextFileLoader loader(filename);
      std::string prefix("Layer_");
    
      for(unsigned int i = 0; i < parameters.size(); i++)
      {
        parameters[i][0] = loader.load_param(model, prefix + std::to_string(i) + "_W");
        parameters[i][1] = loader.load_param(model, prefix + std::to_string(i) + "_b");
      }
    }
    
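    // Builds an MLP from a previously saved model file, with training mode
    // and dropout disabled.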
    MLP::MLP(const std::string & filename)
    : trainer(model, 0.001, 0.9, 0.999, 1e-8)
    {
      dynet::initialize(getDefaultParams());
    
      trainMode = false;
      dropoutActive = false;
    
      load(filename);
    }
    
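    // Prints the topology of the MLP as (inputDim->dim1->...->dimN).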
    void MLP::printTopology(FILE * output)
    {
      fprintf(output, "(");
      for(unsigned int i = 0; i < layers.size(); i++)
      {
        auto & layer = layers[i];
    
        if(i == 0)
          fprintf(output, "%d", layer.input_dim);
        fprintf(output, "->%d", layer.output_dim);
      }
    
      fprintf(output, ")\n");
    }