MLP.cpp
#include "MLP.hpp"
#include "util.hpp"
#include <dynet/param-init.h>
#include <dynet/io.h>
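// Converts an Activation value to its textual name ("UNKNOWN" for an unexpected value).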
std::string MLP::activation2str(Activation a)
{
switch(a)
{
case LINEAR :
return "LINEAR";
case RELU :
return "RELU";
case CUBE :
return "CUBE";
case SIGMOID :
return "SIGMOID";
case TANH :
return "TANH";
case SOFTMAX :
return "SOFTMAX";
case SPARSEMAX :
return "SPARSEMAX";
default :
break;
}
return "UNKNOWN";
}
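// Parses an activation name; prints an error and aborts on an unknown string.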
MLP::Activation MLP::str2activation(std::string s)
{
if(s == "LINEAR")
return LINEAR;
else if(s == "RELU")
return RELU;
else if(s == "CUBE")
return CUBE;
else if(s == "SIGMOID")
return SIGMOID;
else if(s == "TANH")
return TANH;
else if(s == "SOFTMAX")
return SOFTMAX;
else if(s == "SPARSEMAX")
return SPARSEMAX;
else
{
fprintf(stderr, "ERROR (%s) : invalid activation \'%s\'. Aborting\n",ERRINFO, s.c_str());
exit(1);
}
return LINEAR;
}
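// Builds the network: initializes DyNet, checks that consecutive layers are
// compatible and registers the parameters of each layer in the model.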
MLP::MLP(std::vector<Layer> layers)
: layers(layers), trainer(model, 0.001, 0.9, 0.999, 1e-8)
{
dynet::initialize(getDefaultParams());
trainMode = true;
checkLayersCompatibility();
for(Layer layer : layers)
addLayerToModel(layer);
}
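// Allocates the weight matrix W (output_dim x input_dim) and the bias vector b of one layer.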
void MLP::addLayerToModel(Layer & layer)
{
dynet::Parameter W = model.add_parameters({(unsigned)layer.output_dim, (unsigned)layer.input_dim});
dynet::Parameter b = model.add_parameters({(unsigned)layer.output_dim});
parameters.push_back({W,b});
}
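// Aborts if the network has no layer or if the output dimension of a layer
// does not match the input dimension of the next one.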
void MLP::checkLayersCompatibility()
{
if(layers.empty())
{
fprintf(stderr, "ERROR (%s) : constructed mlp with 0 layers. Aborting.\n", ERRINFO);
exit(1);
}
for(unsigned int i = 0; i < layers.size()-1; i++)
if(layers[i].output_dim != layers[i+1].input_dim)
{
fprintf(stderr, "ERROR (%s) : constructed mlp with incompatible layers. Aborting.\n", ERRINFO);
exit(1);
}
}
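// A Layer only stores its dimensions, its dropout rate and its activation function.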
MLP::Layer::Layer(int input_dim, int output_dim,
float dropout_rate, Activation activation)
{
this->input_dim = input_dim;
this->output_dim = output_dim;
this->dropout_rate = dropout_rate;
this->activation = activation;
}
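// Computes the output scores of the network for one feature description.
// Feature values marked Final enter the graph as constant parameters and are
// therefore not updated; in train mode the negative log-softmax loss of the
// gold class is backpropagated and the parameters are updated before the
// scores are returned.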
std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd, int goldClass)
{
dynet::ComputationGraph cg;
std::vector<dynet::Expression> expressions;
for (auto & featValue : fd.values)
{
if(featValue.policy == FeatureModel::Policy::Final)
expressions.emplace_back(dynet::const_parameter(cg, featValue2parameter(featValue)));
else
expressions.emplace_back(dynet::parameter(cg, featValue2parameter(featValue)));
}
dynet::Expression input = dynet::concatenate(expressions);
dynet::Expression output = run(cg, input);
if(trainMode)
{
cg.backward(pickneglogsoftmax(output, goldClass));
trainer.update();
}
return as_vector(cg.forward(output));
}
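// DyNet parameters shared by every MLP; the random seed is fixed so that runs are reproducible.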
dynet::DynetParams & MLP::getDefaultParams()
{
static dynet::DynetParams params;
params.random_seed = 100;
std::srand(params.random_seed);
return params;
}
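// Returns the dynet::Parameter backing a feature vector, creating it on first use.
// The parameter's storage is redirected to the feature's own buffer, so the
// trainer's updates are written directly into that vector.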
dynet::Parameter & MLP::featValue2parameter(const FeatureModel::FeatureValue & fv)
{
auto it = ptr2parameter.find(fv.vec);
if(it != ptr2parameter.end())
return it->second;
ptr2parameter[fv.vec] = model.add_parameters({(unsigned)fv.vec->size(),1});
it = ptr2parameter.find(fv.vec);
it->second.values()->v = fv.vec->data();
return it->second;
}
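// Forward pass: applies each layer in turn (affine transform followed by the
// layer's activation). The dropout code below is currently disabled.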
dynet::Expression MLP::run(dynet::ComputationGraph & cg, dynet::Expression x)
{
// Expression for the current hidden state
dynet::Expression h_cur = x;
for(unsigned int l = 0; l < layers.size(); l++)
{
// Initialize parameters in computation graph
dynet::Expression W = parameter(cg, parameters[l][0]);
dynet::Expression b = parameter(cg, parameters[l][1]);
// Apply affine transform
dynet::Expression a = dynet::affine_transform({b, W, h_cur});
// Apply activation function
dynet::Expression h = activate(a, layers[l].activation);
h_cur = h;
// Take care of dropout
/*
dynet::Expression h_dropped;
if(layers[l].dropout_rate > 0){
if(dropout_active){
dynet::Expression mask = random_bernoulli(cg,
{layers[l].output_dim}, 1 - layers[l].dropout_rate);
h_dropped = cmult(h, mask);
}
else{
h_dropped = h * (1 - layers[l].dropout_rate);
}
}
else{
h_dropped = h;
}
h_cur = h_dropped;
*/
}
return h_cur;
}
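// Applies the requested activation function to an expression.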
inline dynet::Expression MLP::activate(dynet::Expression h, Activation f)
{
switch(f)
{
case LINEAR :
return h;
case RELU :
return rectify(h);
case CUBE :
return cube(h);
case SIGMOID :
return logistic(h);
case TANH :
return tanh(h);
case SOFTMAX :
return softmax(h);
case SPARSEMAX :
return sparsemax(h);
default :
break;
}
return h;
}
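// Dumps the current values of every feature-backed parameter (debugging helper).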
void MLP::printParameters(FILE * output)
{
for(auto & it : ptr2parameter)
{
auto & param = it.second;
dynet::Tensor * tensor = param.values();
float * value = tensor->v;
int dim = tensor->d.size();
fprintf(output, "Param : ");
for(int i = 0; i < dim; i++)
fprintf(output, "%.2f ", value[i]);
fprintf(output, "\n");
}
}
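// Processes a batch [start, end) in a single computation graph: the inputs are
// concatenated and reshaped into one batched expression, the network is run
// once on the whole batch, and in train mode the summed negative log-softmax
// loss of the gold classes is backpropagated before the parameters are updated.
// Returns the number of examples whose argmax prediction matches the gold class.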
int MLP::trainOnBatch(std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & start, std::vector<std::pair<int, FeatureModel::FeatureDescription> >::iterator & end)
{
dynet::ComputationGraph cg;
std::vector<dynet::Expression> inputs;
std::vector<unsigned int> goldClasses;
int inputDim = 0;
int outputDim = layers.back().output_dim;
for(auto it = start; it != end; it++)
{
std::vector<dynet::Expression> expressions;
for (auto & featValue : it->second.values)
{
if(featValue.policy == FeatureModel::Policy::Final)
expressions.emplace_back(dynet::const_parameter(cg, featValue2parameter(featValue)));
else
expressions.emplace_back(dynet::parameter(cg, featValue2parameter(featValue)));
}
inputs.emplace_back(dynet::concatenate(expressions));
inputDim = inputs.back().dim().rows();
goldClasses.emplace_back((unsigned)it->first);
}
dynet::Expression concatenation = dynet::concatenate(inputs);
int batchSize = end - start;
dynet::Expression batchedInput = reshape((concatenation),
dynet::Dim({(unsigned)inputDim}, batchSize));
dynet::Expression output = run(cg, batchedInput);
if(trainMode)
{
dynet::Expression batchedLoss = pickneglogsoftmax(output, goldClasses);
dynet::Expression loss = sum_batches(batchedLoss);
cg.backward(loss);
trainer.update();
}
int nbCorrect = 0;
std::vector<float> predictions = as_vector(output.value());
for (unsigned int i = 0; (int)i < batchSize; i++)
{
int prediction = 0;
for (unsigned int j = 0; (int)j < outputDim; j++)
if(predictions[i*outputDim+j] > predictions[i*outputDim+prediction])
prediction = (int)j;
if(prediction == (int)goldClasses[i])
nbCorrect++;
}
return nbCorrect;
}
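// Minimal usage sketch (illustration only; the layer sizes, dropout rate and
// class count below are placeholders, not values taken from this project):
//
//   std::vector<MLP::Layer> layers;
//   layers.emplace_back(100, 200, 0.3, MLP::RELU);   // hidden layer
//   layers.emplace_back(200, 10, 0.0, MLP::LINEAR);  // output scores
//   MLP mlp(layers);
//   // With a FeatureModel::FeatureDescription fd and a gold class g prepared
//   // elsewhere, mlp.predict(fd, g) returns the scores of the 10 classes and,
//   // in train mode, performs one update step.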