// MLP.cpp
#include "MLP.hpp"
#include "File.hpp"
#include "util.hpp"
#include <dynet/param-init.h>
#include <dynet/io.h>
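// Returns the name of the given activation function, or "UNKNOWN" if it is not handled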
std::string MLP::activation2str(Activation a)
{
switch(a)
{
case LINEAR :
return "LINEAR";
break;
case RELU :
return "RELU";
break;
case ELU :
return "ELU";
break;
case CUBE :
return "CUBE";
break;
case SIGMOID :
return "SIGMOID";
break;
case TANH :
return "TANH";
break;
case SOFTMAX :
return "SOFTMAX";
break;
case SPARSEMAX :
return "SPARSEMAX";
break;
default :
break;
}
return "UNKNOWN";
}
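// Parses an activation function from its name; prints an error and exits on an unknown name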
MLP::Activation MLP::str2activation(std::string s)
{
if(s == "LINEAR")
return LINEAR;
else if(s == "RELU")
return RELU;
else if(s == "ELU")
return ELU;
else if(s == "CUBE")
return CUBE;
else if(s == "SIGMOID")
return SIGMOID;
else if(s == "TANH")
return TANH;
else if(s == "SOFTMAX")
return SOFTMAX;
else if(s == "SPARSEMAX")
return SPARSEMAX;
else
{
fprintf(stderr, "ERROR (%s) : invalid activation '%s'. Aborting.\n", ERRINFO, s.c_str());
exit(1);
}
return LINEAR;
}
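// Builds a new MLP from the description of its layers : initializes dynet,
// checks that the layers are compatible and adds their parameters to the model.
// Usage sketch (dimensions, activations and the feature description are assumed to come from elsewhere) :
//   MLP mlp({MLP::Layer(inputDim, hiddenDim, 0.3, MLP::RELU),
//            MLP::Layer(hiddenDim, nbClasses, 0.0, MLP::SOFTMAX)});
//   std::vector<float> scores = mlp.predict(featureDescription);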
MLP::MLP(std::vector<Layer> layers)
: layers(layers), trainer(model, 0.001, 0.9, 0.999, 1e-8)
{
dynet::initialize(getDefaultParams());
trainMode = true;
dropoutActive = true;
checkLayersCompatibility();
for(Layer & layer : layers)
addLayerToModel(layer);
}
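// Registers the weight matrix W and the bias vector b of the given layer into the dynet model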
void MLP::addLayerToModel(Layer & layer)
{
dynet::Parameter W = model.add_parameters({(unsigned)layer.output_dim, (unsigned)layer.input_dim});
dynet::Parameter b = model.add_parameters({(unsigned)layer.output_dim});
parameters.push_back({W,b});
}
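// Checks that there is at least one layer and that the output dimension of each layer
// matches the input dimension of the next one; exits otherwise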
void MLP::checkLayersCompatibility()
{
if(layers.empty())
{
fprintf(stderr, "ERROR (%s) : constructed mlp with 0 layers. Aborting.\n", ERRINFO);
exit(1);
}
for(unsigned int i = 0; i < layers.size()-1; i++)
if(layers[i].output_dim != layers[i+1].input_dim)
{
fprintf(stderr, "ERROR (%s) : constructed mlp with incompatible layers. Aborting.\n", ERRINFO);
exit(1);
}
}
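// Describes one layer : dimensions, dropout rate and activation function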
MLP::Layer::Layer(int input_dim, int output_dim,
float dropout_rate, Activation activation)
{
this->input_dim = input_dim;
this->output_dim = output_dim;
this->dropout_rate = dropout_rate;
this->activation = activation;
}
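// Computes the output scores of the network for a single feature description,
// with dropout temporarily disabled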
std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd)
{
bool currentDropoutActive = dropoutActive;
dropoutActive = false;
dynet::ComputationGraph cg;
std::vector<dynet::Expression> expressions;
for (auto & featValue : fd.values)
expressions.emplace_back(featValue2Expression(cg, featValue));
dynet::Expression input = dynet::concatenate(expressions);
dynet::Expression output = run(cg, input);
dropoutActive = currentDropoutActive;
return as_vector(cg.forward(output));
}
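// Returns the dynet parameters shared by the constructors : a fixed random seed (100),
// also used to seed std::rand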
dynet::DynetParams & MLP::getDefaultParams()
{
static dynet::DynetParams params;
params.random_seed = 100;
std::srand(params.random_seed);
return params;
}
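// Turns a feature value into a dynet expression.
// Each Dict gets its own lookup table of size MAXLOOKUPSIZE; a vector already seen reuses
// its index, a new vector is appended and its memory is shared with the lookup entry
// (see the trick below, CPU only). A const_lookup (not updated during training) is used
// when the feature policy is Final or the Dict is in OneHot mode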
dynet::Expression MLP::featValue2Expression(dynet::ComputationGraph & cg, const FeatureModel::FeatureValue & fv)
{
Dict * dict = fv.dict;
auto entry = lookupParameters.find(dict);
if(entry == lookupParameters.end())
{
lookupParameters[dict].first = model.add_lookup_parameters(MAXLOOKUPSIZE, {(unsigned)dict->getDimension(),1});
}
auto & ptr2index = lookupParameters[dict].second;
auto & lu = lookupParameters[dict].first;
bool isConst = (fv.policy == FeatureModel::Policy::Final) || (dict->mode == Dict::Mode::OneHot);
auto it = ptr2index.find(fv.vec);
if(it != ptr2index.end())
{
if(isConst)
return dynet::const_lookup(cg, lu, it->second);
else
return dynet::lookup(cg, lu, it->second);
}
// Assign the next free index of the lookup table to this new vector
unsigned int newIndex = ptr2index.size();
ptr2index[fv.vec] = newIndex;
it = ptr2index.find(fv.vec);
unsigned int lookupSize = (unsigned int)(*lu.values()).size();
if(it->second >= lookupSize)
{
fprintf(stderr, "ERROR (%s) : MAXLOOKUPSIZE (%d) is too small. Aborting.\n", ERRINFO, MAXLOOKUPSIZE);
exit(1);
}
// Horrible trick : directly set Dict data as Tensor values
// Works only on CPU
(*lu.values())[it->second].v = fv.vec->data();
if(isConst)
return dynet::const_lookup(cg, lu, it->second);
else
return dynet::lookup(cg, lu, it->second);
}
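// Feeds the input expression through every layer (affine transform, activation, dropout)
// and returns the resulting expression. At training time dropout applies a Bernoulli mask;
// at prediction time the activations are scaled by the keep probability instead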
dynet::Expression MLP::run(dynet::ComputationGraph & cg, dynet::Expression x)
{
// Expression for the current hidden state
dynet::Expression h_cur = x;
for(unsigned int l = 0; l < layers.size(); l++)
{
// Initialize parameters in computation graph
dynet::Expression W = parameter(cg, parameters[l][0]);
dynet::Expression b = parameter(cg, parameters[l][1]);
// Apply affine transform
dynet::Expression a = dynet::affine_transform({b, W, h_cur});
// Apply activation function
dynet::Expression h = activate(a, layers[l].activation);
// Take care of dropout
dynet::Expression h_dropped;
if(layers[l].dropout_rate > 0){
if(dropoutActive){
dynet::Expression mask = random_bernoulli(cg,
{(unsigned int)layers[l].output_dim}, 1 - layers[l].dropout_rate);
h_dropped = cmult(h, mask);
}
else{
h_dropped = h * (1 - layers[l].dropout_rate);
}
}
else{
h_dropped = h;
}
h_cur = h_dropped;
}
return h_cur;
}
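// Applies the requested activation function to the expression h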
inline dynet::Expression MLP::activate(dynet::Expression h, Activation f)
{
switch(f)
{
case LINEAR :
return h;
break;
case RELU :
return rectify(h);
break;
case ELU :
return elu(h);
break;
case SIGMOID :
return logistic(h);
break;
case TANH :
return tanh(h);
break;
case SOFTMAX :
return softmax(h);
break;
case CUBE :
return cube(h);
break;
case SPARSEMAX :
return sparsemax(h);
break;
default :
break;
}
return h;
}
void MLP::printParameters(FILE * output)
{
fprintf(output, "Parameters : NOT IMPLEMENTED\n");
}
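// Builds one batched computation graph for the examples whose order indices lie in [start, end),
// updates the parameters with the summed negative log softmax loss when trainMode is set,
// and returns the number of correctly classified examples in the batch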
int MLP::trainOnBatch(Examples & examples, int start, int end)
{
dynet::ComputationGraph cg;
std::vector<dynet::Expression> inputs;
std::vector<unsigned int> goldClasses;
int inputDim = 0;
int outputDim = layers.back().output_dim;
for(int i = start; i < end; i++)
{
auto & order = examples.first;
int exampleIndex = order[i];
auto & example = examples.second[exampleIndex];
std::vector<dynet::Expression> expressions;
for (auto & featValue : example.second.values)
expressions.emplace_back(featValue2Expression(cg, featValue));
inputs.emplace_back(dynet::concatenate(expressions));
inputDim = inputs.back().dim().rows();
goldClasses.emplace_back((unsigned)example.first);
}
dynet::Expression concatenation = dynet::concatenate(inputs);
int batchSize = end - start;
dynet::Expression batchedInput = reshape((concatenation),
dynet::Dim({(unsigned)inputDim}, batchSize));
dynet::Expression output = run(cg, batchedInput);
if(trainMode)
{
dynet::Expression batchedLoss = pickneglogsoftmax(output, goldClasses);
dynet::Expression loss = sum_batches(batchedLoss);
cg.backward(loss);
trainer.update();
}
int nbCorrect = 0;
std::vector<float> predictions = as_vector(output.value());
for (unsigned int i = 0; (int)i < batchSize; i++)
{
int prediction = 0;
for (unsigned int j = 0; (int)j < outputDim; j++)
if(predictions[i*outputDim+j] > predictions[i*outputDim+prediction])
prediction = (int)j;
if(prediction == (int)goldClasses[i])
nbCorrect++;
}
return nbCorrect;
}
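// Same as trainOnBatch but with dropout disabled and without any parameter update;
// returns the number of correctly classified examples in the batch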
int MLP::getScoreOnBatch(Examples & examples, int start, int end)
{
bool currentDropoutActive = dropoutActive;
dropoutActive = false;
dynet::ComputationGraph cg;
std::vector<dynet::Expression> inputs;
std::vector<unsigned int> goldClasses;
int inputDim = 0;
int outputDim = layers.back().output_dim;
for(int i = start; i < end; i++)
{
auto & order = examples.first;
int exampleIndex = order[i];
auto & example = examples.second[exampleIndex];
std::vector<dynet::Expression> expressions;
for (auto & featValue : example.second.values)
expressions.emplace_back(featValue2Expression(cg, featValue));
inputs.emplace_back(dynet::concatenate(expressions));
inputDim = inputs.back().dim().rows();
goldClasses.emplace_back((unsigned)example.first);
}
dynet::Expression concatenation = dynet::concatenate(inputs);
int batchSize = end - start;
dynet::Expression batchedInput = reshape((concatenation),
dynet::Dim({(unsigned)inputDim}, batchSize));
dynet::Expression output = run(cg, batchedInput);
int nbCorrect = 0;
std::vector<float> predictions = as_vector(output.value());
for (unsigned int i = 0; (int)i < batchSize; i++)
{
int prediction = 0;
for (unsigned int j = 0; (int)j < outputDim; j++)
if(predictions[i*outputDim+j] > predictions[i*outputDim+prediction])
prediction = (int)j;
if(prediction == (int)goldClasses[i])
nbCorrect++;
}
dropoutActive = currentDropoutActive;
return nbCorrect;
}
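// Saves the structure of the network and its dynet parameters into the same file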
void MLP::save(const std::string & filename)
{
saveStruct(filename);
saveParameters(filename);
}
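// Writes one line per layer, e.g. "Layer : 2053 300 RELU 0.30"
// (input dimension, output dimension, activation, dropout rate)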
void MLP::saveStruct(const std::string & filename)
{
File file(filename, "w");
FILE * fd = file.getDescriptor();
for (auto & layer : layers)
{
fprintf(fd, "Layer : %d %d %s %.2f\n", layer.input_dim, layer.output_dim, activation2str(layer.activation).c_str(), layer.dropout_rate);
}
}
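// Appends every W and b to the save file, under the names Layer_<i>_W and Layer_<i>_b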
void MLP::saveParameters(const std::string & filename)
{
dynet::TextFileSaver s(filename, true);
std::string prefix("Layer_");
for(unsigned int i = 0; i < parameters.size(); i++)
{
s.save(parameters[i][0], prefix + std::to_string(i) + "_W");
s.save(parameters[i][1], prefix + std::to_string(i) + "_b");
}
}
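// Reads back the structure and the parameters written by save()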
void MLP::load(const std::string & filename)
{
loadStruct(filename);
loadParameters(filename);
}
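// Reads the "Layer : ..." lines written by saveStruct, rebuilds the layers
// and registers their parameters into the model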
void MLP::loadStruct(const std::string & filename)
{
File file(filename, "r");
FILE * fd = file.getDescriptor();
char activation[1024];
int input;
int output;
float dropout;
while (fscanf(fd, "Layer : %d %d %1023s %f\n", &input, &output, activation, &dropout) == 4)
layers.emplace_back(input, output, dropout, str2activation(activation));
checkLayersCompatibility();
for (auto & layer : layers)
addLayerToModel(layer);
}
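// Loads the values of every W and b saved by saveParameters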
void MLP::loadParameters(const std::string & filename)
{
dynet::TextFileLoader loader(filename);
std::string prefix("Layer_");
for(unsigned int i = 0; i < parameters.size(); i++)
{
parameters[i][0] = loader.load_param(model, prefix + std::to_string(i) + "_W");
parameters[i][1] = loader.load_param(model, prefix + std::to_string(i) + "_b");
}
}
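// Builds an MLP in prediction mode (no training, no dropout) from a previously saved file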
MLP::MLP(const std::string & filename)
: trainer(model, 0.001, 0.9, 0.999, 1e-8)
{
dynet::initialize(getDefaultParams());
trainMode = false;
dropoutActive = false;
load(filename);
}
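// Prints the dimensions of the network, e.g. "(2053->300->123)"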
void MLP::printTopology(FILE * output)
{
fprintf(output, "(");
for(unsigned int i = 0; i < layers.size(); i++)
{
auto & layer = layers[i];
if(i == 0)
fprintf(output, "%d", layer.input_dim);
fprintf(output, "->%d", layer.output_dim);
}
fprintf(output, ")\n");
}