diff --git a/MLP/include/MLP.hpp b/MLP/include/MLP.hpp
index 825d705a5b9b6083c314a19624678bf5013a3072..995f03268bda520949a297f69f1e68dc0ee7efaa 100644
--- a/MLP/include/MLP.hpp
+++ b/MLP/include/MLP.hpp
@@ -51,7 +51,7 @@ class MLP
 
   std::map< Dict*, std::pair<dynet::LookupParameter, std::map<void*, unsigned int> > > lookupParameters;
   dynet::ParameterCollection model;
-  dynet::AmsgradTrainer trainer;
+  std::unique_ptr<dynet::AmsgradTrainer> trainer;
 
   bool trainMode;
   bool dropoutActive;
@@ -74,6 +74,7 @@ class MLP
   public :
 
   MLP(std::vector<Layer> layers);
+  MLP(int nbInputs, const std::string & topology, int nbOutputs);
   MLP(const std::string & filename);
 
   std::vector<float> predict(FeatureModel::FeatureDescription & fd);
diff --git a/MLP/src/MLP.cpp b/MLP/src/MLP.cpp
index 7342adc722f948159800ffc9bcd28870fd7796e3..3bc91819a61e1e9b0b5a6f991b2cda82dab0760f 100644
--- a/MLP/src/MLP.cpp
+++ b/MLP/src/MLP.cpp
@@ -78,9 +78,53 @@ void MLP::initDynet()
   dynet::initialize(getDefaultParams());
 }
 
+MLP::MLP(int nbInputs, const std::string & topology, int nbOutputs)
+{
+  std::string topo = topology;
+  std::replace(topo.begin(), topo.end(), '(', ' ');
+  std::replace(topo.begin(), topo.end(), ')', ' ');
+
+  auto groups = split(topo);
+  for (auto group : groups)
+  {
+    if(group.empty())
+      continue;
+
+    std::replace(group.begin(), group.end(), ',', ' ');
+    auto layer = split(group);
+
+    if (layer.size() != 3)
+    {
+      fprintf(stderr, "ERROR (%s) : invalid topology \'%s\'. Aborting.\n", ERRINFO, topology.c_str());
+      exit(1);
+    }
+
+    int input = layers.empty() ? nbInputs : layers.back().output_dim;
+    int output = std::stoi(layer[0]);
+    float dropout = std::stof(layer[2]);
+    layers.emplace_back(input, output, dropout, str2activation(layer[1]));
+  }
+
+  layers.emplace_back(layers.back().output_dim, nbOutputs, 0.0, Activation::LINEAR);
+
+  trainer.reset(new dynet::AmsgradTrainer(model, 0.001, 0.9, 0.999, 1e-8));
+
+  initDynet();
+
+  trainMode = true;
+  dropoutActive = true;
+
+  checkLayersCompatibility();
+
+  for(Layer layer : layers)
+    addLayerToModel(layer);
+}
+
 MLP::MLP(std::vector<Layer> layers)
-: layers(layers), trainer(model, 0.001, 0.9, 0.999, 1e-8)
+: layers(layers)
 {
+  trainer.reset(new dynet::AmsgradTrainer(model, 0.001, 0.9, 0.999, 1e-8));
+
   initDynet();
 
   trainMode = true;
@@ -308,7 +352,7 @@ int MLP::trainOnBatch(Examples & examples, int start, int end)
     dynet::Expression batchedLoss = pickneglogsoftmax(output, goldClasses);
     dynet::Expression loss = sum_batches(batchedLoss);
     cg.backward(loss);
-    trainer.update();
+    trainer->update();
   }
 
   int nbCorrect = 0;
@@ -450,8 +494,9 @@ void MLP::loadParameters(const std::string & filename)
 }
 
 MLP::MLP(const std::string & filename)
-: trainer(model, 0.001, 0.9, 0.999, 1e-8)
 {
+  trainer.reset(new dynet::AmsgradTrainer(model, 0.001, 0.9, 0.999, 1e-8));
+
   initDynet();
 
   trainMode = false;
diff --git a/tape_machine/include/Classifier.hpp b/tape_machine/include/Classifier.hpp
index 6153cee14b3b63e8c480f9f4fdc30962ca821ac4..41a87bbc27912cd3870124e19228041e32b0d305 100644
--- a/tape_machine/include/Classifier.hpp
+++ b/tape_machine/include/Classifier.hpp
@@ -30,6 +30,7 @@ class Classifier
   std::unique_ptr<FeatureModel> fm;
   std::unique_ptr<ActionSet> as;
   std::unique_ptr<MLP> mlp;
+  std::string topology;
   Oracle * oracle;
 
   public :
diff --git a/tape_machine/src/Classifier.cpp b/tape_machine/src/Classifier.cpp
index d103e5274f5fa4f3980b1331312627d3f1e58809..aff4621121a02563ab3d3c785fbc3273040683e1 100644
--- a/tape_machine/src/Classifier.cpp
+++ b/tape_machine/src/Classifier.cpp
@@ -61,6 +61,11 @@ Classifier::Classifier(const std::string & filename, bool trainMode, const std::
     badFormatAndAbort(ERRINFO);
 
   as.reset(new ActionSet(expPath + buffer, false));
+
+  if(fscanf(fd, "Topology : %s\n", buffer) != 1)
+    badFormatAndAbort(ERRINFO);
+
+  topology = buffer;
 }
 
 Classifier::Type Classifier::str2type(const std::string & s)
@@ -120,7 +125,6 @@ void Classifier::initClassifier(Config & config)
   }
 
   int nbInputs = 0;
-  int nbHidden = 300;
   int nbOutputs = as->actions.size();
 
   auto fd = fm->getFeatureDescription(config);
@@ -128,8 +132,7 @@ void Classifier::initClassifier(Config & config)
   for (auto feat : fd.values)
     nbInputs += feat.vec->size();
 
-  mlp.reset(new MLP({{nbInputs, nbHidden, 0.3, MLP::Activation::RELU},
-                     {nbHidden, nbOutputs, 0.0, MLP::Activation::LINEAR}}));
+  mlp.reset(new MLP(nbInputs, topology, nbOutputs));
 }
 
 FeatureModel::FeatureDescription Classifier::getFeatureDescription(Config & config)
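
Example (reviewer sketch, not part of the patch): judging from the parser above, the topology string is a whitespace-free sequence of "(outputDim,activation,dropout)" groups, and the new constructor appends the final (nbOutputs, LINEAR) layer itself; since the "Topology :" line is read with fscanf's %s, the string cannot contain spaces. Assuming str2activation() accepts the token "RELU", the previously hard-coded network (one hidden layer of 300 RELU units, dropout 0.3) would be requested as follows, with placeholder dimensions:

    #include "MLP.hpp"

    int main()
    {
      // Hidden layer of 300 RELU units with dropout 0.3; the constructor
      // adds the trailing (nbOutputs, LINEAR, 0.0) layer on its own.
      MLP mlp(/*nbInputs=*/500, "(300,RELU,0.3)", /*nbOutputs=*/40);
      return 0;
    }

A deeper network chains groups, e.g. "(500,RELU,0.3)(300,RELU,0.2)", which a classifier file would presumably carry as "Topology : (500,RELU,0.3)(300,RELU,0.2)".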