diff --git a/CMakeLists.txt b/CMakeLists.txt
index fbad2ea10a661d1d1841faea5a868055b25c417d..8390598a1492b17b1f548fc5f2b50037b3b6b14c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,8 +16,12 @@ set(CMAKE_CXX_FLAGS_RELEASE "-O3")
 
 include_directories(maca_common/include)
 include_directories(tape_machine/include)
+include_directories(trainer/include)
 include_directories(tests/include)
+include_directories(MLP/include)
 
 add_subdirectory(maca_common)
 add_subdirectory(tape_machine)
+add_subdirectory(trainer)
+add_subdirectory(MLP)
 add_subdirectory(tests)
diff --git a/MLP/CMakeLists.txt b/MLP/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ead680112acaa9c8801edd65e4af1f42f0331b24
--- /dev/null
+++ b/MLP/CMakeLists.txt
@@ -0,0 +1,6 @@
+FILE(GLOB SOURCES src/*.cpp)
+
+#compiling library
+add_library(MLP STATIC ${SOURCES})
+target_link_libraries(MLP tape_machine)
+target_link_libraries(MLP dynet)
diff --git a/MLP/include/MLP.hpp b/MLP/include/MLP.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f71c346d3cfb52fc5ebd85e866bcbbe7c20126a6
--- /dev/null
+++ b/MLP/include/MLP.hpp
@@ -0,0 +1,63 @@
+#ifndef MLP__H
+#define MLP__H
+
+#include <dynet/nodes.h>
+#include <dynet/dynet.h>
+#include <dynet/training.h>
+#include <dynet/timing.h>
+#include <dynet/expr.h>
+#include "FeatureModel.hpp"
+
+class MLP
+{
+  public :
+
+  enum Activation
+  {
+    SIGMOID,
+    TANH,
+    RELU,
+    LINEAR,
+    SPARSEMAX,
+    CUBE,
+    SOFTMAX
+  };
+
+  static std::string activation2str(Activation a);
+  static Activation str2activation(std::string s);
+
+  struct Layer
+  {
+    int input_dim;
+    int output_dim;
+
+    float dropout_rate;
+    Activation activation;
+
+    Layer(int input_dim, int output_dim,
+          float dropout_rate, Activation activation);
+    void print(FILE * file);
+  };
+
+  private :
+
+  std::vector<Layer> layers;
+  std::vector< std::vector<dynet::Parameter> > parameters;
+
+  dynet::ParameterCollection model;
+  dynet::AmsgradTrainer trainer;
+  bool trainMode;
+
+  private :
+
+  void addLayerToModel(Layer & layer);
+  void checkLayersCompatibility();
+  dynet::DynetParams & getDefaultParams();
+
+  public :
+
+  MLP(std::vector<Layer> layers);
+  std::vector<float> predict(FeatureModel::FeatureDescription & fd, int goldClass);
+};
+
+#endif
diff --git a/MLP/src/MLP.cpp b/MLP/src/MLP.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..219b027cfcb01b76b026f6a722046d0bd5ce56c7
--- /dev/null
+++ b/MLP/src/MLP.cpp
@@ -0,0 +1,143 @@
+#include "MLP.hpp"
+#include "util.hpp"
+
+std::string MLP::activation2str(Activation a)
+{
+  switch(a)
+  {
+    case LINEAR :
+      return "LINEAR";
+      break;
+    case RELU :
+      return "RELU";
+      break;
+    case CUBE :
+      return "CUBE";
+      break;
+    case SIGMOID :
+      return "SIGMOID";
+      break;
+    case TANH :
+      return "TANH";
+      break;
+    case SOFTMAX :
+      return "SOFTMAX";
+      break;
+    case SPARSEMAX :
+      return "SPARSEMAX";
+      break;
+    default :
+      break;
+  }
+
+  return "UNKNOWN";
+}
+
+MLP::Activation MLP::str2activation(std::string s)
+{
+  if(s == "LINEAR")
+    return LINEAR;
+  else if(s == "RELU")
+    return RELU;
+  else if(s == "CUBE")
+    return CUBE;
+  else if(s == "SIGMOID")
+    return SIGMOID;
+  else if(s == "TANH")
+    return TANH;
+  else if(s == "SOFTMAX")
+    return SOFTMAX;
+  else if(s == "SPARSEMAX")
+    return SPARSEMAX;
+  else
+  {
+    fprintf(stderr, "ERROR (%s) : invalid activation \'%s\'. Aborting\n",ERRINFO, s.c_str());
+    exit(1);
+  }
+
+  return LINEAR;
+}
+
+MLP::MLP(std::vector<Layer> layers)
+: layers(layers), trainer(model, 0.001, 0.9, 0.999, 1e-8)
+{
+  dynet::initialize(getDefaultParams());
+
+  trainMode = true;
+
+  checkLayersCompatibility();
+
+  for(Layer layer : layers)
+    addLayerToModel(layer);
+}
+
+void MLP::addLayerToModel(Layer & layer)
+{
+  dynet::Parameter W = model.add_parameters({(unsigned)layer.output_dim, (unsigned)layer.input_dim});
+  dynet::Parameter b = model.add_parameters({(unsigned)layer.output_dim});
+  parameters.push_back({W,b});
+}
+
+void MLP::checkLayersCompatibility()
+{
+  if(layers.empty())
+  {
+    fprintf(stderr, "ERROR (%s) : constructed mlp with 0 layers. Aborting.\n", ERRINFO);
+    exit(1);
+  }
+
+  for(unsigned int i = 0; i < layers.size()-1; i++)
+    if(layers[i].output_dim != layers[i+1].input_dim)
+    {
+      fprintf(stderr, "ERROR (%s) : constructed mlp with incompatible layers. Aborting.\n", ERRINFO);
+      exit(1);
+    }
+}
+
+MLP::Layer::Layer(int input_dim, int output_dim,
+                  float dropout_rate, Activation activation)
+{
+  this->input_dim = input_dim;
+  this->output_dim = output_dim;
+  this->dropout_rate = dropout_rate;
+  this->activation = activation;
+}
+
+std::vector<float> MLP::predict(FeatureModel::FeatureDescription & fd, int goldClass)
+{
+  dynet::ComputationGraph cg;
+
+  for (auto & featValue : fd.values)
+  {
+    dynet::Parameter p(*featValue.vec);
+  }
+
+  /*
+  int nbInputs = layers[0].input_dim;
+
+  dynet::Expression x = reshape(concatenate_cols(cur_batch),
+    dynet::Dim({nb_inputs}, cur_batch_size));
+
+  dynet::Expression loss_expr = get_loss(x_batch, cur_labels);
+
+  loss += as_scalar(computation_graph.forward(loss_expr));
+  nb_samples += cur_batch_size;
+  computation_graph.backward(loss_expr);
+  trainer.update();
+  */
+
+  std::vector<float> res;
+
+  return res;
+}
+
+dynet::DynetParams & MLP::getDefaultParams()
+{
+  static dynet::DynetParams params;
+  params.random_seed = 100;
+
+  std::srand(params.random_seed);
+
+  return params;
+}
+
diff --git a/tape_machine/CMakeLists.txt b/tape_machine/CMakeLists.txt
index 38629a3520312e28d3620610f75afe25469cfdd4..8ce74442ab4433fc09b644d83a3e5517a903fc28 100644
--- a/tape_machine/CMakeLists.txt
+++ b/tape_machine/CMakeLists.txt
@@ -3,3 +3,4 @@ FILE(GLOB SOURCES src/*.cpp)
 #compiling library
 add_library(tape_machine STATIC ${SOURCES})
 target_link_libraries(tape_machine maca_common)
+target_link_libraries(tape_machine MLP)
diff --git a/tape_machine/include/Action.hpp b/tape_machine/include/Action.hpp
index a6fa5ecf261f64de7d88839f49d23765554a27f5..e6c24072b582dea71f127d5b545d8b37fa93f819 100644
--- a/tape_machine/include/Action.hpp
+++ b/tape_machine/include/Action.hpp
@@ -26,7 +26,7 @@ class Action
     std::string to_string();
   };
 
-  private :
+  public :
 
   std::vector<BasicAction> sequence;
   std::string name;
diff --git a/tape_machine/include/ActionSet.hpp b/tape_machine/include/ActionSet.hpp
index 98f8ec07782cec3914070e0b55f8d38955aaa771..8c6099ec5c3ae7d8b6e4ccc815cd8fe24d64d3c0 100644
--- a/tape_machine/include/ActionSet.hpp
+++ b/tape_machine/include/ActionSet.hpp
@@ -10,11 +10,13 @@ class ActionSet
 
   std::string name;
   std::vector<Action> actions;
+  std::map<std::string, int> str2index;
 
   public :
 
   ActionSet(const std::string & filename);
   void printForDebug(FILE * output);
+  int getActionIndex(const std::string & name);
 };
 
 #endif
diff --git a/tape_machine/include/Classifier.hpp b/tape_machine/include/Classifier.hpp
index 7d477227c76ef41628ef03ea5b6863f03a0bccad..2772453fed6f52dd7513530d0a4a52c7e9fcbbec 100644
--- a/tape_machine/include/Classifier.hpp
+++ b/tape_machine/include/Classifier.hpp
@@ -5,11 +5,15 @@
 #include <memory>
 #include "FeatureModel.hpp"
 #include "ActionSet.hpp"
+#include "Oracle.hpp"
+#include "MLP.hpp"
 
 class Classifier
 {
   public :
 
+  using WeightedActions = std::vector< std::pair<float, std::string> >;
+
   enum Type
   {
     Prediction,
@@ -21,11 +25,18 @@
   Type type;
   std::unique_ptr<FeatureModel> fm;
   std::unique_ptr<ActionSet> as;
+  std::unique_ptr<MLP> mlp;
+  Oracle * oracle;
+
+  private :
+
+  void initClassifier(Config & config);
 
   public :
 
   static Type str2type(const std::string & filename);
   Classifier(const std::string & filename);
+  WeightedActions weightActions(Config & config, const std::string & goldAction);
 };
 
 #endif
diff --git a/tape_machine/include/Config.hpp b/tape_machine/include/Config.hpp
index d21b0bbb1e7c0162a30ed5c5097a30704e0af1e1..352f305646588e7b253a371827f64736d248d63e 100644
--- a/tape_machine/include/Config.hpp
+++ b/tape_machine/include/Config.hpp
@@ -12,6 +12,7 @@ class Config
   std::vector< std::vector< std::string> > tapes;
   std::vector<int> stack;
   int head;
+  std::string inputFilename;
 
   public :
 
@@ -20,6 +21,9 @@
   std::vector<std::string> & getTapeByInputCol(int col);
   void readInput(const std::string & filename);
   void printForDebug(FILE * output);
+  void moveHead(int mvt);
+  bool isFinal();
+  void reset();
 };
 
 #endif
diff --git a/tape_machine/include/Oracle.hpp b/tape_machine/include/Oracle.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..76e6517018cf7a651b8779e2377212d0ff61b5d8
--- /dev/null
+++ b/tape_machine/include/Oracle.hpp
@@ -0,0 +1,26 @@
+#ifndef ORACLE__H
+#define ORACLE__H
+
+#include <string>
+#include <map>
+#include <memory>
+#include <functional>
+#include "Config.hpp"
+
+class Oracle
+{
+  private :
+
+  static std::map< std::string, std::unique_ptr<Oracle> > str2oracle;
+  static std::pair< std::string, std::unique_ptr<Oracle> > makePair();
+
+  Oracle(const std::function<std::string(Config &)> & getAction);
+
+  public :
+
+  static Oracle * getOracle(const std::string & name);
+  static void init();
+  std::function<std::string(Config &)> getAction;
+};
+
+#endif
diff --git a/tape_machine/include/TapeMachine.hpp b/tape_machine/include/TapeMachine.hpp
index 425f410a8f6cb404441b229ceca47869cb52f65f..8e00b0718c91318fe7bc8f700e6bd6579093e82d 100644
--- a/tape_machine/include/TapeMachine.hpp
+++ b/tape_machine/include/TapeMachine.hpp
@@ -32,10 +32,14 @@ class TapeMachine
   std::string name;
   std::map< std::string, std::unique_ptr<Classifier> > str2classifier;
   std::map< std::string, std::unique_ptr<State> > str2state;
+  State * currentState;
 
   public :
 
   TapeMachine(const std::string & filename);
+  State * getCurrentState();
+  Transition * getTransition(const std::string & action);
+  void takeTransition(Transition * transition);
 };
 
 #endif
diff --git a/tape_machine/src/ActionSet.cpp b/tape_machine/src/ActionSet.cpp
index 40ccd68ed1260c7f082b5c81e1e8409ae9e1bc53..b2dd09836eb5292b3cc963c9ac0646f515c87ba8 100644
--- a/tape_machine/src/ActionSet.cpp
+++ b/tape_machine/src/ActionSet.cpp
@@ -10,7 +10,10 @@ ActionSet::ActionSet(const std::string & filename)
   char buffer[1024];
 
   while(fscanf(fd, "%[^\n]\n", buffer) == 1)
+  {
+    str2index[buffer] = actions.size();
     actions.emplace_back(buffer);
+  }
 
   this->name = getFilenameFromPath(filename);
 }
@@ -23,3 +26,17 @@
     action.printForDebug(output);
 }
 
+int ActionSet::getActionIndex(const std::string & name)
+{
+  auto it = str2index.find(name);
+
+  if(it != str2index.end())
+    return it->second;
+
+  fprintf(stderr, "ERROR (%s) : unknown action \'%s\'. Aborting.\n", ERRINFO, name.c_str());
+
+  exit(1);
+
+  return -1;
+}
+
diff --git a/tape_machine/src/Classifier.cpp b/tape_machine/src/Classifier.cpp
index 43af9e46e8807601eff6c4960949859737246486..18dd1334ebe68c1a965ea349a1e6010e0f140505 100644
--- a/tape_machine/src/Classifier.cpp
+++ b/tape_machine/src/Classifier.cpp
@@ -16,25 +16,30 @@ Classifier::Classifier(const std::string & filename)
 
   char buffer[1024];
 
-  if(fscanf(fd, "%s\n", buffer) != 1)
+  if(fscanf(fd, "Name : %s\n", buffer) != 1)
     badFormatAndAbort(ERRINFO);
 
   name = buffer;
 
-  if(fscanf(fd, "%s\n", buffer) != 1)
+  if(fscanf(fd, "Type : %s\n", buffer) != 1)
     badFormatAndAbort(ERRINFO);
 
   type = str2type(buffer);
 
-  if(fscanf(fd, "%s\n", buffer) != 1)
+  if(fscanf(fd, "Feature Model : %s\n", buffer) != 1)
     badFormatAndAbort(ERRINFO);
 
   fm.reset(new FeatureModel(buffer));
 
-  if(fscanf(fd, "%s\n", buffer) != 1)
+  if(fscanf(fd, "Action Set : %s\n", buffer) != 1)
     badFormatAndAbort(ERRINFO);
 
   as.reset(new ActionSet(buffer));
+
+  if(fscanf(fd, "Oracle : %s\n", buffer) != 1)
+    badFormatAndAbort(ERRINFO);
+
+  oracle = Oracle::getOracle(buffer);
 }
 
 Classifier::Type Classifier::str2type(const std::string & s)
@@ -52,3 +57,38 @@
   return Type::Prediction;
 }
 
+Classifier::WeightedActions Classifier::weightActions(Config & config, const std::string & goldAction)
+{
+  if(!mlp.get())
+    initClassifier(config);
+
+  int actionIndex = as->getActionIndex(goldAction);
+
+  auto fd = fm->getFeatureDescription(config);
+  auto scores = mlp->predict(fd, actionIndex);
+
+  WeightedActions result;
+
+  for (unsigned int i = 0; i < scores.size(); i++)
+    result.emplace_back(scores[i], as->actions[i].name);
+
+  std::sort(result.begin(), result.end());
+
+  return result;
+}
+
+void Classifier::initClassifier(Config & config)
+{
+  int nbInputs = 0;
+  int nbHidden = 200;
+  int nbOutputs = as->actions.size();
+
+  auto fd = fm->getFeatureDescription(config);
+
+  for (auto feat : fd.values)
+    nbInputs += feat.vec->size();
+
+  mlp.reset(new MLP({{nbInputs, nbHidden, 0.0, MLP::Activation::RELU},
+    {nbHidden, nbOutputs, 0.0, MLP::Activation::LINEAR}}));
+}
+
diff --git a/tape_machine/src/Config.cpp b/tape_machine/src/Config.cpp
index 94c77c125adb92bbaa3c2dd73b82a53f587f44da..7f81823b2ade7ae4142b5b84f9c855217602e1c8 100644
--- a/tape_machine/src/Config.cpp
+++ b/tape_machine/src/Config.cpp
@@ -18,6 +18,7 @@ std::vector<std::string> & Config::getTapeByInputCol(int col)
 
 void Config::readInput(const std::string & filename)
 {
+  this->inputFilename = filename;
   File file(filename, "r");
 
   int nbInputCol = mcd.getNbInputColumns();
@@ -39,11 +40,35 @@
 void Config::printForDebug(FILE * output)
 {
+  int window = 3;
+
   for(auto & tape : tapes)
   {
-    for(auto & s : tape)
-      fprintf(output, "%s\t", s.c_str());
+    for(int i = std::max(0, head-window); i < std::min((int)tape.size(), head+window); i++)
+      fprintf(output, "%s%s%s\t", i==head ? "|" : "", tape[i].c_str(), i==head ? "|" : "");
 
     fprintf(output, "\n");
   }
 }
 
+void Config::moveHead(int mvt)
+{
+  head += mvt;
+}
+
+bool Config::isFinal()
+{
+  return head >= (int)getTapeByInputCol(0).size() && stack.empty();
+}
+
+void Config::reset()
+{
+  for (auto & tape : tapes)
+    tape.clear();
+
+  stack.clear();
+
+  head = 0;
+
+  readInput(inputFilename);
+}
+
diff --git a/tape_machine/src/Oracle.cpp b/tape_machine/src/Oracle.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f38a3fa2ee6af31418b5d2dc25834c9fb344c162
--- /dev/null
+++ b/tape_machine/src/Oracle.cpp
@@ -0,0 +1,37 @@
+#include "Oracle.hpp"
+#include "util.hpp"
+
+std::map< std::string, std::unique_ptr<Oracle> > Oracle::str2oracle;
+
+Oracle::Oracle(const std::function<std::string(Config &)> & getAction)
+{
+  this->getAction = getAction;
+}
+
+Oracle * Oracle::getOracle(const std::string & name)
+{
+  init();
+
+  auto it = str2oracle.find(name);
+
+  if(it != str2oracle.end())
+    return it->second.get();
+
+  fprintf(stderr, "ERROR (%s) : invalid oracle name \'%s\'. Aborting.\n", ERRINFO, name.c_str());
+
+  return nullptr;
+}
+
+void Oracle::init()
+{
+  static bool isInit = false;
+  if(isInit)
+    return;
+  isInit = true;
+
+  str2oracle.emplace("tagger", std::unique_ptr<Oracle>(new Oracle([](Config & c)
+  {
+    return "TAG 0 POS " + c.getTape("POS")[c.head];
+  })));
+}
+
diff --git a/tape_machine/src/TapeMachine.cpp b/tape_machine/src/TapeMachine.cpp
index 7e6131cf260777f2dd7097d3c2e0a139701c3169..da26748e5dfea0072a8613e0ec7514d37450f604 100644
--- a/tape_machine/src/TapeMachine.cpp
+++ b/tape_machine/src/TapeMachine.cpp
@@ -1,6 +1,7 @@
 #include "TapeMachine.hpp"
 #include "File.hpp"
 #include "util.hpp"
+#include <cstring>
 
 TapeMachine::TapeMachine(const std::string & filename)
 {
@@ -19,7 +20,7 @@
   char buffer3[1024];
 
   // Reading the name
-  if(fscanf(fd, "%[^\n]\n", buffer) != 1)
+  if(fscanf(fd, "Name : %[^\n]\n", buffer) != 1)
     badFormatAndAbort(ERRINFO);
 
   name = buffer;
@@ -41,6 +42,8 @@
   if(buffer != std::string("STATES"))
     badFormatAndAbort(ERRINFO);
 
+  currentState = nullptr;
+
   while(fscanf(fd, "%%%s\n", buffer) != 1)
   {
     // Reading a state
@@ -53,6 +56,9 @@
     Classifier * classifier = str2classifier[buffer2].get();
 
     str2state.emplace(buffer, std::unique_ptr<State>(new State(buffer, classifier)));
+
+    if(!currentState) // Initial state = first state in the file
+      currentState = str2state[buffer].get();
   }
 
   // Reading %TRANSITIONS
@@ -93,3 +99,28 @@ TapeMachine::Transition::Transition(State * dest, const std::string & prefix, int mvt)
   this->headMvt = mvt;
 }
 
+TapeMachine::State * TapeMachine::getCurrentState()
+{
+  return currentState;
+}
+
+TapeMachine::Transition * TapeMachine::getTransition(const std::string & action)
+{
+  for (auto & transition : currentState->transitions)
+  {
+    if(!strncmp(action.c_str(), transition.actionPrefix.c_str(), transition.actionPrefix.size()))
+      return &transition;
+  }
+
+  fprintf(stderr, "ERROR (%s) : no corresponding transition for action \'%s\' and state \'%s\'. Aborting.\n", ERRINFO, action.c_str(), currentState->name.c_str());
+
+  exit(1);
+
+  return nullptr;
+}
+
+void TapeMachine::takeTransition(Transition * transition)
+{
+  currentState = transition->dest;
+}
+
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 5389158865fd44930dd33f8a95d547c6cd9de8f8..3da1b40b4d359fd6f01a9ed82b936a65f33de717 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -2,3 +2,4 @@ FILE(GLOB SOURCES src/*.cpp)
 
 add_executable(test_train src/test_train.cpp)
 target_link_libraries(test_train tape_machine)
+target_link_libraries(test_train trainer)
diff --git a/tests/src/test_train.cpp b/tests/src/test_train.cpp
index 8b09c3377f365e6144a19c212f121b6d75bbc266..3d48cd144ae9398f569b1c7109d91fc2e87ff714 100644
--- a/tests/src/test_train.cpp
+++ b/tests/src/test_train.cpp
@@ -3,6 +3,7 @@
 #include "MCD.hpp"
 #include "Config.hpp"
 #include "TapeMachine.hpp"
+#include "Trainer.hpp"
 
 void printUsageAndExit(char * argv[])
 {
@@ -22,7 +23,9 @@ int main(int argc, char * argv[])
 
   config.readInput(argv[2]);
 
-  config.printForDebug(stderr);
+  Trainer trainer(tapeMachine, mcd, config);
+
+  trainer.train();
 
   return 0;
 }
diff --git a/trainer/CMakeLists.txt b/trainer/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e9cc312c6821b074637c11d5ba9bf1790e20becf
--- /dev/null
+++ b/trainer/CMakeLists.txt
@@ -0,0 +1,7 @@
+FILE(GLOB SOURCES src/*.cpp)
+
+#add_executable(test_train src/test_train.cpp)
+#target_link_libraries(test_train tape_machine)
+
+#compiling library
+add_library(trainer STATIC ${SOURCES})
diff --git a/trainer/include/Trainer.hpp b/trainer/include/Trainer.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f2246148da08fdf128d40652c00d790c7370808b
--- /dev/null
+++ b/trainer/include/Trainer.hpp
@@ -0,0 +1,26 @@
+#ifndef TRAINER__H
+#define TRAINER__H
+
+#include "TapeMachine.hpp"
+#include "MCD.hpp"
+#include "Config.hpp"
+
+class Trainer
+{
+  private :
+
+  TapeMachine & tm;
+  MCD & mcd;
+  Config & config;
+
+  private :
+
+  void printWeightedActions(FILE * output, Classifier::WeightedActions & wa);
+
+  public :
+
+  Trainer(TapeMachine & tm, MCD & mcd, Config & config);
+  void train();
+};
+
+#endif
diff --git a/trainer/src/Trainer.cpp b/trainer/src/Trainer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a85780d85f1702191d7e2aa065e91c2b99c731bd
--- /dev/null
+++ b/trainer/src/Trainer.cpp
@@ -0,0 +1,41 @@
+#include "Trainer.hpp"
+
+Trainer::Trainer(TapeMachine & tm, MCD & mcd, Config & config)
+: tm(tm), mcd(mcd), config(config)
+{
+}
+
+void Trainer::train()
+{
+  for (int i = 0; i < 2; i++)
+  {
+    while (!config.isFinal())
+    {
+      TapeMachine::State * currentState = tm.getCurrentState();
+      Classifier * classifier = currentState->classifier;
+
+      //config.printForDebug(stderr);
+
+      //fprintf(stderr, "State : \'%s\'\n", currentState->name.c_str());
+
+      std::string neededActionName = classifier->oracle->getAction(config);
+      auto weightedActions = classifier->weightActions(config, neededActionName);
+      printWeightedActions(stderr, weightedActions);
+
+      //fprintf(stderr, "Action : \'%s\'\n", neededActionName.c_str());
+
+      TapeMachine::Transition * transition = tm.getTransition(neededActionName);
+      tm.takeTransition(transition);
+      config.moveHead(transition->headMvt);
+    }
+
+    config.reset();
+  }
+}
+
+void Trainer::printWeightedActions(FILE * output, Classifier::WeightedActions & wa)
+{
+  for (auto it : wa)
+    fprintf(output, "%.2f\t%s\n", it.first, it.second.c_str());
+}
+