Commit e253ffad authored by Franck Dary

Added regression

parent 2a0a58f1
@@ -16,6 +16,9 @@
namespace util
{
+constexpr float float2longScale = 10000;
void warning(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current());
void error(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current());
void error(const std::exception & e, const std::experimental::source_location & location = std::experimental::source_location::current());
@@ -31,7 +34,7 @@ utf8string splitAsUtf8(std::string_view s);
std::string int2HumanStr(int number);
-std::string shrink(const std::string & s, int printedSize);
+std::string shrink(std::string s, int printedSize);
std::string strip(const std::string & s);
@@ -48,6 +51,9 @@ bool isNumber(const std::string & s);
std::string getTime();
+long float2long(float f);
+float long2float(long l);
template <typename T>
bool isEmpty(const std::vector<T> & s)
{
......
@@ -4,6 +4,16 @@
#include <algorithm>
#include "upper2lower"
+float util::long2float(long l)
+{
+return l / util::float2longScale;
+}
+long util::float2long(float f)
+{
+return f * util::float2longScale;
+}
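// Illustrative sketch (not in this commit): float2long/long2float quantize a
// score to 1/10000 resolution so regression targets can travel through the
// long-based gold-label pipeline. The conversion truncates rather than rounds:
//   long q = util::float2long(0.1234f);   // 1234
//   float r = util::long2float(q);        // 0.1234f
//   util::float2long(0.12345f);           // 1234, not 1235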
int util::printedLength(std::string_view s)
{
return splitAsUtf8(s).size();
@@ -91,10 +101,20 @@ util::utf8string util::splitAsUtf8(std::string_view s)
return result;
}
-std::string util::shrink(const std::string & s, int printedSize)
+std::string util::shrink(std::string s, int printedSize)
{
static const std::string filler = "…";
if (printedLength(s) <= printedSize)
return s;
+try
+{
+float value = std::stof(s);
+s = fmt::format("{:{}.3f}", value, printedSize);
+}
+catch (std::exception &) {}
+if (printedLength(s) <= printedSize)
+return s;
......
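// Illustrative sketch (not in this commit): with the new try-block, an
// over-long numeric string is reformatted to three decimals before the "…"
// truncation fallback, e.g. util::shrink("0.98765432", 6) yields " 0.988";
// non-numeric input makes std::stof throw and keeps the old behaviour.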
@@ -50,7 +50,7 @@ void Beam::update(ReadingMachine & machine, bool debug)
auto context = classifier.getNN()->extractContext(elements[index].config).back();
auto neuralInput = torch::from_blob(context.data(), {(long)context.size()}, torch::kLong).clone().to(NeuralNetworkImpl::device);
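// Regression classifiers emit a raw score, so the softmax that turns
// classification logits into a probability distribution is skipped: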
-auto prediction = torch::softmax(classifier.getNN()(neuralInput).squeeze(0), 0);
+auto prediction = classifier.isRegression() ? classifier.getNN()(neuralInput).squeeze(0) : torch::softmax(classifier.getNN()(neuralInput).squeeze(0), 0);
std::vector<std::pair<float, int>> scoresOfTransitions;
for (unsigned int i = 0; i < prediction.size(0); i++)
@@ -76,13 +76,15 @@ void Beam::update(ReadingMachine & machine, bool debug)
{
elements.emplace_back(elements[index], scoresOfTransitions[i].second);
elements.back().name.push_back(std::to_string(i));
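// Regression scores are not probabilities; counting a constant 1.0 per step
// keeps totalProbability/meanProbability comparable across beam elements: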
-elements.back().totalProbability += scoresOfTransitions[i].first;
+elements.back().totalProbability += classifier.isRegression() ? 1.0 : scoresOfTransitions[i].first;
+elements.back().config.setChosenActionScore(scoresOfTransitions[i].first);
elements.back().nbTransitions++;
elements.back().meanProbability = elements.back().totalProbability / elements.back().nbTransitions;
}
elements[index].nextTransition = scoresOfTransitions[0].second;
-elements[index].totalProbability += scoresOfTransitions[0].first;
+elements[index].totalProbability += classifier.isRegression() ? 1.0 : scoresOfTransitions[0].first;
+elements[index].config.setChosenActionScore(scoresOfTransitions[0].first);
elements[index].nbTransitions++;
elements[index].name.push_back("0");
elements[index].meanProbability = 0.0;
......
@@ -47,6 +47,7 @@ class Action
static Action addHypothesisRelative(const std::string & colName, Config::Object object, int relativeIndex, const std::string & hypothesis);
static Action addHypothesisRelativeRelaxed(const std::string & colName, Config::Object object, int relativeIndex, const std::string & hypothesis);
static Action addToHypothesisRelative(const std::string & colName, Config::Object object, int relativeIndex, const std::string & addition);
+static Action writeScore(const std::string & colName, Config::Object object, int relativeIndex);
static Action pushWordIndexOnStack();
static Action popStack(int relIndex);
static Action emptyStack();
......
@@ -5,6 +5,7 @@
#include <filesystem>
#include "TransitionSet.hpp"
#include "NeuralNetwork.hpp"
#include "LossFunction.hpp"
class Classifier
{
@@ -24,6 +25,8 @@ class Classifier
std::string state;
std::vector<std::string> states;
std::filesystem::path path;
+bool regression{false};
+LossFunction lossFct;
private :
@@ -49,6 +52,8 @@ class Classifier
void saveDicts();
void saveBest();
void saveLast();
+bool isRegression() const;
+LossFunction & getLossFunction();
};
#endif
@@ -54,6 +54,7 @@ class Config
String state{"NONE"};
boost::circular_buffer<String> history{10};
boost::circular_buffer<std::size_t> stack{50};
+float chosenActionScore{0.0};
std::vector<std::string> extraColumns{isMultiColName, childsColName, sentIdColName, EOSColName};
std::set<std::string> predicted;
int lastPoppedStack{-1};
@@ -167,6 +168,8 @@ class Config
Strategy & getStrategy();
std::size_t getCurrentSentenceStartRawInput() const;
void setCurrentSentenceStartRawInput(std::size_t value);
+void setChosenActionScore(float chosenActionScore);
+float getChosenActionScore() const;
};
#endif
@@ -56,6 +56,7 @@ class Transition
void initNothing(std::string col, std::string obj, std::string index);
void initLowercase(std::string col, std::string obj, std::string index);
void initLowercaseIndex(std::string col, std::string obj, std::string index, std::string inIndex);
+void initWriteScore(std::string colName, std::string object, std::string index);
public :
......
@@ -1053,3 +1053,38 @@ Action Action::lowercaseIndex(std::string col, Config::Object obj, int index, in
return {Type::Write, apply, undo, appliable};
}
+Action Action::writeScore(const std::string & colName, Config::Object object, int relativeIndex)
+{
+auto apply = [colName, object, relativeIndex](Config & config, Action & a)
+{
+int lineIndex = config.getRelativeWordIndex(object, relativeIndex);
+float score = config.getChosenActionScore();
+if (score != std::numeric_limits<float>::min())
+return addHypothesis(colName, lineIndex, fmt::format("{}", score)).apply(config, a);
+else
+return addHypothesis(colName, lineIndex, config.getConst(colName, lineIndex, 0)).apply(config, a);
+};
+auto undo = [colName, object, relativeIndex](Config & config, Action & a)
+{
+int lineIndex = config.getRelativeWordIndex(object, relativeIndex);
+return addHypothesis(colName, lineIndex, "").undo(config, a);
+};
+auto appliable = [colName, object, relativeIndex](const Config & config, const Action & a)
+{
+if (!config.hasRelativeWordIndex(object, relativeIndex))
+return false;
+int lineIndex = config.getRelativeWordIndex(object, relativeIndex);
+return addHypothesis(colName, lineIndex, "").appliable(config, a);
+};
+return {Type::Write, apply, undo, appliable};
+}
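// Note: writeScore copies the classifier's chosen-action score (stored on the
// Config by Beam::update and Trainer::extractExamples) into a column of the
// targeted word; std::numeric_limits<float>::min() serves as the "no score
// set" sentinel, in which case the current gold value is rewritten unchanged.
// A hypothetical machine-file rule (the column name SCORE is illustrative):
//   WRITESCORE b.0 SCORE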
@@ -143,6 +143,28 @@ void Classifier::initNeuralNetwork(const std::vector<std::string> & definition)
optimizerParameters = sm.str(2);
}))
util::myThrow(fmt::format("Invalid line '{}', expected '{}'\n", curIndex < definition.size() ? definition[curIndex] : "", "(Optimizer :) " + util::join("|", knownOptimizers)));
+curIndex++;
+if (curIndex >= definition.size() || !util::doIfNameMatch(std::regex("(?:(?:\\s|\\t)*)(?:Type :|)(?:(?:\\s|\\t)*)(.+)"), definition[curIndex], [&curIndex,this](auto sm)
+{
+auto type = sm.str(1);
+if (util::lower(type) == "regression")
+regression = true;
+else if (util::lower(type) == "classification")
+regression = false;
+else
+util::myThrow(fmt::format("Invalid type '{}', expected 'classification' or 'regression'", type));
+}))
+util::myThrow(fmt::format("Invalid line '{}', expected '{}'\n", curIndex < definition.size() ? definition[curIndex] : "", "(Type :) (classification | regression)"));
+curIndex++;
+if (curIndex >= definition.size() || !util::doIfNameMatch(std::regex("(?:(?:\\s|\\t)*)(?:Loss :|)(?:(?:\\s|\\t)*)(.+)"), definition[curIndex], [&curIndex,this](auto sm)
+{
+lossFct.init(sm.str(1));
+}))
+util::myThrow(fmt::format("Invalid line '{}', expected '{}'\n", curIndex < definition.size() ? definition[curIndex] : "", "(Loss :) lossName"));
}
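// Sketch of the two definition lines this parser now accepts (the "Type :"
// and "Loss :" prefixes are optional per the regexes above; the loss name
// must be one known to LossFunction::init):
//   Type : regression
//   Loss : mse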
void Classifier::loadOptimizer()
@@ -250,3 +272,13 @@ void Classifier::saveLast()
saveOptimizer();
}
+bool Classifier::isRegression() const
+{
+return regression;
+}
+LossFunction & Classifier::getLossFunction()
+{
+return lossFct;
+}
@@ -795,3 +795,13 @@ void Config::setCurrentSentenceStartRawInput(std::size_t value)
currentSentenceStartRawInput = value;
}
+void Config::setChosenActionScore(float chosenActionScore)
+{
+this->chosenActionScore = chosenActionScore;
+}
+float Config::getChosenActionScore() const
+{
+return chosenActionScore;
+}
@@ -7,6 +7,8 @@ Transition::Transition(const std::string & name)
{
{std::regex("WRITE ([bs])\\.(.+) (.+) (.+)"),
[this](auto sm){(initWrite(sm[3], sm[1], sm[2], sm[4]));}},
{std::regex("WRITESCORE ([bs])\\.(.+) (.+)"),
[this](auto sm){(initWriteScore(sm[3], sm[1], sm[2]));}},
{std::regex("ADD ([bs])\\.(.+) (.+) (.+)"),
[this](auto sm){(initAdd(sm[3], sm[1], sm[2], sm[4]));}},
{std::regex("eager_SHIFT"),
@@ -164,6 +166,21 @@ void Transition::initWrite(std::string colName, std::string object, std::string
costStatic = costDynamic;
}
+void Transition::initWriteScore(std::string colName, std::string object, std::string index)
+{
+auto objectValue = Config::str2object(object);
+int indexValue = std::stoi(index);
+sequence.emplace_back(Action::writeScore(colName, objectValue, indexValue));
+costDynamic = [](const Config &)
+{
+return 0;
+};
+costStatic = costDynamic;
+}
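// Parse chain sketch (assumed column name): "WRITESCORE b.0 SCORE" matches
// the regex in the constructor above and becomes initWriteScore("SCORE",
// "b", "0"), i.e. Action::writeScore("SCORE", Config::str2object("b"), 0),
// with static and dynamic cost fixed at 0.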
void Transition::initAdd(std::string colName, std::string object, std::string index, std::string value)
{
auto objectValue = Config::str2object(object);
......
+#ifndef LOSSFUNCTION__H
+#define LOSSFUNCTION__H
+#include <variant>
+#include "torch/torch.h"
+#include "CustomHingeLoss.hpp"
+class LossFunction
+{
+private :
+std::string name{"_undefined_loss_"};
+std::variant<torch::nn::CrossEntropyLoss, torch::nn::BCELoss, torch::nn::MSELoss, CustomHingeLoss> fct;
+public :
+void init(std::string name);
+torch::Tensor operator()(torch::Tensor prediction, torch::Tensor gold);
+torch::Tensor getGoldFromClassesIndexes(int nbClasses, const std::vector<long> & goldIndexes) const;
+};
+#endif
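// Minimal usage sketch (not in this commit; shapes inferred from the call
// sites below):
//   LossFunction loss;
//   loss.init("crossentropy");                  // or "bce", "mse", "hinge"
//   torch::Tensor pred = torch::rand({1, 5});   // [batch, nbClasses]
//   torch::Tensor gold = loss.getGoldFromClassesIndexes(5, {3});
//   torch::Tensor value = loss(pred, gold);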
#include "LossFunction.hpp"
#include "util.hpp"
void LossFunction::init(std::string name)
{
this->name = name;
if (util::lower(name) == "crossentropy")
fct = torch::nn::CrossEntropyLoss(torch::nn::CrossEntropyLossOptions().reduction(torch::kMean));
else if (util::lower(name) == "bce")
fct = torch::nn::BCELoss(torch::nn::BCELossOptions().reduction(torch::kMean));
else if (util::lower(name) == "mse")
fct = torch::nn::MSELoss(torch::nn::MSELossOptions().reduction(torch::kMean));
else if (util::lower(name) == "hinge")
fct = CustomHingeLoss();
else
util::myThrow(fmt::format("unknown loss function name '{}' available losses are 'crossentropy, bce, mse, hinge'", name));
}
torch::Tensor LossFunction::operator()(torch::Tensor prediction, torch::Tensor gold)
{
try
{
auto index = fct.index();
if (index == 0)
return std::get<0>(fct)(prediction, gold.reshape(gold.dim() == 0 ? 1 : gold.size(0)));
if (index == 1)
return std::get<1>(fct)(torch::softmax(prediction, 1), gold.to(torch::kFloat));
if (index == 2)
return std::get<2>(fct)(prediction, gold);
if (index == 3)
return std::get<3>(fct)(torch::softmax(prediction, 1), gold);
} catch (std::exception & e)
{
util::myThrow(fmt::format("computing loss '{}' caught '{}'", name, e.what()));
}
util::myThrow("loss is not defined");
return torch::Tensor();
}
torch::Tensor LossFunction::getGoldFromClassesIndexes(int nbClasses, const std::vector<long> & goldIndexes) const
{
auto index = fct.index();
if (index == 0 or index == 2)
{
auto gold = torch::zeros(1, torch::TensorOptions(torch::kLong));
gold[0] = goldIndexes.at(0);
return gold;
}
if (index == 1 or index == 3)
{
auto gold = torch::zeros(nbClasses, torch::TensorOptions(torch::kLong));
for (auto goldIndex : goldIndexes)
gold[goldIndex] = 1;
return gold;
}
util::myThrow("loss is not defined");
return torch::Tensor();
}
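// Gold encoding sketch: getGoldFromClassesIndexes(4, {2}) yields
//   crossentropy / mse -> tensor([2])           (one long: a class index for
//                                                classification, a float2long
//                                                target for regression)
//   bce / hinge        -> tensor([0, 0, 1, 0])  (multi-hot over nbClasses)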
@@ -4,20 +4,6 @@
#include "ReadingMachine.hpp"
#include "ConfigDataset.hpp"
#include "SubConfig.hpp"
#include "CustomHingeLoss.hpp"
class LossFunction
{
private :
std::variant<torch::nn::CrossEntropyLoss, torch::nn::BCELoss, torch::nn::MSELoss, CustomHingeLoss> fct;
public :
LossFunction(std::string name);
torch::Tensor operator()(torch::Tensor prediction, torch::Tensor gold);
torch::Tensor getGoldFromClassesIndexes(int nbClasses, const std::vector<int> & goldIndexes) const;
};
class Trainer
{
@@ -49,7 +35,7 @@ class Trainer
void saveIfNeeded(const std::string & state, std::filesystem::path dir, std::size_t threshold, int currentEpoch, bool dynamicOracle);
void addContext(std::vector<std::vector<long>> & context);
-void addClass(const LossFunction & lossFct, int nbClasses, const std::vector<int> & goldIndexes);
+void addClass(const LossFunction & lossFct, int nbClasses, const std::vector<long> & goldIndexes);
};
private :
@@ -66,7 +52,6 @@ class Trainer
DataLoader devDataLoader{nullptr};
std::size_t epochNumber{0};
int batchSize;
-LossFunction lossFct;
private :
@@ -75,7 +60,7 @@ class Trainer
public :
-Trainer(ReadingMachine & machine, int batchSize, std::string lossFunctionName);
+Trainer(ReadingMachine & machine, int batchSize);
void createDataset(BaseConfig & goldConfig, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold);
void makeDataLoader(std::filesystem::path dir);
void makeDevDataLoader(std::filesystem::path dir);
......
@@ -40,8 +40,6 @@ po::options_description MacaonTrain::getOptionsDescription()
"Reading machine file content")
("trainStrategy", po::value<std::string>()->default_value("0,ExtractGold,ResetParameters"),
"Description of what should happen during training")
("loss", po::value<std::string>()->default_value("CrossEntropy"),
"Loss function to use during training : CrossEntropy | bce | mse | hinge")
("seed", po::value<int>()->default_value(100),
"Number of examples per batch")
("scaleGrad", "Scale embedding's gradient with its frequence in the minibatch")
@@ -135,7 +133,6 @@ int MacaonTrain::main()
bool computeDevScore = variables.count("devScore") == 0 ? false : true;
auto machineContent = variables["machine"].as<std::string>();
auto trainStrategyStr = variables["trainStrategy"].as<std::string>();
-auto lossFunction = variables["loss"].as<std::string>();
auto explorationThreshold = variables["explorationThreshold"].as<float>();
auto seed = variables["seed"].as<int>();
WordEmbeddingsImpl::setMaxNorm(variables["maxNorm"].as<float>());
@@ -167,7 +164,7 @@ int MacaonTrain::main()
BaseConfig goldConfig(mcd, trainTsvFile, trainRawFile);
BaseConfig devGoldConfig(mcd, computeDevScore ? (devRawFile.empty() ? devTsvFile : "") : devTsvFile, devRawFile);
-Trainer trainer(machine, batchSize, lossFunction);
+Trainer trainer(machine, batchSize);
Decoder decoder(machine);
float bestDevScore = computeDevScore ? std::numeric_limits<float>::min() : std::numeric_limits<float>::max();
......
#include "Trainer.hpp"
#include "SubConfig.hpp"
-LossFunction::LossFunction(std::string name)
-{
-if (util::lower(name) == "crossentropy")
-fct = torch::nn::CrossEntropyLoss(torch::nn::CrossEntropyLossOptions().reduction(torch::kMean));
-else if (util::lower(name) == "bce")
-fct = torch::nn::BCELoss(torch::nn::BCELossOptions().reduction(torch::kMean));
-else if (util::lower(name) == "mse")
-fct = torch::nn::MSELoss(torch::nn::MSELossOptions().reduction(torch::kMean));
-else if (util::lower(name) == "hinge")
-fct = CustomHingeLoss();
-else
-util::myThrow(fmt::format("unknown loss function name '{}'", name));
-}
-torch::Tensor LossFunction::operator()(torch::Tensor prediction, torch::Tensor gold)
-{
-auto index = fct.index();
-if (index == 0)
-return std::get<0>(fct)(prediction, gold.reshape(gold.dim() == 0 ? 1 : gold.size(0)));
-if (index == 1)
-return std::get<1>(fct)(torch::softmax(prediction, 1), gold.to(torch::kFloat));
-if (index == 2)
-return std::get<2>(fct)(torch::softmax(prediction, 1), gold.to(torch::kFloat));
-if (index == 3)
-return std::get<3>(fct)(torch::softmax(prediction, 1), gold);
-util::myThrow("loss is not defined");
-return torch::Tensor();
-}
-torch::Tensor LossFunction::getGoldFromClassesIndexes(int nbClasses, const std::vector<int> & goldIndexes) const
-{
-auto index = fct.index();
-if (index == 0)
-{
-auto gold = torch::zeros(1, torch::TensorOptions(torch::kLong));
-gold[0] = goldIndexes.at(0);
-return gold;
-}
-if (index == 1 or index == 2 or index == 3)
-{
-auto gold = torch::zeros(nbClasses, torch::TensorOptions(torch::kLong));
-for (auto goldIndex : goldIndexes)
-gold[goldIndex] = 1;
-return gold;
-}
-util::myThrow("loss is not defined");
-return torch::Tensor();
-}
-Trainer::Trainer(ReadingMachine & machine, int batchSize, std::string lossFunctionName) : machine(machine), batchSize(batchSize), lossFct(lossFunctionName)
+Trainer::Trainer(ReadingMachine & machine, int batchSize) : machine(machine), batchSize(batchSize)
{
}
@@ -134,13 +81,14 @@ void Trainer::extractExamples(SubConfig & config, bool debug, std::filesystem::p
goldTransitions[std::rand()%goldTransitions.size()];
int nbClasses = machine.getTransitionSet(config.getState()).size();
+float bestScore = std::numeric_limits<float>::min();
if (dynamicOracle and util::choiceWithProbability(1.0) and config.getState() != "tokenizer" and config.getState() != "segmenter")
{
auto neuralInput = torch::from_blob(context[0].data(), {(long)context[0].size()}, torch::kLong).clone().to(NeuralNetworkImpl::device);
auto prediction = torch::softmax(machine.getClassifier(config.getState())->getNN()(neuralInput), -1).squeeze(0);
-float bestScore = std::numeric_limits<float>::min();
std::vector<int> candidates;
for (unsigned int i = 0; i < prediction.size(0); i++)
@@ -170,18 +118,42 @@ void Trainer::extractExamples(SubConfig & config, bool debug, std::filesystem::p
util::myThrow("No transition appliable !");
}
+std::vector<long> goldIndexes;
+float regressionTarget = 0.0;
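// For regression the gold "class" is the quantized target itself: the
// transition name (expected shape "WRITESCORE <b|s>.<index> <column>")
// identifies which column and word hold the gold score to regress on.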
+if (machine.getClassifier(config.getState())->isRegression())
+{
+auto errMessage = fmt::format("Invalid regression transition '{}'", transition->getName());
+auto splited = util::split(transition->getName(), ' ');
+if (splited.size() != 3 or splited[0] != "WRITESCORE")
+util::myThrow(errMessage);
+auto col = splited[2];
+splited = util::split(splited[1], '.');
+if (splited.size() != 2)
+util::myThrow(errMessage);
+auto object = Config::str2object(splited[0]);
+int index = std::stoi(splited[1]);
+regressionTarget = std::stof(config.getConst(col, config.getRelativeWordIndex(object, index), 0));
+goldIndexes.emplace_back(util::float2long(regressionTarget));
+}
+else
+{
+for (auto & t : goldTransitions)
+goldIndexes.emplace_back(machine.getTransitionSet(config.getState()).getTransitionIndex(t));
+}
totalNbExamples += context.size();
if (totalNbExamples >= (int)safetyNbExamplesMax)
util::myThrow(fmt::format("Trying to extract more examples than the limit ({})", util::int2HumanStr(safetyNbExamplesMax)));
-std::vector<int> goldIndexes;
-for (auto & t : goldTransitions)
-goldIndexes.emplace_back(machine.getTransitionSet(config.getState()).getTransitionIndex(t));
examplesPerState[config.getState()].addContext(context);
-examplesPerState[config.getState()].addClass(lossFct, nbClasses, goldIndexes);
+examplesPerState[config.getState()].addClass(machine.getClassifier(config.getState())->getLossFunction(), nbClasses, goldIndexes);
examplesPerState[config.getState()].saveIfNeeded(config.getState(), dir, maxNbExamplesPerFile, epoch, dynamicOracle);
+config.setChosenActionScore(bestScore);
transition->apply(config);
config.addToHistory(transition->getName());
@@ -238,7 +210,13 @@ float Trainer::processDataset(DataLoader & loader, bool train, bool printAdvance
if (prediction.dim() == 1)
prediction = prediction.unsqueeze(0);
-auto loss = machine.getClassifier(state)->getLossMultiplier()*lossFct(prediction, labels);
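// Regression labels were stored as float2long-quantized longs; scale them
// back to float scores before computing the loss (e.g. stored 5000 -> 0.5):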
+if (machine.getClassifier(state)->isRegression())
+{
+labels = labels.to(torch::kFloat);
+labels /= util::float2longScale;
+}
+auto loss = machine.getClassifier(state)->getLossMultiplier()*machine.getClassifier(state)->getLossFunction()(prediction, labels);
float lossAsFloat = 0.0;
try
{
@@ -316,7 +294,7 @@ void Trainer::Examples::addContext(std::vector<std::vector<long>> & context)
currentExampleIndex += context.size();
}
void Trainer::Examples::addClass(const