Commit 006cdfc9 authored by Franck Dary

trying to fix backtrack prediction

parent b44bc247
@@ -162,7 +162,7 @@ float MLPBase::update(FeatureModel::FeatureDescription & fd, const std::vector<f
   dynet::Expression batchedLoss;
   std::vector<dynet::Expression> goldExpressions;
   for (auto & gold : goldsContinuous)
-    goldExpressions.emplace_back(dynet::input(cg, dynet::Dim({1,(unsigned int)gold.size()}), gold));
+    goldExpressions.emplace_back(dynet::input(cg, dynet::Dim({(unsigned int)gold.size()}), gold));
   dynet::Expression batchedGold = dynet::concatenate_to_batch(goldExpressions);
   batchedLoss = dynet::sum_batches(dynet::squared_distance(output, batchedGold));
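For reference, a minimal standalone sketch of the batching pattern after this change (the example values, the fake output and the main() wrapper are made up here, not taken from the repository): each gold vector is declared with a plain {size} dimension and the examples are stacked along the batch dimension, so squared_distance compares per-example tensors of the same shape as the network output. With the previous Dim({1,(unsigned int)gold.size()}), each gold expression presumably carried an extra leading dimension of 1, which this commit appears to drop.

#include <dynet/dynet.h>
#include <dynet/expr.h>
#include <cstdio>
#include <vector>

int main(int argc, char ** argv)
{
  dynet::initialize(argc, argv);
  dynet::ComputationGraph cg;

  // Two made-up gold distributions of dimension 3.
  std::vector<std::vector<float>> goldsContinuous = {{1.0,0.0,0.0},{0.0,1.0,0.0}};
  std::vector<dynet::Expression> goldExpressions;
  for (auto & gold : goldsContinuous)
    // One {size}-dimensional vector per example, as in the new code.
    goldExpressions.emplace_back(dynet::input(cg, dynet::Dim({(unsigned int)gold.size()}), gold));
  dynet::Expression batchedGold = dynet::concatenate_to_batch(goldExpressions);

  // Stand-in for the network output: same per-example shape, same batch size (2).
  std::vector<float> fakeOut = {0.2,0.5,0.3, 0.6,0.1,0.3};
  dynet::Expression output = dynet::input(cg, dynet::Dim({3}, 2), fakeOut);

  // Per-example squared distance, summed over the batch.
  dynet::Expression loss = dynet::sum_batches(dynet::squared_distance(output, batchedGold));
  fprintf(stderr, "loss = %f\n", dynet::as_scalar(cg.forward(loss)));
  return 0;
}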
@@ -400,20 +400,36 @@ void Trainer::doStepTrain()
   if (TI.lastActionWasPredicted[normalStateName])
   {
+    std::string updateInfos;
     if (newCost >= lastCost)
     {
-      loss = tm.getCurrentClassifier()->trainOnExample(pendingFD[tm.getCurrentClassifier()->name], tm.getCurrentClassifier()->getActionIndex("EPSILON"));
+      // loss = tm.getCurrentClassifier()->trainOnExample(pendingFD[tm.getCurrentClassifier()->name], tm.getCurrentClassifier()->getActionIndex("EPSILON"));
+      int nbActions = tm.getCurrentClassifier()->getNbActions();
+      int backIndex = tm.getCurrentClassifier()->getActionIndex(trainConfig.getCurrentStateHistory().top());
+      float value = 1.0 / (nbActions-1);
+      std::vector<float> goldOutput(nbActions, value);
+      goldOutput[backIndex] = 0.0;
+      loss = tm.getCurrentClassifier()->trainOnExample(pendingFD[tm.getCurrentClassifier()->name], goldOutput);
+      updateInfos = "predicted : <"+trainConfig.getCurrentStateHistory().top()+">, bad decision";
     }
     else
     {
-      loss = tm.getCurrentClassifier()->trainOnExample(pendingFD[tm.getCurrentClassifier()->name], tm.getCurrentClassifier()->getActionIndex(trainConfig.getCurrentStateHistory().top()));
+      //loss = tm.getCurrentClassifier()->trainOnExample(pendingFD[tm.getCurrentClassifier()->name], tm.getCurrentClassifier()->getActionIndex(trainConfig.getCurrentStateHistory().top()));
+      int nbActions = tm.getCurrentClassifier()->getNbActions();
+      int backIndex = tm.getCurrentClassifier()->getActionIndex(trainConfig.getCurrentStateHistory().top());
+      std::vector<float> goldOutput(nbActions, 0.0);
+      goldOutput[backIndex] = 1.0;
+      if (ProgramParameters::debug)
+        fprintf(stderr, "Updating neural network \'%s\', gold=\'%s\'\n", tm.getCurrentClassifier()->name.c_str(), trainConfig.getCurrentStateHistory().top().c_str());
+      loss = tm.getCurrentClassifier()->trainOnExample(pendingFD[tm.getCurrentClassifier()->name], goldOutput);
+      updateInfos = "predicted : <"+trainConfig.getCurrentStateHistory().top()+">, good decision";
     }
     if (ProgramParameters::debug)
-      fprintf(stderr, "Updating neural network \'%s\'\n", tm.getCurrentClassifier()->name.c_str());
+      fprintf(stderr, "Updating neural network \'%s\' : %s\n", tm.getCurrentClassifier()->name.c_str(), updateInfos.c_str());
     TI.addTrainLoss(tm.getCurrentClassifier()->name, loss);
   }
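To make the intent of the two branches above explicit: instead of training on a single gold action index, the classifier is now given a full target distribution over its actions. When the backtrack made the cost worse (newCost >= lastCost), the previously predicted action receives probability 0 and the remaining mass is spread uniformly over the other actions; when it helped, the target is a one-hot on that action. A small self-contained sketch of just this construction (makeGoldOutput, the action count and the index are hypothetical, not names from the repository):

#include <cstdio>
#include <vector>

// Build the target distribution used as "goldOutput" above.
// Bad decision: the backtracked action gets 0 and the other actions share
// the probability mass uniformly. Good decision: one-hot on that action.
std::vector<float> makeGoldOutput(int nbActions, int backIndex, bool goodDecision)
{
  if (goodDecision)
  {
    std::vector<float> gold(nbActions, 0.0f);
    gold[backIndex] = 1.0f;
    return gold;
  }
  std::vector<float> gold(nbActions, 1.0f / (nbActions-1));
  gold[backIndex] = 0.0f;
  return gold;
}

int main()
{
  // Hypothetical example: 4 actions, the backtracked action has index 2.
  for (bool good : {false, true})
  {
    std::vector<float> gold = makeGoldOutput(4, 2, good);
    fprintf(stderr, "%s decision :", good ? "good" : "bad");
    for (float v : gold)
      fprintf(stderr, " %.3f", v);
    fprintf(stderr, "\n");
  }
  return 0;
}

With 4 actions and the backtracked action at index 2, this prints 0.333 0.333 0.000 0.333 for a bad decision and 0.000 0.000 1.000 0.000 for a good one.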