Skip to content
Snippets Groups Projects
Commit 20aef337 authored by Franck Dary's avatar Franck Dary
Browse files

Added more information to the error analysis, and moved it to macaon_decode

parent 18daad6f
No related branches found
No related tags found
No related merge requests found
...@@ -4,6 +4,7 @@ add_executable(macaon_decode src/macaon_decode.cpp) ...@@ -4,6 +4,7 @@ add_executable(macaon_decode src/macaon_decode.cpp)
target_link_libraries(macaon_decode transition_machine) target_link_libraries(macaon_decode transition_machine)
target_link_libraries(macaon_decode decoder) target_link_libraries(macaon_decode decoder)
target_link_libraries(macaon_decode ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_link_libraries(macaon_decode ${Boost_PROGRAM_OPTIONS_LIBRARY})
target_link_libraries(macaon_decode errors)
install(TARGETS macaon_decode DESTINATION bin) install(TARGETS macaon_decode DESTINATION bin)
#compiling library #compiling library
......
#include "Decoder.hpp" #include "Decoder.hpp"
#include "util.hpp" #include "util.hpp"
#include "Error.hpp"
Decoder::Decoder(TransitionMachine & tm, Config & config) Decoder::Decoder(TransitionMachine & tm, Config & config)
: tm(tm), config(config) : tm(tm), config(config)
...@@ -11,6 +12,8 @@ void Decoder::decode() ...@@ -11,6 +12,8 @@ void Decoder::decode()
float entropyAccumulator = 0.0; float entropyAccumulator = 0.0;
int nbActionsInSequence = 0; int nbActionsInSequence = 0;
bool justFlipped = false; bool justFlipped = false;
Errors errors;
errors.newSequence();
while (!config.isFinal()) while (!config.isFinal())
{ {
TransitionMachine::State * currentState = tm.getCurrentState(); TransitionMachine::State * currentState = tm.getCurrentState();
...@@ -61,6 +64,16 @@ void Decoder::decode() ...@@ -61,6 +64,16 @@ void Decoder::decode()
} }
} }
if (classifier->needsTrain() && ProgramParameters::errorAnalysis && (classifier->name == ProgramParameters::classifierName || ProgramParameters::classifierName.empty()))
{
auto zeroCostActions = classifier->getZeroCostActions(config);
std::string oAction = zeroCostActions[0];
for (auto & s : zeroCostActions)
if (action->name == s)
oAction = s;
errors.add({action->name, oAction, weightedActions});
}
action->apply(config); action->apply(config);
TransitionMachine::Transition * transition = tm.getTransition(predictedAction); TransitionMachine::Transition * transition = tm.getTransition(predictedAction);
...@@ -71,10 +84,7 @@ void Decoder::decode() ...@@ -71,10 +84,7 @@ void Decoder::decode()
float entropy = Classifier::computeEntropy(weightedActions); float entropy = Classifier::computeEntropy(weightedActions);
config.addToEntropyHistory(entropy); config.addToEntropyHistory(entropy);
if (ProgramParameters::printEntropy)
{
nbActionsInSequence++; nbActionsInSequence++;
entropyAccumulator += entropy; entropyAccumulator += entropy;
if (config.head >= 1 && config.getTape(ProgramParameters::sequenceDelimiterTape)[config.head-1] != ProgramParameters::sequenceDelimiter) if (config.head >= 1 && config.getTape(ProgramParameters::sequenceDelimiterTape)[config.head-1] != ProgramParameters::sequenceDelimiter)
...@@ -83,15 +93,19 @@ void Decoder::decode() ...@@ -83,15 +93,19 @@ void Decoder::decode()
if ((config.head >= 1 && config.getTape(ProgramParameters::sequenceDelimiterTape)[config.head-1] == ProgramParameters::sequenceDelimiter && !justFlipped)) if ((config.head >= 1 && config.getTape(ProgramParameters::sequenceDelimiterTape)[config.head-1] == ProgramParameters::sequenceDelimiter && !justFlipped))
{ {
justFlipped = true; justFlipped = true;
errors.newSequence();
entropyAccumulator /= nbActionsInSequence; entropyAccumulator /= nbActionsInSequence;
nbActionsInSequence = 0; nbActionsInSequence = 0;
if (ProgramParameters::printEntropy)
fprintf(stderr, "Entropy : %.2f\n", entropyAccumulator); fprintf(stderr, "Entropy : %.2f\n", entropyAccumulator);
entropyAccumulator = 0.0; entropyAccumulator = 0.0;
} }
}
} }
if (ProgramParameters::errorAnalysis)
errors.printStats();
else
config.printAsOutput(stdout); config.printAsOutput(stdout);
} }
...@@ -52,7 +52,14 @@ po::options_description getOptionsDescription() ...@@ -52,7 +52,14 @@ po::options_description getOptionsDescription()
("lang", po::value<std::string>()->default_value("fr"), ("lang", po::value<std::string>()->default_value("fr"),
"Language you are working with"); "Language you are working with");
desc.add(req).add(opt); po::options_description analysis("Error analysis related options");
analysis.add_options()
("errorAnalysis", "Print an analysis of errors")
("meanEntropy", "Print the mean entropy for error types")
("classifier", po::value<std::string>()->default_value(""),
"Name of the monitored classifier, if not specified monitor everyone");
desc.add(req).add(opt).add(analysis);
return desc; return desc;
} }
...@@ -111,6 +118,8 @@ int main(int argc, char * argv[]) ...@@ -111,6 +118,8 @@ int main(int argc, char * argv[])
ProgramParameters::input = vm["input"].as<std::string>(); ProgramParameters::input = vm["input"].as<std::string>();
ProgramParameters::mcdName = vm["mcd"].as<std::string>(); ProgramParameters::mcdName = vm["mcd"].as<std::string>();
ProgramParameters::debug = vm.count("debug") == 0 ? false : true; ProgramParameters::debug = vm.count("debug") == 0 ? false : true;
ProgramParameters::errorAnalysis = vm.count("errorAnalysis") == 0 ? false : true;
ProgramParameters::meanEntropy = vm.count("meanEntropy") == 0 ? false : true;
ProgramParameters::dicts = vm["dicts"].as<std::string>(); ProgramParameters::dicts = vm["dicts"].as<std::string>();
ProgramParameters::printEntropy = vm.count("printEntropy") == 0 ? false : true; ProgramParameters::printEntropy = vm.count("printEntropy") == 0 ? false : true;
ProgramParameters::lang = vm["lang"].as<std::string>(); ProgramParameters::lang = vm["lang"].as<std::string>();
......
...@@ -82,25 +82,55 @@ void Errors::printStats() ...@@ -82,25 +82,55 @@ void Errors::printStats()
{ {
unsigned int minDistanceToCheck = 1; unsigned int minDistanceToCheck = 1;
unsigned int maxDistanceToCheck = 5; unsigned int maxDistanceToCheck = 5;
int window = 10;
int nbErrorsToKeep = 10; int nbErrorsToKeep = 10;
std::map<std::string, int> nbErrorOccurencesByType; std::map<std::string, int> nbErrorOccurencesByType;
std::map<std::string, int> nbFirstErrorOccurencesByType;
std::map<std::string, float> nbFirstErrorIntroduced;
std::map<std::string, int> nbOccurencesByType; std::map<std::string, int> nbOccurencesByType;
std::map<std::string, float> meanEntropyByType; std::map<std::string, float> meanEntropyByType;
std::map< std::string, std::vector<int> > distanceOfGoldByType; std::map< std::string, std::vector<int> > distanceOfGoldByType;
std::map< std::string, std::vector<float> > meanEntropyByDistanceByType; std::map< std::string, std::vector<float> > meanEntropyByDistanceByType;
std::map< std::string, std::vector<int> > distanceOfGoldByFirstType;
std::map< std::string, std::vector<float> > meanEntropyByDistanceByFirstType;
int nbErrorsTotal = 0; int nbErrorsTotal = 0;
int nbFirstErrorsTotal = 0;
int nbActionsTotal = 0;
// Local helper: writes a horizontal rule made of 80 '-' characters to stderr,
// terminated by a newline. Used to visually separate the printed tables.
auto printLine = []()
{
  const int ruleWidth = 80;
  int remaining = ruleWidth;
  while (remaining-- > 0)
    fputc('-', stderr);
  fputc('\n', stderr);
};
for (auto & sequence : sequences) for (auto & sequence : sequences)
for (auto & error : sequence.getSequence())
{ {
bool firstErrorMet = false;
for (unsigned index = 0; index < sequence.getSequence().size(); index++)
{
auto & error = sequence.getSequence()[index];
nbOccurencesByType[error.getType()]++; nbOccurencesByType[error.getType()]++;
meanEntropyByType[error.getType()] += error.getEntropy(); meanEntropyByType[error.getType()] += error.getEntropy();
nbActionsTotal++;
if (!error.isError()) if (!error.isError())
{ {
} }
else else
{ {
nbErrorOccurencesByType[error.getType()]++; nbErrorOccurencesByType[error.getType()]++;
if (!firstErrorMet)
{
nbFirstErrorOccurencesByType[error.getType()]++;
nbFirstErrorsTotal++;
for (unsigned int i = index+1; i < sequence.getSequence().size(); i++)
if (sequence.getSequence()[i].isError())
{
if ((int)(i - index) > window && window)
break;
nbFirstErrorIntroduced[error.getType()] += 1;
}
}
for (unsigned int i = minDistanceToCheck; i <= maxDistanceToCheck; i++) for (unsigned int i = minDistanceToCheck; i <= maxDistanceToCheck; i++)
{ {
while (distanceOfGoldByType[error.getType()].size() < (unsigned)(i+1)) while (distanceOfGoldByType[error.getType()].size() < (unsigned)(i+1))
...@@ -109,8 +139,20 @@ void Errors::printStats() ...@@ -109,8 +139,20 @@ void Errors::printStats()
meanEntropyByDistanceByType[error.getType()].emplace_back(0.0); meanEntropyByDistanceByType[error.getType()].emplace_back(0.0);
distanceOfGoldByType[error.getType()][i] += error.goldWasAtDistance(i) ? 1 : 0; distanceOfGoldByType[error.getType()][i] += error.goldWasAtDistance(i) ? 1 : 0;
meanEntropyByDistanceByType[error.getType()][i] += error.goldWasAtDistance(i) ? error.getEntropy() : 0; meanEntropyByDistanceByType[error.getType()][i] += error.goldWasAtDistance(i) ? error.getEntropy() : 0;
if (!firstErrorMet)
{
while (distanceOfGoldByFirstType[error.getType()].size() < (unsigned)(i+1))
distanceOfGoldByFirstType[error.getType()].emplace_back(0);
while (meanEntropyByDistanceByFirstType[error.getType()].size() < (unsigned)(i+1))
meanEntropyByDistanceByFirstType[error.getType()].emplace_back(0.0);
distanceOfGoldByFirstType[error.getType()][i] += error.goldWasAtDistance(i) ? 1 : 0;
meanEntropyByDistanceByFirstType[error.getType()][i] += error.goldWasAtDistance(i) ? error.getEntropy() : 0;
}
} }
nbErrorsTotal++; nbErrorsTotal++;
firstErrorMet = true;
}
} }
} }
...@@ -118,9 +160,16 @@ void Errors::printStats() ...@@ -118,9 +160,16 @@ void Errors::printStats()
for (unsigned int i = 0; i < it.second.size(); i++) for (unsigned int i = 0; i < it.second.size(); i++)
it.second[i] /= distanceOfGoldByType[it.first][i]; it.second[i] /= distanceOfGoldByType[it.first][i];
for (auto & it : meanEntropyByDistanceByFirstType)
for (unsigned int i = 0; i < it.second.size(); i++)
it.second[i] /= distanceOfGoldByFirstType[it.first][i];
for (auto & it : meanEntropyByType) for (auto & it : meanEntropyByType)
it.second /= nbOccurencesByType[it.first]; it.second /= nbOccurencesByType[it.first];
for (auto & it : nbFirstErrorOccurencesByType)
nbFirstErrorIntroduced[it.first] /= it.second;
std::vector< std::pair<std::string,int> > typesOccurences; std::vector< std::pair<std::string,int> > typesOccurences;
for (auto & it : nbErrorOccurencesByType) for (auto & it : nbErrorOccurencesByType)
typesOccurences.emplace_back(std::pair<std::string,int>(it.first,it.second)); typesOccurences.emplace_back(std::pair<std::string,int>(it.first,it.second));
...@@ -131,18 +180,22 @@ void Errors::printStats() ...@@ -131,18 +180,22 @@ void Errors::printStats()
return a.second > b.second; return a.second > b.second;
}); });
typesOccurences.resize(nbErrorsToKeep); typesOccurences.resize(std::min(nbErrorsToKeep, (int)typesOccurences.size()));
fprintf(stderr, "%.2f%% of predicted actions where correct (%d / %d)\n",
100.0*(nbActionsTotal-nbErrorsTotal)/nbActionsTotal, nbActionsTotal-nbErrorsTotal,nbActionsTotal);
fprintf(stderr, "Format : Predicted->Gold\n");
std::vector< std::vector<std::string> > columns; std::vector< std::vector<std::string> > columns;
columns.clear(); columns.clear();
columns.resize(5); columns.resize(ProgramParameters::meanEntropy ? 5 : 4);
for (auto & it : typesOccurences) for (auto & it : typesOccurences)
{ {
columns[0].emplace_back(it.first); columns[0].emplace_back(it.first);
columns[1].emplace_back("= " + float2str(it.second*100.0/nbErrorsTotal,"%.2f%%")); columns[1].emplace_back("= " + float2str(it.second*100.0/nbErrorsTotal,"%.2f%%"));
columns[2].emplace_back("of errors"); columns[2].emplace_back("of errors");
columns[3].emplace_back("("+std::to_string(it.second) + " / " + std::to_string(nbErrorsTotal) + ")"); columns[3].emplace_back("("+std::to_string(it.second) + " / " + std::to_string(nbErrorsTotal) + ")");
if (ProgramParameters::meanEntropy)
columns[4].emplace_back("mean entropy : " + float2str(meanEntropyByType[it.first], "%.2f")); columns[4].emplace_back("mean entropy : " + float2str(meanEntropyByType[it.first], "%.2f"));
for (unsigned int dist = minDistanceToCheck; dist <= maxDistanceToCheck; dist++) for (unsigned int dist = minDistanceToCheck; dist <= maxDistanceToCheck; dist++)
...@@ -151,7 +204,51 @@ void Errors::printStats() ...@@ -151,7 +204,51 @@ void Errors::printStats()
columns[1].emplace_back(std::to_string(dist)); columns[1].emplace_back(std::to_string(dist));
columns[2].emplace_back(float2str(distanceOfGoldByType[it.first][dist]*100.0/nbErrorOccurencesByType[it.first],"%.2f%%")); columns[2].emplace_back(float2str(distanceOfGoldByType[it.first][dist]*100.0/nbErrorOccurencesByType[it.first],"%.2f%%"));
columns[3].emplace_back("of the time"); columns[3].emplace_back("of the time");
columns[4].emplace_back("with mean entropy : " + float2str(meanEntropyByDistanceByType[it.first][dist], "%.2f")); if (ProgramParameters::meanEntropy)
columns[4].emplace_back("mean entropy : " + float2str(meanEntropyByDistanceByType[it.first][dist], "%.2f"));
}
for (auto & col : columns)
col.emplace_back("");
}
printLine();
printColumns(stderr, columns, 1);
printLine();
std::vector< std::pair<std::string,int> > typesFirstOccurences;
for (auto & it : nbFirstErrorOccurencesByType)
typesFirstOccurences.emplace_back(std::pair<std::string,int>(it.first,it.second));
std::sort(typesFirstOccurences.begin(), typesFirstOccurences.end(),
[](const std::pair<std::string,int> & a, const std::pair<std::string,int> & b)
{
return a.second > b.second;
});
typesFirstOccurences.resize(std::min(nbErrorsToKeep, (int)typesFirstOccurences.size()));
columns.clear();
columns.resize(ProgramParameters::meanEntropy ? 6 : 5);
for (auto & it : typesFirstOccurences)
{
columns[0].emplace_back(it.first);
columns[1].emplace_back("= " + float2str(it.second*100.0/nbFirstErrorsTotal,"%.2f%%"));
columns[2].emplace_back("of first errors");
columns[3].emplace_back("("+std::to_string(it.second) + " / " + std::to_string(nbFirstErrorsTotal) + ")");
columns[4].emplace_back("introduces " + float2str(nbFirstErrorIntroduced[it.first],"%.2f errors"));
if (ProgramParameters::meanEntropy)
columns[5].emplace_back("mean entropy : " + float2str(meanEntropyByType[it.first], "%.2f"));
for (unsigned int dist = minDistanceToCheck; dist <= maxDistanceToCheck; dist++)
{
columns[0].emplace_back(" Gold at distance");
columns[1].emplace_back(std::to_string(dist));
columns[2].emplace_back(float2str(distanceOfGoldByFirstType[it.first][dist]*100.0/nbFirstErrorOccurencesByType[it.first],"%.2f%%"));
columns[3].emplace_back("of the time");
columns[4].emplace_back("");
if (ProgramParameters::meanEntropy)
columns[5].emplace_back("mean entropy : " + float2str(meanEntropyByDistanceByFirstType[it.first][dist], "%.2f"));
} }
for (auto & col : columns) for (auto & col : columns)
...@@ -159,5 +256,6 @@ void Errors::printStats() ...@@ -159,5 +256,6 @@ void Errors::printStats()
} }
printColumns(stderr, columns, 1); printColumns(stderr, columns, 1);
printLine();
} }
...@@ -55,6 +55,8 @@ struct ProgramParameters ...@@ -55,6 +55,8 @@ struct ProgramParameters
static int batchSize; static int batchSize;
static std::string loss; static std::string loss;
static std::string dicts; static std::string dicts;
static bool errorAnalysis;
static bool meanEntropy;
static std::map<std::string,std::string> featureModelByClassifier; static std::map<std::string,std::string> featureModelByClassifier;
private : private :
......
...@@ -42,6 +42,8 @@ bool ProgramParameters::randomEmbeddings; ...@@ -42,6 +42,8 @@ bool ProgramParameters::randomEmbeddings;
bool ProgramParameters::randomParameters; bool ProgramParameters::randomParameters;
bool ProgramParameters::printEntropy; bool ProgramParameters::printEntropy;
bool ProgramParameters::printTime; bool ProgramParameters::printTime;
bool ProgramParameters::errorAnalysis;
bool ProgramParameters::meanEntropy;
int ProgramParameters::iterationSize; int ProgramParameters::iterationSize;
int ProgramParameters::nbTrain; int ProgramParameters::nbTrain;
std::string ProgramParameters::sequenceDelimiterTape; std::string ProgramParameters::sequenceDelimiterTape;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment