Skip to content
Snippets Groups Projects
Commit 18daad6f authored by Franck Dary
Browse files

Added infos to error analysis

parent fa3f47ec
No related branches found
No related tags found
No related merge requests found
......@@ -21,13 +21,15 @@ class Error
int indexOfPrediction;
int indexOfGold;
int distanceWithGold;
float entropy;
public :
Error(std::string &, std::string &, Classifier::WeightedActions &);
bool isError() const;
const std::string & getType() const;
const bool goldWasAtDistance();
bool goldWasAtDistance(int distance) const;
float getEntropy() const;
};
class ErrorSequence
......
......@@ -7,6 +7,7 @@ prediction(prediction), gold(gold), weightedActions(weightedActions)
indexOfPrediction = -1;
indexOfGold = -1;
distanceWithGold = 0;
entropy = Classifier::computeEntropy(weightedActions);
for (unsigned int i = 0; i < weightedActions.size(); i++)
{
......@@ -31,11 +32,21 @@ prediction(prediction), gold(gold), weightedActions(weightedActions)
}
}
// Tells whether the distance recorded for the gold action matches the
// requested one.
//
// distance : the distance to compare against distanceWithGold.
// Returns true only when distanceWithGold is exactly equal to distance.
bool Error::goldWasAtDistance(int distance) const
{
  const bool sameDistance = (distance == distanceWithGold);

  return sameDistance;
}
const std::string & Error::getType() const
{
return type;
}
float Error::getEntropy() const
{
return entropy;
}
void ErrorSequence::add(const Error & error)
{
sequence.emplace_back(error);
......@@ -69,25 +80,49 @@ bool Error::isError() const
void Errors::printStats()
{
unsigned int minDistanceToCheck = 1;
unsigned int maxDistanceToCheck = 5;
int nbErrorsToKeep = 10;
std::map<std::string, int> nbErrorOccurencesByType;
std::map<std::string, int> nbOccurencesByType;
std::map<std::string, float> meanEntropyByType;
std::map< std::string, std::vector<int> > distanceOfGoldByType;
std::map< std::string, std::vector<float> > meanEntropyByDistanceByType;
int nbErrorsTotal = 0;
for (auto & sequence : sequences)
for (auto & error : sequence.getSequence())
{
nbOccurencesByType[error.getType()]++;
meanEntropyByType[error.getType()] += error.getEntropy();
if (!error.isError())
{
}
else
{
nbOccurencesByType[error.getType()]++;
nbErrorOccurencesByType[error.getType()]++;
for (unsigned int i = minDistanceToCheck; i <= maxDistanceToCheck; i++)
{
while (distanceOfGoldByType[error.getType()].size() < (unsigned)(i+1))
distanceOfGoldByType[error.getType()].emplace_back(0);
while (meanEntropyByDistanceByType[error.getType()].size() < (unsigned)(i+1))
meanEntropyByDistanceByType[error.getType()].emplace_back(0.0);
distanceOfGoldByType[error.getType()][i] += error.goldWasAtDistance(i) ? 1 : 0;
meanEntropyByDistanceByType[error.getType()][i] += error.goldWasAtDistance(i) ? error.getEntropy() : 0;
}
nbErrorsTotal++;
}
}
for (auto & it : meanEntropyByDistanceByType)
for (unsigned int i = 0; i < it.second.size(); i++)
it.second[i] /= distanceOfGoldByType[it.first][i];
for (auto & it : meanEntropyByType)
it.second /= nbOccurencesByType[it.first];
std::vector< std::pair<std::string,int> > typesOccurences;
for (auto & it : nbOccurencesByType)
for (auto & it : nbErrorOccurencesByType)
typesOccurences.emplace_back(std::pair<std::string,int>(it.first,it.second));
std::sort(typesOccurences.begin(), typesOccurences.end(),
......@@ -96,17 +131,31 @@ void Errors::printStats()
return a.second > b.second;
});
typesOccurences.resize(nbErrorsToKeep);
std::vector< std::vector<std::string> > columns;
columns.clear();
columns.resize(4);
columns.resize(5);
for (auto & it : typesOccurences)
{
columns[0].emplace_back(it.first);
columns[1].emplace_back("= " + float2str(it.second*100.0/nbErrorsTotal,"%.2f%%"));
columns[2].emplace_back(" of errors (" + std::to_string(it.second));
columns[3].emplace_back(" / " + std::to_string(nbErrorsTotal) + ")");
columns[2].emplace_back("of errors");
columns[3].emplace_back("("+std::to_string(it.second) + " / " + std::to_string(nbErrorsTotal) + ")");
columns[4].emplace_back("mean entropy : " + float2str(meanEntropyByType[it.first], "%.2f"));
for (unsigned int dist = minDistanceToCheck; dist <= maxDistanceToCheck; dist++)
{
columns[0].emplace_back(" Gold at distance");
columns[1].emplace_back(std::to_string(dist));
columns[2].emplace_back(float2str(distanceOfGoldByType[it.first][dist]*100.0/nbErrorOccurencesByType[it.first],"%.2f%%"));
columns[3].emplace_back("of the time");
columns[4].emplace_back("with mean entropy : " + float2str(meanEntropyByDistanceByType[it.first][dist], "%.2f"));
}
for (auto & col : columns)
col.emplace_back("");
}
printColumns(stderr, columns, 1);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment