diff --git a/error_correction/include/Error.hpp b/error_correction/include/Error.hpp index 4a49d6bc9eb5026b6bc50c38c01c093cda381671..5f9f8d82dc5bec350a85f1e07f5d4ca2f873487e 100644 --- a/error_correction/include/Error.hpp +++ b/error_correction/include/Error.hpp @@ -21,13 +21,15 @@ class Error int indexOfPrediction; int indexOfGold; int distanceWithGold; + float entropy; public : Error(std::string &, std::string &, Classifier::WeightedActions &); bool isError() const; const std::string & getType() const; - const bool goldWasAtDistance(); + bool goldWasAtDistance(int distance) const; + float getEntropy() const; }; class ErrorSequence diff --git a/error_correction/src/Error.cpp b/error_correction/src/Error.cpp index bf9454079e86c1afde2e9175ee1a5b4d34d5a558..a99c5c55c3ae1f9576e0c5c8be1cf6834a0b158a 100644 --- a/error_correction/src/Error.cpp +++ b/error_correction/src/Error.cpp @@ -7,6 +7,7 @@ prediction(prediction), gold(gold), weightedActions(weightedActions) indexOfPrediction = -1; indexOfGold = -1; distanceWithGold = 0; + entropy = Classifier::computeEntropy(weightedActions); for (unsigned int i = 0; i < weightedActions.size(); i++) { @@ -31,11 +32,21 @@ prediction(prediction), gold(gold), weightedActions(weightedActions) } } +bool Error::goldWasAtDistance(int distance) const +{ + return distanceWithGold == distance; +} + const std::string & Error::getType() const { return type; } +float Error::getEntropy() const +{ + return entropy; +} + void ErrorSequence::add(const Error & error) { sequence.emplace_back(error); @@ -69,25 +80,49 @@ bool Error::isError() const void Errors::printStats() { + unsigned int minDistanceToCheck = 1; + unsigned int maxDistanceToCheck = 5; + int nbErrorsToKeep = 10; + std::map<std::string, int> nbErrorOccurencesByType; std::map<std::string, int> nbOccurencesByType; + std::map<std::string, float> meanEntropyByType; + std::map< std::string, std::vector<int> > distanceOfGoldByType; + std::map< std::string, std::vector<float> > meanEntropyByDistanceByType; int nbErrorsTotal = 0; for (auto & sequence : sequences) for (auto & error : sequence.getSequence()) { + nbOccurencesByType[error.getType()]++; + meanEntropyByType[error.getType()] += error.getEntropy(); if (!error.isError()) { - } else { - nbOccurencesByType[error.getType()]++; + nbErrorOccurencesByType[error.getType()]++; + for (unsigned int i = minDistanceToCheck; i <= maxDistanceToCheck; i++) + { + while (distanceOfGoldByType[error.getType()].size() < (unsigned)(i+1)) + distanceOfGoldByType[error.getType()].emplace_back(0); + while (meanEntropyByDistanceByType[error.getType()].size() < (unsigned)(i+1)) + meanEntropyByDistanceByType[error.getType()].emplace_back(0.0); + distanceOfGoldByType[error.getType()][i] += error.goldWasAtDistance(i) ? 1 : 0; + meanEntropyByDistanceByType[error.getType()][i] += error.goldWasAtDistance(i) ? error.getEntropy() : 0; + } nbErrorsTotal++; } } + for (auto & it : meanEntropyByDistanceByType) + for (unsigned int i = 0; i < it.second.size(); i++) + it.second[i] /= distanceOfGoldByType[it.first][i]; + + for (auto & it : meanEntropyByType) + it.second /= nbOccurencesByType[it.first]; + std::vector< std::pair<std::string,int> > typesOccurences; - for (auto & it : nbOccurencesByType) + for (auto & it : nbErrorOccurencesByType) typesOccurences.emplace_back(std::pair<std::string,int>(it.first,it.second)); std::sort(typesOccurences.begin(), typesOccurences.end(), @@ -96,17 +131,31 @@ void Errors::printStats() return a.second > b.second; }); + typesOccurences.resize(nbErrorsToKeep); + std::vector< std::vector<std::string> > columns; columns.clear(); - columns.resize(4); + columns.resize(5); for (auto & it : typesOccurences) { - columns[0].emplace_back(it.first); columns[1].emplace_back("= " + float2str(it.second*100.0/nbErrorsTotal,"%.2f%%")); - columns[2].emplace_back(" of errors (" + std::to_string(it.second)); - columns[3].emplace_back(" / " + std::to_string(nbErrorsTotal) + ")"); + columns[2].emplace_back("of errors"); + columns[3].emplace_back("("+std::to_string(it.second) + " / " + std::to_string(nbErrorsTotal) + ")"); + columns[4].emplace_back("mean entropy : " + float2str(meanEntropyByType[it.first], "%.2f")); + + for (unsigned int dist = minDistanceToCheck; dist <= maxDistanceToCheck; dist++) + { + columns[0].emplace_back(" Gold at distance"); + columns[1].emplace_back(std::to_string(dist)); + columns[2].emplace_back(float2str(distanceOfGoldByType[it.first][dist]*100.0/nbErrorOccurencesByType[it.first],"%.2f%%")); + columns[3].emplace_back("of the time"); + columns[4].emplace_back("with mean entropy : " + float2str(meanEntropyByDistanceByType[it.first][dist], "%.2f")); + } + + for (auto & col : columns) + col.emplace_back(""); } printColumns(stderr, columns, 1);