diff --git a/decoder/src/Decoder.cpp b/decoder/src/Decoder.cpp index 9f5ab48dc1654a6faac0a37f0478b2a8dd103ae8..bfd694e266719173aa9e73cd806e30c74e4e4f0e 100644 --- a/decoder/src/Decoder.cpp +++ b/decoder/src/Decoder.cpp @@ -1,6 +1,7 @@ #include "Decoder.hpp" #include "util.hpp" #include "Error.hpp" +#include "ActionBank.hpp" Decoder::Decoder(TransitionMachine & tm, Config & config) : tm(tm), config(config) @@ -71,7 +72,9 @@ void Decoder::decode() if (action->name == s) oAction = s; int actionCost = classifier->getActionCost(config, action->name); - errors.add({action->name, oAction, weightedActions, actionCost}); + int linkLengthPrediction = ActionBank::getLinkLength(config, action->name); + int linkLengthGold = ActionBank::getLinkLength(config, oAction); + errors.add({action->name, oAction, weightedActions, actionCost, linkLengthPrediction, linkLengthGold}); } action->apply(config); diff --git a/error_correction/include/Error.hpp b/error_correction/include/Error.hpp index da1259a173243f87ea8c28825eee044bf259004d..9ad268c99178a813e3c7df88b80b9460a3696a04 100644 --- a/error_correction/include/Error.hpp +++ b/error_correction/include/Error.hpp @@ -23,15 +23,19 @@ class Error int distanceWithGold; float entropy; int cost; + int linkLengthPrediction; + int linkLengthGold; public : - Error(std::string &, std::string &, Classifier::WeightedActions &, int cost); + Error(std::string &, std::string &, Classifier::WeightedActions &, int cost, int linkLengthPrediction, int linkLengthGold); bool isError() const; const std::string & getType() const; bool goldWasAtDistance(int distance) const; float getEntropy() const; float getCost() const; + int getLinkLengthPrediction() const; + int getLinkLengthGold() const; }; class ErrorSequence diff --git a/error_correction/src/Error.cpp b/error_correction/src/Error.cpp index 97a33d07f8c1be992a726613fc04d32a5c22d15a..141e0e11b848e6af4623087065704b77121dd260 100644 --- a/error_correction/src/Error.cpp +++ b/error_correction/src/Error.cpp @@ 
-1,7 +1,7 @@ #include "Error.hpp" -Error::Error(std::string & prediction, std::string & gold, Classifier::WeightedActions & weightedActions, int cost) : -prediction(prediction), gold(gold), weightedActions(weightedActions), cost(cost) +Error::Error(std::string & prediction, std::string & gold, Classifier::WeightedActions & weightedActions, int cost, int linkLengthPrediction, int linkLengthGold) : +prediction(prediction), gold(gold), weightedActions(weightedActions), cost(cost), linkLengthPrediction(linkLengthPrediction), linkLengthGold(linkLengthGold) { type = prediction + "->" + gold; if (ProgramParameters::onlyPrefixes) @@ -100,6 +100,10 @@ void Errors::printStats() std::map< std::string, std::vector<float> > meanEntropyByDistanceByType; std::map< std::string, std::vector<int> > distanceOfGoldByFirstType; std::map< std::string, std::vector<float> > meanEntropyByDistanceByFirstType; + std::map<int, int> hypothesisByLength; + std::map<int, int> referenceByLength; + std::map<int, float> hypothesisErrorByLength; + std::map<int, float> referenceErrorByLength; int nbErrorsTotal = 0; int nbFirstErrorsTotal = 0; int nbActionsTotal = 0; @@ -120,12 +124,16 @@ void Errors::printStats() nbOccurencesByType[error.getType()]++; meanEntropyByType[error.getType()] += error.getEntropy(); nbActionsTotal++; + hypothesisByLength[error.getLinkLengthPrediction()]++; + referenceByLength[error.getLinkLengthGold()]++; if (!error.isError()) { } else { nbErrorOccurencesByType[error.getType()]++; + hypothesisErrorByLength[error.getLinkLengthPrediction()]++; + referenceErrorByLength[error.getLinkLengthGold()]++; if (!firstErrorMet) { nbFirstErrorOccurencesByType[error.getType()]++; @@ -265,5 +273,40 @@ void Errors::printStats() printColumns(stderr, columns, 1); printLine(); + columns.clear(); + columns.resize(4); + + for (int i = 0; i < 30; i++) + { + if (!hypothesisByLength.count(i)) + continue; + + int totalHypo = hypothesisByLength[i]; + int errorsHypo = hypothesisErrorByLength[i]; + int 
totalGold = referenceByLength[i]; + int errorsGold = referenceErrorByLength[i]; + float percHypo = 100.0*errorsHypo / totalHypo; + float percGold = 100.0*errorsGold / totalGold; + columns[0].emplace_back("Errors when link is of length " + std::to_string(i)); + columns[1].emplace_back("in the hypothesis"); + columns[2].emplace_back(": "+float2str(percHypo, "%.2f%%")); + columns[3].emplace_back("("+std::to_string(errorsHypo)+"/"+std::to_string(totalHypo)+")"); + columns[0].emplace_back("Errors when link is of length " + std::to_string(i)); + columns[1].emplace_back("in the reference"); + columns[2].emplace_back(": "+float2str(percGold, "%.2f%%")); + columns[3].emplace_back("("+std::to_string(errorsGold)+"/"+std::to_string(totalGold)+")"); + } + + printColumns(stderr, columns, 1); +} + +int Error::getLinkLengthPrediction() const +{ + return linkLengthPrediction; +} + +int Error::getLinkLengthGold() const +{ + return linkLengthGold; } diff --git a/error_correction/src/macaon_error_correction.cpp b/error_correction/src/macaon_error_correction.cpp index 1e45d9773114331c5dfbd655e7c06fb11c209ef5..9757e6788f881d7e6c798341e63405ae94cfba58 100644 --- a/error_correction/src/macaon_error_correction.cpp +++ b/error_correction/src/macaon_error_correction.cpp @@ -11,6 +11,7 @@ #include "TransitionMachine.hpp" #include "util.hpp" #include "Error.hpp" +#include "ActionBank.hpp" namespace po = boost::program_options; @@ -217,7 +218,7 @@ int main(int argc, char * argv[]) if (configIsError) { - errors.add({action->name, zeroCostActions[0], weightedActions, classifier->getActionCost(config, action->name)}); + errors.add({action->name, zeroCostActions[0], weightedActions, classifier->getActionCost(config, action->name), ActionBank::getLinkLength(config, action->name), ActionBank::getLinkLength(config, zeroCostActions[0])}); } } diff --git a/transition_machine/include/ActionBank.hpp b/transition_machine/include/ActionBank.hpp index 
365c24368aa68098183dbecf8a33702a7dfddc0f..c1bdc26071b99ab541157a7a9d508b5869a4aeda 100644 --- a/transition_machine/include/ActionBank.hpp +++ b/transition_machine/include/ActionBank.hpp @@ -25,6 +25,15 @@ class ActionBank /// /// @return The corresponding sequence of BasicAction. static std::vector<Action::BasicAction> str2sequence(const std::string & name); + /// @brief Return the length of the link that the action will create. Only used in error analysis. + /// + /// For instance the link length for dependency parsing will be the absolute distance between the head and the dependent. + /// + /// @param c Current Config + /// @param action The name of the action that will create the link + /// + /// @return The length of the link the action will create, default value 0 if not applicable. + static int getLinkLength(const Config & c, const std::string & action); private : @@ -70,7 +79,6 @@ class ActionBank /// @param rule The rule to apply. /// @param relativeIndex The index of the column that will be read and written into, relatively to the head of the Config. static void writeRuleResult(Config & config, const std::string & fromTapeName, const std::string & targetTapeName, const std::string & rule, int relativeIndex); - }; #endif diff --git a/transition_machine/include/Config.hpp b/transition_machine/include/Config.hpp index f8b047feb62c0ae8e1966ef221cf20a1b91d0a78..d72c35d30c032554c6c19f08db27bce206dc870c 100644 --- a/transition_machine/include/Config.hpp +++ b/transition_machine/include/Config.hpp @@ -138,17 +138,17 @@ class Config /// @param index The depth of the requested element. /// /// @return The requested element. - int stackGetElem(int index); + int stackGetElem(int index) const; /// @brief Return true if the stack has an element of depth index. /// /// @param index The depth of the requested element. /// /// @return True if the stack has an element of depth index. 
- bool stackHasIndex(int index); + bool stackHasIndex(int index) const; /// @brief Return true if the stack is empty. /// /// @return True if the stack is empty. - bool stackEmpty(); + bool stackEmpty() const; /// @brief Pop the stack. void stackPop(); /// @brief Push elem to the stack. @@ -162,7 +162,7 @@ class Config /// @brief Return the number of elements in the stack. /// /// @return The number of elements in the stack. - int stackSize(); + int stackSize() const; /// @brief Load a Config to match the one that has been written to file, /// formated by printAsExample. /// diff --git a/transition_machine/src/ActionBank.cpp b/transition_machine/src/ActionBank.cpp index 0db712d5deb7d8d0c3381cf3d4157231f3878e7d..5aa3b4d0303246e4c4bde53683f975f6af830508 100644 --- a/transition_machine/src/ActionBank.cpp +++ b/transition_machine/src/ActionBank.cpp @@ -523,3 +523,22 @@ void ActionBank::writeRuleResult(Config & config, const std::string & fromTapeNa toTape.hyp[config.head + relativeIndex] = applyRule(from, rule); } +int ActionBank::getLinkLength(const Config & c, const std::string & action) +{ + auto splitted = split(action, ' '); + auto & name = splitted[0]; + if (name == "LEFT" || name == "RIGHT" || name == "EOS") + { + if (c.stackEmpty()) + { + fprintf(stderr, "ERROR (%s) : stack is empty. 
Aborting.\n", ERRINFO); + exit(1); + } + + int stackIndex = c.stackGetElem(0); + return std::abs(c.head - stackIndex); + } + + return 0; +} + diff --git a/transition_machine/src/Config.cpp b/transition_machine/src/Config.cpp index 972656431052cff088b20ef54822ef9f4bbb2d60..f681fd950e94f3e1c87352404a59d0b78c7a1641 100644 --- a/transition_machine/src/Config.cpp +++ b/transition_machine/src/Config.cpp @@ -300,7 +300,7 @@ void Config::shuffle(const std::string & delimiterTape, const std::string & deli tapes = newTapes; } -int Config::stackGetElem(int index) +int Config::stackGetElem(int index) const { if (index == -1) return stackHistory; @@ -314,12 +314,12 @@ int Config::stackGetElem(int index) return stack[stack.size()-1-index]; } -bool Config::stackHasIndex(int index) +bool Config::stackHasIndex(int index) const { return index == -1 || (index >= 0 && index < (int)stack.size()); } -bool Config::stackEmpty() +bool Config::stackEmpty() const { return !stackHasIndex(0); } @@ -352,7 +352,7 @@ int Config::stackTop() return stack.back(); } -int Config::stackSize() +int Config::stackSize() const { return stack.size(); }