diff --git a/src/alignments.cc b/src/alignments.cc index fd1854f0dcdd5976808b79e8a8a5e45ba1524883..70aaa9d7bab2e6c587e21c312b0a8d313723477f 100644 --- a/src/alignments.cc +++ b/src/alignments.cc @@ -3,7 +3,6 @@ #include "modules/sequence_extractors.hh" #include "modules/sequence_alignment.hh" #include "modules/similarity_functions.hh" -#include "modules/decision_resolver.hh" #include <getopt.h> #include <cmath> @@ -27,43 +26,45 @@ std::pair<decoda::Extractor, decoda::Similarity> ExtractorFromString(std::string if (str == "speaker") return {decoda::Speaker, decoda::speaker_similarity}; if (str == "responsetime") - return {decoda::ResponseTime, decoda::response_time_similarity}; + return {decoda::ResponseTime, decoda::continuous_similarity}; + if (str == "length") + return {decoda::TurnLength, decoda::identity_similarity}; throw std::invalid_argument(str + " is not a valid extractor"); } void HelpMessage(std::ostream &output, const std::string prog_name) { - output << "Usage: " << prog_name - << " [OPTIONS] -x mode -d stem_dir extractor1 [extractor2 ...]" - << std::endl; - output << "Options (All):" << std::endl; - output << "\t-o output -- Main output file (affinity matrix or dictionnary)" << std::endl; - output << "\t-m dialoguemap -- Output file to store the dialogue mapping in" << std::endl; - output << "\t-v -- Adds one level to the verbose level" << std::endl; - output << "\t-i -- The initial gap penality" << std::endl; - output << "\t-e -- The extended gap penality" << std::endl; - output << "Options (Mode 0):" << std::endl; - output << "\t-a align_method -- Alignment method to use (required with mode 0)" << std::endl; - output << "Options (Mode 1 and 2):" << std::endl; - output << "\t--perfectmatch -- Only allows perfect matches in dictionnary keys" << std::endl; - output << "\t--partialmatch -- Allow some gaps and mismatches in dictionnary keys" << std::endl; - output << "Modes:" << std::endl; - output << "\t 0: Builds an affinity matrix" << std::endl; - output << "\t 1: Builds an alignment dictionnary" << std::endl; - output << "\t 2: Builds vectors for each dialogue" << std::endl; - output << "Align Methods: 'smithwaterman' or 'needlemanwunsch'" << std::endl; - output << "Extractors: 'dialogueacts', 'polarity', 'speaker' and 'responsetime'" - << std::endl; + output << "Usage: " << prog_name + << " [OPTIONS] -x mode -d stem_dir extractor1 [extractor2 ...]" + << std::endl; + output << "Options (All):" << std::endl; + output << "\t-o output -- Main output file (affinity matrix or dictionnary)" << std::endl; + output << "\t-m dialoguemap -- Output file to store the dialogue mapping in" << std::endl; + output << "\t-v -- Adds one level to the verbose level" << std::endl; + output << "\t-i -- The initial gap penality" << std::endl; + output << "\t-e -- The extended gap penality" << std::endl; + output << "Options (Mode 0):" << std::endl; + output << "\t-a align_method -- Alignment method to use (required with mode 0)" << std::endl; + output << "Options (Mode 1 and 2):" << std::endl; + output << "\t--perfectmatch -- Only allows perfect matches in dictionnary keys" << std::endl; + output << "\t--partialmatch -- Allow some gaps and mismatches in dictionnary keys" << std::endl; + output << "Modes:" << std::endl; + output << "\t 0: Builds an affinity matrix" << std::endl; + output << "\t 1: Builds an alignment dictionnary" << std::endl; + output << "\t 2: Builds vectors for each dialogue" << std::endl; + output << "Align Methods: 'smithwaterman' or 'needlemanwunsch'" << std::endl; + output << "Extractors: 'dialogueacts', 'polarity', 'speaker' and 'length'" + << std::endl; } void BuildAffinityMatrix(std::vector<decoda::Sequence> &sequences, - std::vector<decoda::Similarity> &similarity_funcs, - double init_gap, double extend_gap, - decoda::AlignmentFunc alignment_func, - std::ostream &output) { + std::vector<decoda::Similarity> &similarity_funcs, + double init_gap, double extend_gap, + decoda::AlignmentFunc alignment_func, + std::ostream &output) { int nb_dialogues = sequences.size(); - if (!verbose) + if (verbose) std::cerr << "Building affinity matrix..." << std::endl; int count = 0; @@ -71,13 +72,13 @@ void BuildAffinityMatrix(std::vector<decoda::Sequence> &sequences, for (int j = i; j < nb_dialogues; j++) { if (verbose > 1 && count % 100 == 0) { std::cerr << "Progress: " << count << "/" - << (nb_dialogues+1)*nb_dialogues / 2 << "\r"; + << (nb_dialogues+1)*nb_dialogues / 2 << "\r"; std::cerr.flush(); } count++; output << decoda::AlignmentSimilarity(sequences[i], sequences[j], - similarity_funcs, - init_gap, extend_gap, alignment_func) << " "; + similarity_funcs, + init_gap, extend_gap, alignment_func) << " "; } output << std::endl; } @@ -96,9 +97,9 @@ std::string SequenceToString(const decoda::Sequence &sequence) { str += "("; for (auto &value : tuple) { if (first) - str += boost::lexical_cast<std::string>(value); + str += boost::lexical_cast<std::string>(value); else - str += "," + boost::lexical_cast<std::string>(value); + str += "," + boost::lexical_cast<std::string>(value); first = false; } str += ")"; @@ -109,7 +110,7 @@ std::string SequenceToString(const decoda::Sequence &sequence) { } void AddAlignmentToDictionnary(decoda::Alignment &alignment, - std::map<decoda::Sequence, int> &dictionnary) { + std::map<decoda::Sequence, int> &dictionnary) { decoda::Sequence sequence; std::vector<decoda::ExtractorTypes> mismatch; @@ -142,7 +143,7 @@ void AddAlignmentToDictionnary(decoda::Alignment &alignment, } void AddSubAlignmentsToDictionnary(decoda::Alignment &alignment, - std::map<decoda::Sequence, int> &dictionnary) { + std::map<decoda::Sequence, int> &dictionnary) { int s1_idx = alignment.topleft.first; decoda::Sequence subsequence; @@ -152,16 +153,16 @@ void AddSubAlignmentsToDictionnary(decoda::Alignment &alignment, s1_idx++; } else { if (!subsequence.empty()) { - if (dictionnary.count(subsequence)) { - dictionnary[subsequence]++; - } else { - dictionnary[subsequence] = 1; - } + if (dictionnary.count(subsequence)) { + dictionnary[subsequence]++; + } else { + dictionnary[subsequence] = 1; + } - subsequence.clear(); + subsequence.clear(); } if (action == 'D' || action == 'X') { - s1_idx++; + s1_idx++; } } } @@ -176,12 +177,12 @@ void AddSubAlignmentsToDictionnary(decoda::Alignment &alignment, } void BuildAlignmentDictionnary(std::vector<decoda::Sequence> &sequences, - std::vector<decoda::Similarity> &similarity_funcs, - double init_gap, double extend_gap, double min_score, - std::ostream &output, bool perfect_match) { + std::vector<decoda::Similarity> &similarity_funcs, + double init_gap, double extend_gap, double min_score, + std::ostream &output, bool perfect_match) { size_t nb_dialogues = sequences.size(); - if (!verbose) + if (verbose) std::cerr << "Building dictionnary..." << std::endl; std::map<decoda::Sequence, int> dictionnary; @@ -190,19 +191,19 @@ void BuildAlignmentDictionnary(std::vector<decoda::Sequence> &sequences, for (size_t j = i+1; j < nb_dialogues; ++j) { if (verbose > 1 && count % 100 == 0) { std::cerr << "Progress: " << count << "/" - << (nb_dialogues+1)*nb_dialogues / 2 - nb_dialogues << "\r"; + << (nb_dialogues+1)*nb_dialogues / 2 - nb_dialogues << "\r"; std::cerr.flush(); } count++; std::vector<decoda::Alignment> alignments = decoda::WatermanEggert(sequences[i], sequences[j], - similarity_funcs, - init_gap, extend_gap, - min_score); + similarity_funcs, + init_gap, extend_gap, + min_score); for (decoda::Alignment &alignment : alignments) { - if (perfect_match) - AddSubAlignmentsToDictionnary(alignment, dictionnary); - else - AddAlignmentToDictionnary(alignment, dictionnary); + if (perfect_match) + AddSubAlignmentsToDictionnary(alignment, dictionnary); + else + AddAlignmentToDictionnary(alignment, dictionnary); } } } @@ -210,7 +211,7 @@ void BuildAlignmentDictionnary(std::vector<decoda::Sequence> &sequences, if (verbose > 1) std::cerr << std::endl << "Done." << std::endl; - if (!verbose) + if (verbose) std::cerr << "Dictionnary size: " << dictionnary.size() << std::endl; for (auto &kv : dictionnary) { @@ -219,34 +220,56 @@ void BuildAlignmentDictionnary(std::vector<decoda::Sequence> &sequences, } void AddAlignmentToVectors(decoda::Alignment alignment, - std::map<decoda::Sequence, std::set<int>> &vectors, - int dlg1, int dlg2) { - decoda::Sequence sequence; + std::vector<decoda::Similarity> &similarity_funcs, + std::map<std::string, std::set<int>> &vectors, + int dlg1, int dlg2) { + std::string sequence1 = ""; + std::string sequence2 = ""; - int s1_idx = alignment.topleft.first; int s2_idx = alignment.topleft.second; - + bool first = true; + for (auto &action : alignment.cigar) { - switch (action) { - case 'M': { - sequence.push_back(alignment.a[s1_idx]); - s1_idx++; - s2_idx++; - break; - } + switch (action) { + case 'M': case 'X': { - std::vector<decoda::ExtractorTypes> mismatch; size_t n_extractors = alignment.a[s1_idx].size(); - mismatch.resize(n_extractors); - for (size_t k = 0; k < n_extractors; ++k) { - if (alignment.a[s1_idx][k] == alignment.b[s2_idx][k]) { - mismatch[k] = alignment.a[s1_idx][k]; - } else { - mismatch[k] = ""; - } + if (!first) { + sequence1 += ";"; + sequence2 += ";"; } - sequence.push_back(mismatch); + first = false; + sequence1 += "("; + sequence2 += "("; + for (size_t k = 0; k < n_extractors; k++) { + if (similarity_funcs[k] == decoda::continuous_similarity) { + double sim = decoda::continuous_similarity(alignment.a[s1_idx][k], alignment.b[s2_idx][k]); + if (sim >= 0.0) { + long int_a = std::lround(boost::lexical_cast<double>(alignment.a[s1_idx][k])); + long int_b = std::lround(boost::lexical_cast<double>(alignment.b[s2_idx][k])); + + sequence1 += std::to_string(int_a); + sequence2 += std::to_string(int_b); + } + } else { + if (alignment.a[s1_idx][k] == alignment.b[s2_idx][k]) { + std::stringstream ss; + ss << alignment.a[s1_idx][k]; + sequence1 += ss.str(); + sequence2 += ss.str(); + } + } + + if (k != n_extractors-1) { + sequence1 += ","; + sequence2 += ","; + } + } + + sequence1 += ")"; + sequence2 += ")"; + s1_idx++; s2_idx++; break; @@ -262,69 +285,142 @@ void AddAlignmentToVectors(decoda::Alignment alignment, } } - vectors[sequence].insert(dlg1); - vectors[sequence].insert(dlg2); + vectors[sequence1].insert(dlg1); + vectors[sequence1].insert(dlg2); + vectors[sequence2].insert(dlg1); + vectors[sequence2].insert(dlg2); } void AddSubAlignmentsToVectors(decoda::Alignment alignment, - std::map<decoda::Sequence, std::set<int>> &vectors, - int dlg1, int dlg2) { + std::vector<decoda::Similarity> &similarity_funcs, + std::map<std::string, std::set<int>> &vectors, + int dlg1, int dlg2) { int s1_idx = alignment.topleft.first; + int s2_idx = alignment.topleft.second; - decoda::Sequence subsequence; + std::string sequence1 = ""; + std::string sequence2 = ""; + bool first = true; + for (auto &action : alignment.cigar) { - if (action == 'M') { - subsequence.push_back(alignment.a[s1_idx]); + if (action == 'M' || action == 'X') { + size_t n_extractors = alignment.a[s1_idx].size(); + std::string substr1 = "("; + std::string substr2 = "("; + bool completed = true; + + for (size_t k = 0; k < n_extractors; k++) { + if (similarity_funcs[k] == decoda::continuous_similarity) { + double sim = decoda::continuous_similarity(alignment.a[s1_idx][k], alignment.b[s2_idx][k]); + if (sim >= 0.0) { + long int_a = std::lround(boost::lexical_cast<double>(alignment.a[s1_idx][k])); + long int_b = std::lround(boost::lexical_cast<double>(alignment.b[s2_idx][k])); + + substr1 += std::to_string(int_a); + substr2 += std::to_string(int_b); + } else { + completed = false; + break; + } + } else { + if (alignment.a[s1_idx][k] == alignment.b[s2_idx][k]) { + std::stringstream ss; + ss << alignment.a[s1_idx][k]; + substr1 += ss.str(); + substr2 += ss.str(); + } else { + completed = false; + break; + } + } + + if (k != n_extractors-1) { + substr1 += ","; + substr2 += ","; + } + } + substr1 += ")"; + substr2 += ")"; + + if (completed) { + if (!first) { + sequence1 += ";"; + sequence2 += ";"; + } + first = false; + sequence1 += substr1; + sequence2 += substr2; + } else { + if (!sequence1.empty()) { + vectors[sequence1].insert(dlg1); + vectors[sequence1].insert(dlg2); + vectors[sequence2].insert(dlg1); + vectors[sequence2].insert(dlg2); + sequence1.clear(); + sequence2.clear(); + } + first = true; + } + s1_idx++; + s2_idx++; } else { - if (!subsequence.empty()) { - vectors[subsequence].insert(dlg1); - vectors[subsequence].insert(dlg2); - - subsequence.clear(); + if (!sequence1.empty()) { + vectors[sequence1].insert(dlg1); + vectors[sequence1].insert(dlg2); + vectors[sequence2].insert(dlg1); + vectors[sequence2].insert(dlg2); + first = true; + sequence1.clear(); + sequence2.clear(); } - if (action == 'D' || action == 'X') { - s1_idx++; + if (action == 'D') { + s1_idx++; + } + if (action == 'I') { + s2_idx++; } } } - if (!subsequence.empty()) { - vectors[subsequence].insert(dlg1); - vectors[subsequence].insert(dlg2); + if (!sequence1.empty()) { + vectors[sequence1].insert(dlg1); + vectors[sequence1].insert(dlg2); + vectors[sequence2].insert(dlg1); + vectors[sequence2].insert(dlg2); } } void BuildDialogueVectors(std::vector<decoda::Sequence> &sequences, - std::vector<decoda::Similarity> &similarity_funcs, - double init_gap, double extend_gap, double min_score, - std::ostream &output, bool perfect_match) { + std::vector<decoda::Similarity> &similarity_funcs, + double init_gap, double extend_gap, double min_score, + std::ostream &output, bool perfect_match) { size_t nb_dialogues = sequences.size(); - if (!verbose) + if (verbose) std::cerr << "Building vectors..." << std::endl; - std::map<decoda::Sequence, std::set<int>> vectors; + std::map<std::string, std::set<int>> vectors; int count = 0; for (size_t i = 0; i < nb_dialogues; ++i) { for (size_t j = i+1; j < nb_dialogues; ++j) { if (verbose > 1 && count % 100 == 0) { std::cerr << "Progress: " << count << "/" - << (nb_dialogues+1)*nb_dialogues / 2 - nb_dialogues << "\r"; + << (nb_dialogues+1)*nb_dialogues / 2 - nb_dialogues << "\r"; std::cerr.flush(); } count++; std::vector<decoda::Alignment> alignments = decoda::WatermanEggert(sequences[i], sequences[j], - similarity_funcs, - init_gap, extend_gap, - min_score); + similarity_funcs, + init_gap, extend_gap, + min_score); for (decoda::Alignment &alignment : alignments) { - if (perfect_match) { - AddSubAlignmentsToVectors(alignment, vectors, i, j); - } else { - AddAlignmentToVectors(alignment, vectors, i, j); - } + if (perfect_match) { + AddSubAlignmentsToVectors(alignment, similarity_funcs, vectors, i, j); + } else { + AddAlignmentToVectors(alignment, similarity_funcs, vectors, i, j); + } } } } @@ -333,7 +429,7 @@ void BuildDialogueVectors(std::vector<decoda::Sequence> &sequences, std::cerr << std::endl << "Done." << std::endl; for (auto &kv : vectors) { - output << SequenceToString(kv.first); + output << kv.first; for (int value : vectors[kv.first]) { output << "\t" << value; } @@ -359,18 +455,18 @@ int main(int argc, char *argv[]) { for (;;) { static struct option long_options[] = { - {"help", no_argument, 0, 'h'}, - {"mode", required_argument, 0, 'x'}, - {"alignmethod", required_argument, 0, 'a'}, - {"dir", required_argument, 0, 'd'}, - {"output", required_argument, 0, 'o'}, - {"dialoguemap", required_argument, 0, 'm'}, - {"perfectmatch", no_argument, &perfect_match, 1}, - {"partialmatch", no_argument, &perfect_match, 0}, - {"verbose", no_argument, 0, 'v'}, - {"init-gap", required_argument, 0, 'i'}, - {"extend-gap", required_argument, 0, 'e'}, - {0, 0, 0, 0} + {"help", no_argument, 0, 'h'}, + {"mode", required_argument, 0, 'x'}, + {"alignmethod", required_argument, 0, 'a'}, + {"dir", required_argument, 0, 'd'}, + {"output", required_argument, 0, 'o'}, + {"dialoguemap", required_argument, 0, 'm'}, + {"perfectmatch", no_argument, &perfect_match, 1}, + {"partialmatch", no_argument, &perfect_match, 0}, + {"verbose", no_argument, 0, 'v'}, + {"init-gap", required_argument, 0, 'i'}, + {"extend-gap", required_argument, 0, 'e'}, + {0, 0, 0, 0} }; int option_index = 0; @@ -408,28 +504,28 @@ int main(int argc, char *argv[]) { } case 'x': { try { - mode = std::stoi(optarg); + mode = std::stoi(optarg); } catch (std::exception &e) { - std::cerr << "Argument of -x '" << optarg << "' is not an integer!"; - exit(1); + std::cerr << "Argument of -x '" << optarg << "' is not an integer!"; + exit(1); } break; } case 'i': { try { - init_gap = std::stod(optarg); + init_gap = std::stod(optarg); } catch (std::exception &e) { - std::cerr << "Argument of -i '" << optarg << "' is not a float!"; - exit(1); + std::cerr << "Argument of -i '" << optarg << "' is not a float!"; + exit(1); } break; } case 'e': { try { - extend_gap = std::stod(optarg); + extend_gap = std::stod(optarg); } catch (std::exception &e) { - std::cerr << "Argument of -e '" << optarg << "' is not a float!"; - exit(1); + std::cerr << "Argument of -e '" << optarg << "' is not a float!"; + exit(1); } break; } @@ -477,14 +573,14 @@ int main(int argc, char *argv[]) { extractors_map.insert({argv[k], ExtractorFromString(argv[k])}); } - if (!verbose) + if (verbose) std::cerr << "Parsing dialogues..." << std::endl; decoda::DialogueParser parser(stem_dir); parser.ReadAll(); if (dialoguemap_set) { - if (!verbose) + if (verbose) std::cerr << "Outputing dialogue map..." << std::endl; std::ofstream odialoguemap(dialoguemap); @@ -495,31 +591,31 @@ int main(int argc, char *argv[]) { odialoguemap.close(); } - if (!verbose) + if (verbose) std::cerr << "Number of dialogues: " << parser.dialogues.size() << std::endl; std::vector<decoda::Extractor> extractors; std::vector<decoda::Similarity> similarity_funcs; - if (!verbose) { + if (verbose) { if (mode == 0) std::cerr << "Alignment method being used: " << align_method << std::endl; else if (mode == 1) std::cerr << "Alignment method being used: WatermanEggert" << std::endl; } - if (!verbose) + if (verbose) std::cerr << "Extractors being used: "; for (auto &kv : extractors_map) { - if (!verbose) + if (verbose) std::cerr << kv.first << " "; extractors.push_back(kv.second.first); similarity_funcs.push_back(kv.second.second); } - if (!verbose) + if (verbose) std::cerr << std::endl; - if (!verbose) { + if (verbose) { std::cerr << "Gap penality affine function: " << init_gap << " + " << extend_gap << "*k" << std::endl; } diff --git a/src/modules/decision_resolver.hh b/src/modules/decision_resolver.hh index 2ead1870303700b20053b19daa7b1ccd4238195b..7896b50a81ce43de74683bd71e15c51d49a32366 100644 --- a/src/modules/decision_resolver.hh +++ b/src/modules/decision_resolver.hh @@ -8,18 +8,21 @@ namespace decoda { - typedef std::string (*Resolver)(ExtractorTypes&, ExtractorTypes&); + typedef std::string (*Resolver)(ExtractorTypes&, ExtractorTypes&, bool); - std::string BinaryDecisionResolver(decoda::ExtractorTypes &a, decoda::ExtractorTypes &b) { + std::string BinaryDecisionResolver(decoda::ExtractorTypes &a, decoda::ExtractorTypes &b, + bool strict) { if (a == b) return boost::lexical_cast<std::string>(a); return ""; } - std::string ContinuousDecisionResolver(decoda::ExtractorTypes &a, decoda::ExtractorTypes &b) { + std::string ContinuousDecisionResolver(decoda::ExtractorTypes &a, decoda::ExtractorTypes &b, + bool strict=false) { double double_a = boost::lexical_cast<double>(a); double double_b = boost::lexical_cast<double>(b); - - return std::to_string(std::lround(std::abs(double_a - double_b))); + long value = std::lround(std::abs(double_a - double_b)); + if (!strict) + return std::to_string(value); } } diff --git a/src/modules/dialogue_sequencer.cc b/src/modules/dialogue_sequencer.cc index a7ea8394ad03fe7db84dd4a72dfc1e6edaa40afd..9cdac4a02648746b3655bca835fd4d06ef9c624a 100644 --- a/src/modules/dialogue_sequencer.cc +++ b/src/modules/dialogue_sequencer.cc @@ -1,20 +1,27 @@ #include "dialogue_sequencer.hh" +#include <exception> namespace decoda { DialogueSequencer::DialogueSequencer(std::vector<Extractor> extractors) : extractors(extractors) {} Sequence DialogueSequencer::Transform(Dialogue &dialogue) { Sequence sequence; - + size_t size = 0; + for (Extractor extractor : extractors) { std::vector<ExtractorTypes> extracted = extractor(dialogue); if (sequence.empty()) { - sequence.resize(extracted.size()); + sequence.resize(extracted.size()); + size = extracted.size(); } + + if (size != extracted.size()) { + throw std::runtime_error("Extractors don't extract sequences of the same length !"); + } for (size_t k = 0; k < extracted.size(); ++k) { - sequence[k].push_back(extracted[k]); + sequence[k].push_back(extracted[k]); } } @@ -23,7 +30,7 @@ namespace decoda { std::vector<Sequence> DialogueSequencer::TransformAll(std::vector<Dialogue> &dialogues) { std::vector<Sequence> sequences; - + for (Dialogue dialogue : dialogues) { sequences.push_back(Transform(dialogue)); } diff --git a/src/modules/sequence_extractors.cc b/src/modules/sequence_extractors.cc index 78bd9c18e298207c9f376616366072e4232d4862..f35e5bc3005ca91bcf1ffda3b9937d9c31bc63f1 100644 --- a/src/modules/sequence_extractors.cc +++ b/src/modules/sequence_extractors.cc @@ -4,96 +4,121 @@ #include <exception> namespace decoda { - std::vector<ExtractorTypes> DialogueActs(Dialogue &dialogue) { - std::vector<ExtractorTypes> partial_sequence; - - for (Turn &turn : dialogue) { - if (turn[0][kAct] == "//") { - continue; - } - - std::string act = turn[0][kAct]; - partial_sequence.push_back(act); - } - - return partial_sequence; + std::vector<ExtractorTypes> partial_sequence; + + for (Turn &turn : dialogue) { + if (turn[0][kAct] == "//") { + continue; + } + + std::string act = turn[0][kAct]; + partial_sequence.push_back(act); + } + + return partial_sequence; + } + + std::vector<ExtractorTypes> TurnLength(Dialogue &dialogue) { + std::vector<ExtractorTypes> partial_sequence; + + int length = 0; + for (Turn &turn : dialogue) { + if (turn[0][kAct] == "//") { + continue; + } + length = turn.size(); + length = (length-1) / 3; + if (length > 5) + length = 5; + partial_sequence.push_back(length); + // length = 0; + } + + // if (length != 0) { + // length = (length-1) / 3; + // if (length > 5) + // length = 5; + // partial_sequence.push_back(length); + // } + + return partial_sequence; } std::vector<ExtractorTypes> TurnMeanPolarity(Dialogue &dialogue) { - std::vector<ExtractorTypes> partial_sequence; - - for (Turn &turn : dialogue) { - if (turn[0][kAct] == "//") { - continue; - } - - double total_polarity = 0.0; - int nb_words = turn.size(); - for (Word &word : turn) { - total_polarity += std::stod(word[kPolarity])*2.5; // unit is 0.4 --> 0.4*2.5 = 1 - } - double mean = total_polarity / nb_words; - partial_sequence.push_back(mean); - } - - return partial_sequence; + std::vector<ExtractorTypes> partial_sequence; + + for (Turn &turn : dialogue) { + if (turn[0][kAct] == "//") { + continue; + } + + double total_polarity = 0.0; + int nb_words = turn.size(); + for (Word &word : turn) { + total_polarity += std::stod(word[kPolarity])*2.5; // unit is 0.4 --> 0.4*2.5 = 1 + } + double mean = total_polarity / nb_words; + partial_sequence.push_back(mean); + } + + return partial_sequence; } std::vector<ExtractorTypes> TurnMeanPolarityDiscrete(Dialogue &dialogue) { - std::vector<ExtractorTypes> partial_sequence; - - for (Turn &turn : dialogue) { - if (turn[0][kAct] == "//") { - continue; - } - - double total_polarity = 0.0; - int nb_words = turn.size(); - for (Word &word : turn) { - total_polarity += std::stod(word[kPolarity]); - } - int mean = nearbyint((total_polarity / nb_words) / 0.4); - partial_sequence.push_back(mean); - - } - - return partial_sequence; + std::vector<ExtractorTypes> partial_sequence; + + for (Turn &turn : dialogue) { + if (turn[0][kAct] == "//") { + continue; + } + + double total_polarity = 0.0; + int nb_words = turn.size(); + for (Word &word : turn) { + total_polarity += std::stod(word[kPolarity]); + } + int mean = nearbyint((total_polarity / nb_words) / 0.4); + partial_sequence.push_back(mean); + + } + + return partial_sequence; } std::vector<ExtractorTypes> Speaker(Dialogue &dialogue) { - std::vector<ExtractorTypes> partial_sequence; - - for (Turn &turn : dialogue) { - if (turn[0][kAct] == "//") { - continue; - } - - std::string speaker = turn[0][kSpkResolved]; - partial_sequence.push_back(speaker); - } - - return partial_sequence; + std::vector<ExtractorTypes> partial_sequence; + + for (Turn &turn : dialogue) { + if (turn[0][kAct] == "//") { + continue; + } + + std::string speaker = turn[0][kSpkResolved]; + partial_sequence.push_back(speaker); + } + + return partial_sequence; } std::vector<ExtractorTypes> ResponseTime(Dialogue &dialogue) { - std::vector<ExtractorTypes> partial_sequence; + std::vector<ExtractorTypes> partial_sequence; - double prev_timestamp = 0.0; + double prev_timestamp = 0.0; - for (Turn &turn : dialogue) { - double current_timestamp = std::stod(turn[0][kStart]); - double delay = current_timestamp - prev_timestamp; - if (prev_timestamp == 0.0) // It's the first turn - delay = 0.0; - prev_timestamp = std::stod(turn.back()[kEnd]); - if (turn[0][kAct] == "//") { - continue; - } + for (Turn &turn : dialogue) { + double current_timestamp = std::stod(turn[0][kStart]); + double delay = current_timestamp - prev_timestamp; + if (prev_timestamp == 0.0) // It's the first turn + delay = 0.0; + prev_timestamp = std::stod(turn.back()[kEnd]); + if (turn[0][kAct] == "//") { + continue; + } - partial_sequence.push_back(delay); - } + partial_sequence.push_back(delay); + } - return partial_sequence; + return partial_sequence; } } diff --git a/src/modules/sequence_extractors.hh b/src/modules/sequence_extractors.hh index 244a8cd36a5706c9e719e67f4fd30ae84c0cd82c..b9da9b732ae6825b8c5d291e216e96dacf63dd8f 100644 --- a/src/modules/sequence_extractors.hh +++ b/src/modules/sequence_extractors.hh @@ -14,6 +14,7 @@ namespace decoda { typedef std::vector<ExtractorTypes> (*Extractor)(Dialogue &dialogue); std::vector<ExtractorTypes> DialogueActs(Dialogue &dialogue); + std::vector<ExtractorTypes> TurnLength(Dialogue &dialogue); std::vector<ExtractorTypes> TurnMeanPolarity(Dialogue &dialogue); std::vector<ExtractorTypes> TurnMeanPolarityDiscrete(Dialogue &dialogue); std::vector<ExtractorTypes> Speaker(Dialogue &dialogue); diff --git a/src/modules/similarity_functions.cc b/src/modules/similarity_functions.cc index cce1c22f5dad169e725d9d70a1d05ccb37602c8f..9b50410a8283148a6a0d9d1cd9ee0f5327170a72 100644 --- a/src/modules/similarity_functions.cc +++ b/src/modules/similarity_functions.cc @@ -13,16 +13,6 @@ namespace decoda { return -1.9; } - double response_time_similarity(ExtractorTypes &a, ExtractorTypes &b) { - double double_a = boost::get<double>(a); - double double_b = boost::get<double>(b); - - if (std::abs(double_a - double_b) <= 0.1) { - return 2; - } - return -1; - } - double continuous_similarity(ExtractorTypes &a, ExtractorTypes &b) { double double_a = boost::get<double>(a); double double_b = boost::get<double>(b);