Skip to content
Snippets Groups Projects
Commit fa7bedb1 authored by Jeremy Auguste
Browse files

Minor fixes

parent cb26cf7b
No related branches found
No related tags found
No related merge requests found
Pipeline #
......@@ -3,7 +3,6 @@
#include "modules/sequence_extractors.hh"
#include "modules/sequence_alignment.hh"
#include "modules/similarity_functions.hh"
#include "modules/decision_resolver.hh"
#include <getopt.h>
#include <cmath>
......@@ -27,7 +26,9 @@ std::pair<decoda::Extractor, decoda::Similarity> ExtractorFromString(std::string
if (str == "speaker")
return {decoda::Speaker, decoda::speaker_similarity};
if (str == "responsetime")
return {decoda::ResponseTime, decoda::response_time_similarity};
return {decoda::ResponseTime, decoda::continuous_similarity};
if (str == "length")
return {decoda::TurnLength, decoda::identity_similarity};
throw std::invalid_argument(str + " is not a valid extractor");
}
......@@ -52,7 +53,7 @@ void HelpMessage(std::ostream &output, const std::string prog_name) {
output << "\t 1: Builds an alignment dictionnary" << std::endl;
output << "\t 2: Builds vectors for each dialogue" << std::endl;
output << "Align Methods: 'smithwaterman' or 'needlemanwunsch'" << std::endl;
output << "Extractors: 'dialogueacts', 'polarity', 'speaker' and 'responsetime'"
output << "Extractors: 'dialogueacts', 'polarity', 'speaker' and 'length'"
<< std::endl;
}
......@@ -63,7 +64,7 @@ void BuildAffinityMatrix(std::vector<decoda::Sequence> &sequences,
std::ostream &output) {
int nb_dialogues = sequences.size();
if (!verbose)
if (verbose)
std::cerr << "Building affinity matrix..." << std::endl;
int count = 0;
......@@ -181,7 +182,7 @@ void BuildAlignmentDictionnary(std::vector<decoda::Sequence> &sequences,
std::ostream &output, bool perfect_match) {
size_t nb_dialogues = sequences.size();
if (!verbose)
if (verbose)
std::cerr << "Building dictionnary..." << std::endl;
std::map<decoda::Sequence, int> dictionnary;
......@@ -210,7 +211,7 @@ void BuildAlignmentDictionnary(std::vector<decoda::Sequence> &sequences,
if (verbose > 1)
std::cerr << std::endl << "Done." << std::endl;
if (!verbose)
if (verbose)
std::cerr << "Dictionnary size: " << dictionnary.size() << std::endl;
for (auto &kv : dictionnary) {
......@@ -219,34 +220,56 @@ void BuildAlignmentDictionnary(std::vector<decoda::Sequence> &sequences,
}
void AddAlignmentToVectors(decoda::Alignment alignment,
std::map<decoda::Sequence, std::set<int>> &vectors,
std::vector<decoda::Similarity> &similarity_funcs,
std::map<std::string, std::set<int>> &vectors,
int dlg1, int dlg2) {
decoda::Sequence sequence;
std::string sequence1 = "";
std::string sequence2 = "";
int s1_idx = alignment.topleft.first;
int s2_idx = alignment.topleft.second;
bool first = true;
for (auto &action : alignment.cigar) {
switch (action) {
case 'M': {
sequence.push_back(alignment.a[s1_idx]);
s1_idx++;
s2_idx++;
break;
}
case 'M':
case 'X': {
std::vector<decoda::ExtractorTypes> mismatch;
size_t n_extractors = alignment.a[s1_idx].size();
mismatch.resize(n_extractors);
for (size_t k = 0; k < n_extractors; ++k) {
if (alignment.a[s1_idx][k] == alignment.b[s2_idx][k]) {
mismatch[k] = alignment.a[s1_idx][k];
if (!first) {
sequence1 += ";";
sequence2 += ";";
}
first = false;
sequence1 += "(";
sequence2 += "(";
for (size_t k = 0; k < n_extractors; k++) {
if (similarity_funcs[k] == decoda::continuous_similarity) {
double sim = decoda::continuous_similarity(alignment.a[s1_idx][k], alignment.b[s2_idx][k]);
if (sim >= 0.0) {
long int_a = std::lround(boost::lexical_cast<double>(alignment.a[s1_idx][k]));
long int_b = std::lround(boost::lexical_cast<double>(alignment.b[s2_idx][k]));
sequence1 += std::to_string(int_a);
sequence2 += std::to_string(int_b);
}
} else {
mismatch[k] = "";
if (alignment.a[s1_idx][k] == alignment.b[s2_idx][k]) {
std::stringstream ss;
ss << alignment.a[s1_idx][k];
sequence1 += ss.str();
sequence2 += ss.str();
}
}
sequence.push_back(mismatch);
if (k != n_extractors-1) {
sequence1 += ",";
sequence2 += ",";
}
}
sequence1 += ")";
sequence2 += ")";
s1_idx++;
s2_idx++;
break;
......@@ -262,36 +285,109 @@ void AddAlignmentToVectors(decoda::Alignment alignment,
}
}
vectors[sequence].insert(dlg1);
vectors[sequence].insert(dlg2);
vectors[sequence1].insert(dlg1);
vectors[sequence1].insert(dlg2);
vectors[sequence2].insert(dlg1);
vectors[sequence2].insert(dlg2);
}
void AddSubAlignmentsToVectors(decoda::Alignment alignment,
std::map<decoda::Sequence, std::set<int>> &vectors,
std::vector<decoda::Similarity> &similarity_funcs,
std::map<std::string, std::set<int>> &vectors,
int dlg1, int dlg2) {
int s1_idx = alignment.topleft.first;
int s2_idx = alignment.topleft.second;
std::string sequence1 = "";
std::string sequence2 = "";
bool first = true;
decoda::Sequence subsequence;
for (auto &action : alignment.cigar) {
if (action == 'M') {
subsequence.push_back(alignment.a[s1_idx]);
s1_idx++;
if (action == 'M' || action == 'X') {
size_t n_extractors = alignment.a[s1_idx].size();
std::string substr1 = "(";
std::string substr2 = "(";
bool completed = true;
for (size_t k = 0; k < n_extractors; k++) {
if (similarity_funcs[k] == decoda::continuous_similarity) {
double sim = decoda::continuous_similarity(alignment.a[s1_idx][k], alignment.b[s2_idx][k]);
if (sim >= 0.0) {
long int_a = std::lround(boost::lexical_cast<double>(alignment.a[s1_idx][k]));
long int_b = std::lround(boost::lexical_cast<double>(alignment.b[s2_idx][k]));
substr1 += std::to_string(int_a);
substr2 += std::to_string(int_b);
} else {
if (!subsequence.empty()) {
vectors[subsequence].insert(dlg1);
vectors[subsequence].insert(dlg2);
completed = false;
break;
}
} else {
if (alignment.a[s1_idx][k] == alignment.b[s2_idx][k]) {
std::stringstream ss;
ss << alignment.a[s1_idx][k];
substr1 += ss.str();
substr2 += ss.str();
} else {
completed = false;
break;
}
}
subsequence.clear();
if (k != n_extractors-1) {
substr1 += ",";
substr2 += ",";
}
if (action == 'D' || action == 'X') {
}
substr1 += ")";
substr2 += ")";
if (completed) {
if (!first) {
sequence1 += ";";
sequence2 += ";";
}
first = false;
sequence1 += substr1;
sequence2 += substr2;
} else {
if (!sequence1.empty()) {
vectors[sequence1].insert(dlg1);
vectors[sequence1].insert(dlg2);
vectors[sequence2].insert(dlg1);
vectors[sequence2].insert(dlg2);
sequence1.clear();
sequence2.clear();
}
first = true;
}
s1_idx++;
s2_idx++;
} else {
if (!sequence1.empty()) {
vectors[sequence1].insert(dlg1);
vectors[sequence1].insert(dlg2);
vectors[sequence2].insert(dlg1);
vectors[sequence2].insert(dlg2);
first = true;
sequence1.clear();
sequence2.clear();
}
if (action == 'D') {
s1_idx++;
}
if (action == 'I') {
s2_idx++;
}
}
}
if (!subsequence.empty()) {
vectors[subsequence].insert(dlg1);
vectors[subsequence].insert(dlg2);
if (!sequence1.empty()) {
vectors[sequence1].insert(dlg1);
vectors[sequence1].insert(dlg2);
vectors[sequence2].insert(dlg1);
vectors[sequence2].insert(dlg2);
}
}
......@@ -302,10 +398,10 @@ void BuildDialogueVectors(std::vector<decoda::Sequence> &sequences,
size_t nb_dialogues = sequences.size();
if (!verbose)
if (verbose)
std::cerr << "Building vectors..." << std::endl;
std::map<decoda::Sequence, std::set<int>> vectors;
std::map<std::string, std::set<int>> vectors;
int count = 0;
for (size_t i = 0; i < nb_dialogues; ++i) {
for (size_t j = i+1; j < nb_dialogues; ++j) {
......@@ -321,9 +417,9 @@ void BuildDialogueVectors(std::vector<decoda::Sequence> &sequences,
min_score);
for (decoda::Alignment &alignment : alignments) {
if (perfect_match) {
AddSubAlignmentsToVectors(alignment, vectors, i, j);
AddSubAlignmentsToVectors(alignment, similarity_funcs, vectors, i, j);
} else {
AddAlignmentToVectors(alignment, vectors, i, j);
AddAlignmentToVectors(alignment, similarity_funcs, vectors, i, j);
}
}
}
......@@ -333,7 +429,7 @@ void BuildDialogueVectors(std::vector<decoda::Sequence> &sequences,
std::cerr << std::endl << "Done." << std::endl;
for (auto &kv : vectors) {
output << SequenceToString(kv.first);
output << kv.first;
for (int value : vectors[kv.first]) {
output << "\t" << value;
}
......@@ -477,14 +573,14 @@ int main(int argc, char *argv[]) {
extractors_map.insert({argv[k], ExtractorFromString(argv[k])});
}
if (!verbose)
if (verbose)
std::cerr << "Parsing dialogues..." << std::endl;
decoda::DialogueParser parser(stem_dir);
parser.ReadAll();
if (dialoguemap_set) {
if (!verbose)
if (verbose)
std::cerr << "Outputing dialogue map..." << std::endl;
std::ofstream odialoguemap(dialoguemap);
......@@ -495,31 +591,31 @@ int main(int argc, char *argv[]) {
odialoguemap.close();
}
if (!verbose)
if (verbose)
std::cerr << "Number of dialogues: " << parser.dialogues.size() << std::endl;
std::vector<decoda::Extractor> extractors;
std::vector<decoda::Similarity> similarity_funcs;
if (!verbose) {
if (verbose) {
if (mode == 0)
std::cerr << "Alignment method being used: " << align_method << std::endl;
else if (mode == 1)
std::cerr << "Alignment method being used: WatermanEggert" << std::endl;
}
if (!verbose)
if (verbose)
std::cerr << "Extractors being used: ";
for (auto &kv : extractors_map) {
if (!verbose)
if (verbose)
std::cerr << kv.first << " ";
extractors.push_back(kv.second.first);
similarity_funcs.push_back(kv.second.second);
}
if (!verbose)
if (verbose)
std::cerr << std::endl;
if (!verbose) {
if (verbose) {
std::cerr << "Gap penality affine function: " << init_gap << " + " << extend_gap << "*k" << std::endl;
}
......
......@@ -8,18 +8,21 @@
namespace decoda {
typedef std::string (*Resolver)(ExtractorTypes&, ExtractorTypes&);
typedef std::string (*Resolver)(ExtractorTypes&, ExtractorTypes&, bool);
std::string BinaryDecisionResolver(decoda::ExtractorTypes &a, decoda::ExtractorTypes &b) {
std::string BinaryDecisionResolver(decoda::ExtractorTypes &a, decoda::ExtractorTypes &b,
bool strict) {
if (a == b) return boost::lexical_cast<std::string>(a);
return "";
}
std::string ContinuousDecisionResolver(decoda::ExtractorTypes &a, decoda::ExtractorTypes &b) {
std::string ContinuousDecisionResolver(decoda::ExtractorTypes &a, decoda::ExtractorTypes &b,
bool strict=false) {
double double_a = boost::lexical_cast<double>(a);
double double_b = boost::lexical_cast<double>(b);
return std::to_string(std::lround(std::abs(double_a - double_b)));
long value = std::lround(std::abs(double_a - double_b));
if (!strict)
return std::to_string(value);
}
}
......
#include "dialogue_sequencer.hh"
#include <exception>
namespace decoda {
// Builds a sequencer from a list of extractor functions; the list is taken by
// value and copied into the `extractors` member.
DialogueSequencer::DialogueSequencer(std::vector<Extractor> extractors) : extractors(extractors) {}
Sequence DialogueSequencer::Transform(Dialogue &dialogue) {
Sequence sequence;
size_t size = 0;
for (Extractor extractor : extractors) {
std::vector<ExtractorTypes> extracted = extractor(dialogue);
if (sequence.empty()) {
sequence.resize(extracted.size());
size = extracted.size();
}
if (size != extracted.size()) {
throw std::runtime_error("Extractors don't extract sequences of the same length !");
}
for (size_t k = 0; k < extracted.size(); ++k) {
......
......@@ -4,7 +4,6 @@
#include <exception>
namespace decoda {
std::vector<ExtractorTypes> DialogueActs(Dialogue &dialogue) {
std::vector<ExtractorTypes> partial_sequence;
......@@ -20,6 +19,32 @@ namespace decoda {
return partial_sequence;
}
// Produces one length bucket per turn of `dialogue`.
//
// Turns whose first element has a kAct field equal to "//" are skipped and
// contribute nothing to the result. For every other turn the bucket is
// (turn.size() - 1) / 3 using integer division, capped at 5.
//
// NOTE(review): turn[0] is read without an emptiness check; this presumably
// relies on every turn holding at least one element -- confirm with the parser.
std::vector<ExtractorTypes> TurnLength(Dialogue &dialogue) {
  std::vector<ExtractorTypes> buckets;
  for (Turn &turn : dialogue) {
    if (turn[0][kAct] == "//") {
      continue;
    }
    // Keep the arithmetic in int, as the bucket is stored as an int value.
    int bucket = turn.size();
    bucket = (bucket - 1) / 3;
    bucket = (bucket > 5) ? 5 : bucket;
    buckets.push_back(bucket);
  }
  return buckets;
}
std::vector<ExtractorTypes> TurnMeanPolarity(Dialogue &dialogue) {
std::vector<ExtractorTypes> partial_sequence;
......
......@@ -14,6 +14,7 @@ namespace decoda {
typedef std::vector<ExtractorTypes> (*Extractor)(Dialogue &dialogue);
std::vector<ExtractorTypes> DialogueActs(Dialogue &dialogue);
std::vector<ExtractorTypes> TurnLength(Dialogue &dialogue);
std::vector<ExtractorTypes> TurnMeanPolarity(Dialogue &dialogue);
std::vector<ExtractorTypes> TurnMeanPolarityDiscrete(Dialogue &dialogue);
std::vector<ExtractorTypes> Speaker(Dialogue &dialogue);
......
......@@ -13,16 +13,6 @@ namespace decoda {
return -1.9;
}
// Similarity score for two response-time values held as doubles.
// Returns 2 when the absolute difference is at most 0.1, otherwise -1.
// boost::get<double> is applied to both arguments, so each must currently
// hold a double.
double response_time_similarity(ExtractorTypes &a, ExtractorTypes &b) {
  const double lhs = boost::get<double>(a);
  const double rhs = boost::get<double>(b);
  const double gap = std::abs(lhs - rhs);
  return (gap <= 0.1) ? 2 : -1;
}
double continuous_similarity(ExtractorTypes &a, ExtractorTypes &b) {
double double_a = boost::get<double>(a);
double double_b = boost::get<double>(b);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment