From fa7bedb15117bd238d37e4dba7b10db033f6851d Mon Sep 17 00:00:00 2001
From: Jeremy Auguste <jeremy.auguste@etu.univ-amu.fr>
Date: Mon, 9 May 2016 11:43:51 +0200
Subject: [PATCH] Fix inverted verbose checks; add 'length' extractor

---
 src/alignments.cc                   | 370 ++++++++++++++++++----------
 src/modules/decision_resolver.hh    |  13 +-
 src/modules/dialogue_sequencer.cc   |  15 +-
 src/modules/sequence_extractors.cc  | 173 +++++++------
 src/modules/sequence_extractors.hh  |   1 +
 src/modules/similarity_functions.cc |  10 -
 6 files changed, 352 insertions(+), 230 deletions(-)

diff --git a/src/alignments.cc b/src/alignments.cc
index fd1854f..70aaa9d 100644
--- a/src/alignments.cc
+++ b/src/alignments.cc
@@ -3,7 +3,6 @@
 #include "modules/sequence_extractors.hh"
 #include "modules/sequence_alignment.hh"
 #include "modules/similarity_functions.hh"
-#include "modules/decision_resolver.hh"
 
 #include <getopt.h>
 #include <cmath>
@@ -27,43 +26,45 @@ std::pair<decoda::Extractor, decoda::Similarity> ExtractorFromString(std::string
   if (str == "speaker")
     return {decoda::Speaker, decoda::speaker_similarity};
   if (str == "responsetime")
-    return {decoda::ResponseTime, decoda::response_time_similarity};
+    return {decoda::ResponseTime, decoda::continuous_similarity};
+  if (str == "length")
+    return {decoda::TurnLength, decoda::identity_similarity};
 
   throw std::invalid_argument(str + " is not a valid extractor");
 }
 
 void HelpMessage(std::ostream &output, const std::string prog_name) {
-    output << "Usage: " << prog_name
-	   << " [OPTIONS] -x mode -d stem_dir extractor1 [extractor2 ...]"
-	   << std::endl;
-    output << "Options (All):" << std::endl;
-    output << "\t-o output -- Main output file (affinity matrix or dictionnary)" << std::endl;
-    output << "\t-m dialoguemap -- Output file to store the dialogue mapping in" << std::endl;
-    output << "\t-v -- Adds one level to the verbose level" << std::endl;
-    output << "\t-i -- The initial gap penality" << std::endl;
-    output << "\t-e -- The extended gap penality" << std::endl;
-    output << "Options (Mode 0):" << std::endl;
-    output << "\t-a align_method -- Alignment method to use (required with mode 0)" << std::endl;
-    output << "Options (Mode 1 and 2):" << std::endl;
-    output << "\t--perfectmatch -- Only allows perfect matches in dictionnary keys" << std::endl;
-    output << "\t--partialmatch -- Allow some gaps and mismatches in dictionnary keys" << std::endl;
-    output << "Modes:" << std::endl;
-    output << "\t 0: Builds an affinity matrix" << std::endl;
-    output << "\t 1: Builds an alignment dictionnary" << std::endl;
-    output << "\t 2: Builds vectors for each dialogue" << std::endl;
-    output << "Align Methods: 'smithwaterman' or 'needlemanwunsch'" << std::endl;
-    output << "Extractors: 'dialogueacts', 'polarity', 'speaker' and 'responsetime'"
-	   << std::endl;
+  output << "Usage: " << prog_name
+		 << " [OPTIONS] -x mode -d stem_dir extractor1 [extractor2 ...]"
+		 << std::endl;
+  output << "Options (All):" << std::endl;
+  output << "\t-o output -- Main output file (affinity matrix or dictionnary)" << std::endl;
+  output << "\t-m dialoguemap -- Output file to store the dialogue mapping in" << std::endl;
+  output << "\t-v -- Adds one level to the verbose level" << std::endl;
+  output << "\t-i -- The initial gap penality" << std::endl;
+  output << "\t-e -- The extended gap penality" << std::endl;
+  output << "Options (Mode 0):" << std::endl;
+  output << "\t-a align_method -- Alignment method to use (required with mode 0)" << std::endl;
+  output << "Options (Mode 1 and 2):" << std::endl;
+  output << "\t--perfectmatch -- Only allows perfect matches in dictionnary keys" << std::endl;
+  output << "\t--partialmatch -- Allow some gaps and mismatches in dictionnary keys" << std::endl;
+  output << "Modes:" << std::endl;
+  output << "\t 0: Builds an affinity matrix" << std::endl;
+  output << "\t 1: Builds an alignment dictionnary" << std::endl;
+  output << "\t 2: Builds vectors for each dialogue" << std::endl;
+  output << "Align Methods: 'smithwaterman' or 'needlemanwunsch'" << std::endl;
+  output << "Extractors: 'dialogueacts', 'polarity', 'speaker', 'responsetime' and 'length'"
+		 << std::endl;
 }
 
 void BuildAffinityMatrix(std::vector<decoda::Sequence> &sequences,
-			 std::vector<decoda::Similarity> &similarity_funcs,
-			 double init_gap, double extend_gap,
-			 decoda::AlignmentFunc alignment_func,
-			 std::ostream &output) {
+						 std::vector<decoda::Similarity> &similarity_funcs,
+						 double init_gap, double extend_gap,
+						 decoda::AlignmentFunc alignment_func,
+						 std::ostream &output) {
   int nb_dialogues = sequences.size();
 
-  if (!verbose)
+  if (verbose)
     std::cerr << "Building affinity matrix..." << std::endl;
   
   int count = 0;
@@ -71,13 +72,13 @@ void BuildAffinityMatrix(std::vector<decoda::Sequence> &sequences,
     for (int j = i; j < nb_dialogues; j++) {
       if (verbose > 1 && count % 100 == 0) { 
       	std::cerr << "Progress: " << count << "/"
-		  << (nb_dialogues+1)*nb_dialogues / 2 << "\r";
+				  << (nb_dialogues+1)*nb_dialogues / 2 << "\r";
       	std::cerr.flush();
       }
       count++;
       output << decoda::AlignmentSimilarity(sequences[i], sequences[j],
-					    similarity_funcs,
-					    init_gap, extend_gap, alignment_func) << " ";
+											similarity_funcs,
+											init_gap, extend_gap, alignment_func) << " ";
     }
     output << std::endl;
   }
@@ -96,9 +97,9 @@ std::string SequenceToString(const decoda::Sequence &sequence) {
     str += "(";
     for (auto &value : tuple) {
       if (first)
-	str += boost::lexical_cast<std::string>(value);
+		str += boost::lexical_cast<std::string>(value);
       else
-	str += "," + boost::lexical_cast<std::string>(value);
+		str += "," + boost::lexical_cast<std::string>(value);
       first = false;
     }
     str += ")";
@@ -109,7 +110,7 @@ std::string SequenceToString(const decoda::Sequence &sequence) {
 }
 
 void AddAlignmentToDictionnary(decoda::Alignment &alignment,
-			       std::map<decoda::Sequence, int> &dictionnary) {
+							   std::map<decoda::Sequence, int> &dictionnary) {
   decoda::Sequence sequence;
   std::vector<decoda::ExtractorTypes> mismatch;
 
@@ -142,7 +143,7 @@ void AddAlignmentToDictionnary(decoda::Alignment &alignment,
 }
 
 void AddSubAlignmentsToDictionnary(decoda::Alignment &alignment,
-				   std::map<decoda::Sequence, int> &dictionnary) {
+								   std::map<decoda::Sequence, int> &dictionnary) {
   int s1_idx = alignment.topleft.first;
 	
   decoda::Sequence subsequence;
@@ -152,16 +153,16 @@ void AddSubAlignmentsToDictionnary(decoda::Alignment &alignment,
       s1_idx++;
     } else {
       if (!subsequence.empty()) {
-	if (dictionnary.count(subsequence)) {
-	  dictionnary[subsequence]++;
-	} else {
-	  dictionnary[subsequence] = 1;
-	}
+		if (dictionnary.count(subsequence)) {
+		  dictionnary[subsequence]++;
+		} else {
+		  dictionnary[subsequence] = 1;
+		}
 
-	subsequence.clear();
+		subsequence.clear();
       }
       if (action == 'D' || action == 'X') {
-	s1_idx++;
+		s1_idx++;
       }
     }
   }
@@ -176,12 +177,12 @@ void AddSubAlignmentsToDictionnary(decoda::Alignment &alignment,
 }
 
 void BuildAlignmentDictionnary(std::vector<decoda::Sequence> &sequences,
-			       std::vector<decoda::Similarity> &similarity_funcs,
-			       double init_gap, double extend_gap, double min_score,
-			       std::ostream &output, bool perfect_match) {
+							   std::vector<decoda::Similarity> &similarity_funcs,
+							   double init_gap, double extend_gap, double min_score,
+							   std::ostream &output, bool perfect_match) {
   size_t nb_dialogues = sequences.size();
 
-  if (!verbose)
+  if (verbose)
     std::cerr << "Building dictionnary..." << std::endl;
 
   std::map<decoda::Sequence, int> dictionnary;
@@ -190,19 +191,19 @@ void BuildAlignmentDictionnary(std::vector<decoda::Sequence> &sequences,
     for (size_t j = i+1; j < nb_dialogues; ++j) {
       if (verbose > 1 && count % 100 == 0) { 
       	std::cerr << "Progress: " << count << "/"
-		  << (nb_dialogues+1)*nb_dialogues / 2 - nb_dialogues << "\r";
+				  << (nb_dialogues+1)*nb_dialogues / 2 - nb_dialogues << "\r";
       	std::cerr.flush();
       }
       count++;
       std::vector<decoda::Alignment> alignments = decoda::WatermanEggert(sequences[i], sequences[j],
-									 similarity_funcs,
-									 init_gap, extend_gap,
-									 min_score);
+																		 similarity_funcs,
+																		 init_gap, extend_gap,
+																		 min_score);
       for (decoda::Alignment &alignment : alignments) {
-	if (perfect_match)
-	  AddSubAlignmentsToDictionnary(alignment, dictionnary);
-	else
-	  AddAlignmentToDictionnary(alignment, dictionnary);
+		if (perfect_match)
+		  AddSubAlignmentsToDictionnary(alignment, dictionnary);
+		else
+		  AddAlignmentToDictionnary(alignment, dictionnary);
       }
     }
   }
@@ -210,7 +211,7 @@ void BuildAlignmentDictionnary(std::vector<decoda::Sequence> &sequences,
   if (verbose > 1)
     std::cerr << std::endl << "Done." << std::endl;
 
-  if (!verbose)
+  if (verbose)
     std::cerr << "Dictionnary size: " << dictionnary.size() << std::endl;
 
   for (auto &kv : dictionnary) {
@@ -219,34 +220,56 @@ void BuildAlignmentDictionnary(std::vector<decoda::Sequence> &sequences,
 }
 
 void AddAlignmentToVectors(decoda::Alignment alignment,
-			   std::map<decoda::Sequence, std::set<int>> &vectors,
-			   int dlg1, int dlg2) {
-  decoda::Sequence sequence;
+						   std::vector<decoda::Similarity> &similarity_funcs,
+						   std::map<std::string, std::set<int>> &vectors,
+						   int dlg1, int dlg2) {
+  std::string sequence1 = "";
+  std::string sequence2 = "";
   
-
   int s1_idx = alignment.topleft.first;
   int s2_idx = alignment.topleft.second;
-
+  bool first = true;
+  
   for (auto &action : alignment.cigar) {
-    switch (action) {  
-    case 'M': {
-      sequence.push_back(alignment.a[s1_idx]);
-      s1_idx++;
-      s2_idx++;
-      break;
-    }
+    switch (action) {
+    case 'M':
     case 'X': {
-      std::vector<decoda::ExtractorTypes> mismatch;
       size_t n_extractors = alignment.a[s1_idx].size();
-      mismatch.resize(n_extractors);
-      for (size_t k = 0; k < n_extractors; ++k) {
-    	if (alignment.a[s1_idx][k] == alignment.b[s2_idx][k]) {
-    	  mismatch[k] = alignment.a[s1_idx][k];
-    	} else {
-    	  mismatch[k] = "";
-    	}
+      if (!first) {
+		sequence1 += ";";
+		sequence2 += ";";
       }
-      sequence.push_back(mismatch);
+      first = false;
+      sequence1 += "(";
+      sequence2 += "(";
+      for (size_t k = 0; k < n_extractors; k++) {
+		if (similarity_funcs[k] == decoda::continuous_similarity) {
+		  double sim = decoda::continuous_similarity(alignment.a[s1_idx][k], alignment.b[s2_idx][k]);
+		  if (sim >= 0.0) {
+			long int_a = std::lround(boost::lexical_cast<double>(alignment.a[s1_idx][k]));
+			long int_b = std::lround(boost::lexical_cast<double>(alignment.b[s2_idx][k]));
+
+			sequence1 += std::to_string(int_a);
+			sequence2 += std::to_string(int_b);
+		  }
+		} else {
+		  if (alignment.a[s1_idx][k] == alignment.b[s2_idx][k]) {
+			std::stringstream ss;
+			ss << alignment.a[s1_idx][k];
+			sequence1 += ss.str();
+			sequence2 += ss.str();
+		  }
+		}
+	
+		if (k != n_extractors-1) {
+		  sequence1 += ",";
+		  sequence2 += ",";
+		}
+      }
+
+      sequence1 += ")";
+      sequence2 += ")";
+      
       s1_idx++;
       s2_idx++;
       break;
@@ -262,69 +285,142 @@ void AddAlignmentToVectors(decoda::Alignment alignment,
     }
   }
 
-  vectors[sequence].insert(dlg1);
-  vectors[sequence].insert(dlg2);
+  vectors[sequence1].insert(dlg1);
+  vectors[sequence1].insert(dlg2);
+  vectors[sequence2].insert(dlg1);
+  vectors[sequence2].insert(dlg2);
 }
 
 void AddSubAlignmentsToVectors(decoda::Alignment alignment,
-			       std::map<decoda::Sequence, std::set<int>> &vectors,
-			       int dlg1, int dlg2) {
+							   std::vector<decoda::Similarity> &similarity_funcs,
+							   std::map<std::string, std::set<int>> &vectors,
+							   int dlg1, int dlg2) {
   int s1_idx = alignment.topleft.first;
+  int s2_idx = alignment.topleft.second;
 	
-  decoda::Sequence subsequence;
+  std::string sequence1 = "";
+  std::string sequence2 = "";
+  bool first = true;
+  
   for (auto &action : alignment.cigar) {
-    if (action == 'M') {
-      subsequence.push_back(alignment.a[s1_idx]);
+    if (action == 'M' || action == 'X') {
+      size_t n_extractors = alignment.a[s1_idx].size();
+      std::string substr1 = "(";
+      std::string substr2 = "(";
+      bool completed = true;
+
+      for (size_t k = 0; k < n_extractors; k++) {
+		if (similarity_funcs[k] == decoda::continuous_similarity) {
+		  double sim = decoda::continuous_similarity(alignment.a[s1_idx][k], alignment.b[s2_idx][k]);
+		  if (sim >= 0.0) {
+			long int_a = std::lround(boost::lexical_cast<double>(alignment.a[s1_idx][k]));
+			long int_b = std::lround(boost::lexical_cast<double>(alignment.b[s2_idx][k]));
+
+			substr1 += std::to_string(int_a);
+			substr2 += std::to_string(int_b);
+		  } else {
+			completed = false;
+			break;
+		  }
+		} else {
+		  if (alignment.a[s1_idx][k] == alignment.b[s2_idx][k]) {
+			std::stringstream ss;
+			ss << alignment.a[s1_idx][k];
+			substr1 += ss.str();
+			substr2 += ss.str();
+		  } else {
+			completed = false;
+			break;
+		  }
+		}
+	
+		if (k != n_extractors-1) {
+		  substr1 += ",";
+		  substr2 += ",";
+		}
+      }
+      substr1 += ")";
+      substr2 += ")";
+
+      if (completed) {
+		if (!first) {
+		  sequence1 += ";";
+		  sequence2 += ";";
+		}
+		first = false;
+		sequence1 += substr1;
+		sequence2 += substr2;
+      } else {
+		if (!sequence1.empty()) {
+		  vectors[sequence1].insert(dlg1);
+		  vectors[sequence1].insert(dlg2);
+		  vectors[sequence2].insert(dlg1);
+		  vectors[sequence2].insert(dlg2);
+		  sequence1.clear();
+		  sequence2.clear();
+		}
+		first = true;
+      }
+
       s1_idx++;
+      s2_idx++;
     } else {
-      if (!subsequence.empty()) {
-	vectors[subsequence].insert(dlg1);
-	vectors[subsequence].insert(dlg2);
-
-	subsequence.clear();
+      if (!sequence1.empty()) {
+		vectors[sequence1].insert(dlg1);
+		vectors[sequence1].insert(dlg2);
+		vectors[sequence2].insert(dlg1);
+		vectors[sequence2].insert(dlg2);
+		first = true;
+		sequence1.clear();
+		sequence2.clear();
       }
-      if (action == 'D' || action == 'X') {
-	s1_idx++;
+      if (action == 'D') {
+		s1_idx++;
+      }
+      if (action == 'I') {
+		s2_idx++;
       }
     }
   }
 
-  if (!subsequence.empty()) {
-    vectors[subsequence].insert(dlg1);
-    vectors[subsequence].insert(dlg2);
+  if (!sequence1.empty()) {
+    vectors[sequence1].insert(dlg1);
+    vectors[sequence1].insert(dlg2);
+    vectors[sequence2].insert(dlg1);
+    vectors[sequence2].insert(dlg2);
   }
 }
 
 void BuildDialogueVectors(std::vector<decoda::Sequence> &sequences,
-			  std::vector<decoda::Similarity> &similarity_funcs,
-			  double init_gap, double extend_gap, double min_score,
-			  std::ostream &output, bool perfect_match) {
+						  std::vector<decoda::Similarity> &similarity_funcs,
+						  double init_gap, double extend_gap, double min_score,
+						  std::ostream &output, bool perfect_match) {
 
   size_t nb_dialogues = sequences.size();
 
-  if (!verbose)
+  if (verbose)
     std::cerr << "Building vectors..." << std::endl;
 
-  std::map<decoda::Sequence, std::set<int>> vectors;
+  std::map<std::string, std::set<int>> vectors;
   int count = 0;
   for (size_t i = 0; i < nb_dialogues; ++i) {
     for (size_t j = i+1; j < nb_dialogues; ++j) {
       if (verbose > 1 && count % 100 == 0) { 
       	std::cerr << "Progress: " << count << "/"
-		  << (nb_dialogues+1)*nb_dialogues / 2 - nb_dialogues << "\r";
+				  << (nb_dialogues+1)*nb_dialogues / 2 - nb_dialogues << "\r";
       	std::cerr.flush();
       }
       count++;
       std::vector<decoda::Alignment> alignments = decoda::WatermanEggert(sequences[i], sequences[j],
-									 similarity_funcs,
-									 init_gap, extend_gap,
-									 min_score);
+																		 similarity_funcs,
+																		 init_gap, extend_gap,
+																		 min_score);
       for (decoda::Alignment &alignment : alignments) {
-	if (perfect_match) {
-	  AddSubAlignmentsToVectors(alignment, vectors, i, j);
-	} else {
-	  AddAlignmentToVectors(alignment, vectors, i, j);
-	}
+		if (perfect_match) {
+		  AddSubAlignmentsToVectors(alignment, similarity_funcs, vectors, i, j);
+		} else {
+		  AddAlignmentToVectors(alignment, similarity_funcs, vectors, i, j);
+		}
       }
     }
   }
@@ -333,7 +429,7 @@ void BuildDialogueVectors(std::vector<decoda::Sequence> &sequences,
     std::cerr << std::endl << "Done." << std::endl;
 
   for (auto &kv : vectors) {
-    output << SequenceToString(kv.first);
+    output << kv.first;
     for (int value : vectors[kv.first]) {
       output << "\t" << value;
     }
@@ -359,18 +455,18 @@ int main(int argc, char *argv[]) {
   for (;;) {
     static struct option long_options[] =
       {
-	{"help", no_argument, 0, 'h'},
-	{"mode", required_argument, 0, 'x'},
-	{"alignmethod", required_argument, 0, 'a'},
-	{"dir", required_argument, 0, 'd'},
-	{"output", required_argument, 0, 'o'},
-	{"dialoguemap", required_argument, 0, 'm'},
-	{"perfectmatch", no_argument, &perfect_match, 1},
-	{"partialmatch", no_argument, &perfect_match, 0},
-	{"verbose", no_argument, 0, 'v'},
-	{"init-gap", required_argument, 0, 'i'},
-	{"extend-gap", required_argument, 0, 'e'},
-	{0, 0, 0, 0}
+		{"help", no_argument, 0, 'h'},
+		{"mode", required_argument, 0, 'x'},
+		{"alignmethod", required_argument, 0, 'a'},
+		{"dir", required_argument, 0, 'd'},
+		{"output", required_argument, 0, 'o'},
+		{"dialoguemap", required_argument, 0, 'm'},
+		{"perfectmatch", no_argument, &perfect_match, 1},
+		{"partialmatch", no_argument, &perfect_match, 0},
+		{"verbose", no_argument, 0, 'v'},
+		{"init-gap", required_argument, 0, 'i'},
+		{"extend-gap", required_argument, 0, 'e'},
+		{0, 0, 0, 0}
       };
 
     int option_index = 0;
@@ -408,28 +504,28 @@ int main(int argc, char *argv[]) {
     }
     case 'x': {
       try {
-	mode = std::stoi(optarg);
+		mode = std::stoi(optarg);
       } catch (std::exception &e) {
-	std::cerr << "Argument of -x '" << optarg << "' is not an integer!";
-	exit(1);
+		std::cerr << "Argument of -x '" << optarg << "' is not an integer!";
+		exit(1);
       }
       break;
     }
     case 'i': {
       try {
-	init_gap = std::stod(optarg);
+		init_gap = std::stod(optarg);
       } catch (std::exception &e) {
-	std::cerr << "Argument of -i '" << optarg << "' is not a float!";
-	exit(1);
+		std::cerr << "Argument of -i '" << optarg << "' is not a float!";
+		exit(1);
       }
       break;
     }
     case 'e': {
       try {
-	extend_gap = std::stod(optarg);
+		extend_gap = std::stod(optarg);
       } catch (std::exception &e) {
-	std::cerr << "Argument of -e '" << optarg << "' is not a float!";
-	exit(1);
+		std::cerr << "Argument of -e '" << optarg << "' is not a float!";
+		exit(1);
       }
       break;
     }
@@ -477,14 +573,14 @@ int main(int argc, char *argv[]) {
     extractors_map.insert({argv[k], ExtractorFromString(argv[k])});
   }
 
-  if (!verbose)
+  if (verbose)
     std::cerr << "Parsing dialogues..." << std::endl;
   decoda::DialogueParser parser(stem_dir);
 
   parser.ReadAll();
 
   if (dialoguemap_set) {
-    if (!verbose)
+    if (verbose)
       std::cerr << "Outputing dialogue map..." << std::endl;
     std::ofstream odialoguemap(dialoguemap);
 
@@ -495,31 +591,31 @@ int main(int argc, char *argv[]) {
     odialoguemap.close();
   }
 
-  if (!verbose)
+  if (verbose)
     std::cerr << "Number of dialogues: " << parser.dialogues.size() << std::endl;
 
   std::vector<decoda::Extractor> extractors;
   std::vector<decoda::Similarity> similarity_funcs;
 
-  if (!verbose) {
+  if (verbose) {
     if (mode == 0)
       std::cerr << "Alignment method being used: " << align_method << std::endl;
     else if (mode == 1)
       std::cerr << "Alignment method being used: WatermanEggert" << std::endl;
   }
 
-  if (!verbose)
+  if (verbose)
     std::cerr << "Extractors being used: ";
   for (auto &kv : extractors_map) {
-    if (!verbose)
+    if (verbose)
       std::cerr << kv.first << " ";
     extractors.push_back(kv.second.first);
     similarity_funcs.push_back(kv.second.second);
   }
-  if (!verbose)
+  if (verbose)
     std::cerr << std::endl;
 
-  if (!verbose) {
+  if (verbose) {
     std::cerr << "Gap penality affine function: " << init_gap << " + " << extend_gap << "*k" << std::endl;
   }
   
diff --git a/src/modules/decision_resolver.hh b/src/modules/decision_resolver.hh
index 2ead187..7896b50 100644
--- a/src/modules/decision_resolver.hh
+++ b/src/modules/decision_resolver.hh
@@ -8,18 +8,21 @@
 
 namespace decoda {
 
-  typedef std::string (*Resolver)(ExtractorTypes&, ExtractorTypes&);
+  typedef std::string (*Resolver)(ExtractorTypes&, ExtractorTypes&, bool);
 
-  std::string BinaryDecisionResolver(decoda::ExtractorTypes &a, decoda::ExtractorTypes &b) {
+  std::string BinaryDecisionResolver(decoda::ExtractorTypes &a, decoda::ExtractorTypes &b,
+				     bool strict) {
     if (a == b) return boost::lexical_cast<std::string>(a);
     return "";
   }
 
-  std::string ContinuousDecisionResolver(decoda::ExtractorTypes &a, decoda::ExtractorTypes &b) {
+  std::string ContinuousDecisionResolver(decoda::ExtractorTypes &a, decoda::ExtractorTypes &b,
+					 bool strict=false) {
     double double_a = boost::lexical_cast<double>(a);
     double double_b = boost::lexical_cast<double>(b);
-
-    return std::to_string(std::lround(std::abs(double_a - double_b)));
+    long value = std::lround(std::abs(double_a - double_b));
+    // Returns the rounded |a - b| as text; every path must return a value. NOTE(review): strict is assumed to mean "" unless the distance rounds to 0 - confirm.
+    return (strict && value != 0) ? std::string() : std::to_string(value);
   }
   
 }
diff --git a/src/modules/dialogue_sequencer.cc b/src/modules/dialogue_sequencer.cc
index a7ea839..9cdac4a 100644
--- a/src/modules/dialogue_sequencer.cc
+++ b/src/modules/dialogue_sequencer.cc
@@ -1,20 +1,28 @@
 #include "dialogue_sequencer.hh"
+#include <exception>
+#include <stdexcept>
 
 namespace decoda {
   DialogueSequencer::DialogueSequencer(std::vector<Extractor> extractors) : extractors(extractors) {}
 
   Sequence DialogueSequencer::Transform(Dialogue &dialogue) {
     Sequence sequence;
-
+	size_t size = 0;
+	
     for (Extractor extractor : extractors) {
       std::vector<ExtractorTypes> extracted = extractor(dialogue);
 
       if (sequence.empty()) {
-	sequence.resize(extracted.size());
+		sequence.resize(extracted.size());
+		size = extracted.size();
       }
+
+	  if (size != extracted.size()) {
+		throw std::runtime_error("Extractors don't extract sequences of the same length !");
+	  }
       
       for (size_t k = 0; k < extracted.size(); ++k) {
-	sequence[k].push_back(extracted[k]);
+		sequence[k].push_back(extracted[k]);
       }
     }
     
@@ -23,7 +30,7 @@ namespace decoda {
 
   std::vector<Sequence> DialogueSequencer::TransformAll(std::vector<Dialogue> &dialogues) {
     std::vector<Sequence> sequences;
-
+	
     for (Dialogue dialogue : dialogues) {
       sequences.push_back(Transform(dialogue));
     }
diff --git a/src/modules/sequence_extractors.cc b/src/modules/sequence_extractors.cc
index 78bd9c1..f35e5bc 100644
--- a/src/modules/sequence_extractors.cc
+++ b/src/modules/sequence_extractors.cc
@@ -4,96 +4,121 @@
 #include <exception>
 
 namespace decoda {
-  
   std::vector<ExtractorTypes> DialogueActs(Dialogue &dialogue) {
-    std::vector<ExtractorTypes> partial_sequence;
-
-    for (Turn &turn : dialogue) {
-      if (turn[0][kAct] == "//") {
-	continue;
-      }
-      
-      std::string act = turn[0][kAct];
-      partial_sequence.push_back(act);
-    }
-    
-    return partial_sequence;
+	std::vector<ExtractorTypes> partial_sequence;
+
+	for (Turn &turn : dialogue) {
+	  if (turn[0][kAct] == "//") {
+		continue;
+	  }
+			
+	  std::string act = turn[0][kAct];
+	  partial_sequence.push_back(act);
+	}
+		
+	return partial_sequence;
+  }
+
+  // Builds one entry per kept turn of `dialogue`: the turn's size n,
+  // bucketed as (n-1)/3 with integer division and capped at 5.
+  // Returns the entries in dialogue order.
+  std::vector<ExtractorTypes> TurnLength(Dialogue &dialogue) {
+	std::vector<ExtractorTypes> partial_sequence;
+
+	int length = 0;
+	for (Turn &turn : dialogue) {
+	  // Turns whose first element carries the act "//" produce no entry.
+	  if (turn[0][kAct] == "//") {
+		continue;
+	  }
+	  length = turn.size();
+	  // Integer division: sizes 1-3 give 0, 4-6 give 1, 7-9 give 2, ...
+	  length = (length-1) / 3;
+	  // Sizes 16 and above all end up in the top bucket, 5.
+	  if (length > 5)
+		length = 5;
+	  // One bucket value is emitted for each turn that was not skipped.
+	  partial_sequence.push_back(length);
+	}
+
+	return partial_sequence;
   }
 
   std::vector<ExtractorTypes> TurnMeanPolarity(Dialogue &dialogue) {
-    std::vector<ExtractorTypes> partial_sequence;
-    
-    for (Turn &turn : dialogue) {
-      if (turn[0][kAct] == "//") {
-	continue;
-      }
-      
-      double total_polarity = 0.0;
-      int nb_words = turn.size();
-      for (Word &word : turn) {
-	total_polarity += std::stod(word[kPolarity])*2.5; // unit is 0.4 --> 0.4*2.5 = 1
-      }
-      double mean = total_polarity / nb_words;
-      partial_sequence.push_back(mean);
-    }
-
-    return partial_sequence;
+	std::vector<ExtractorTypes> partial_sequence;
+		
+	for (Turn &turn : dialogue) {
+	  if (turn[0][kAct] == "//") {
+		continue;
+	  }
+			
+	  double total_polarity = 0.0;
+	  int nb_words = turn.size();
+	  for (Word &word : turn) {
+		total_polarity += std::stod(word[kPolarity])*2.5; // unit is 0.4 --> 0.4*2.5 = 1
+	  }
+	  double mean = total_polarity / nb_words;
+	  partial_sequence.push_back(mean);
+	}
+
+	return partial_sequence;
   }
 
   std::vector<ExtractorTypes> TurnMeanPolarityDiscrete(Dialogue &dialogue) {
-    std::vector<ExtractorTypes> partial_sequence;
-    
-    for (Turn &turn : dialogue) {
-      if (turn[0][kAct] == "//") {
-	continue;
-      }
-      
-      double total_polarity = 0.0;
-      int nb_words = turn.size();
-      for (Word &word : turn) {
-	total_polarity += std::stod(word[kPolarity]);
-      }
-      int mean = nearbyint((total_polarity / nb_words) / 0.4);
-      partial_sequence.push_back(mean);
-      
-    }
-
-    return partial_sequence;
+	std::vector<ExtractorTypes> partial_sequence;
+		
+	for (Turn &turn : dialogue) {
+	  if (turn[0][kAct] == "//") {
+		continue;
+	  }
+			
+	  double total_polarity = 0.0;
+	  int nb_words = turn.size();
+	  for (Word &word : turn) {
+		total_polarity += std::stod(word[kPolarity]);
+	  }
+	  int mean = nearbyint((total_polarity / nb_words) / 0.4);
+	  partial_sequence.push_back(mean);
+			
+	}
+
+	return partial_sequence;
   }
 
   std::vector<ExtractorTypes> Speaker(Dialogue &dialogue) {
-    std::vector<ExtractorTypes> partial_sequence;
-
-    for (Turn &turn : dialogue) {
-      if (turn[0][kAct] == "//") {
-	continue;
-      }
-
-      std::string speaker = turn[0][kSpkResolved];
-      partial_sequence.push_back(speaker);
-    }
-    
-    return partial_sequence;
+	std::vector<ExtractorTypes> partial_sequence;
+
+	for (Turn &turn : dialogue) {
+	  if (turn[0][kAct] == "//") {
+		continue;
+	  }
+
+	  std::string speaker = turn[0][kSpkResolved];
+	  partial_sequence.push_back(speaker);
+	}
+		
+	return partial_sequence;
   }
 
   std::vector<ExtractorTypes> ResponseTime(Dialogue &dialogue) {
-    std::vector<ExtractorTypes> partial_sequence;
+	std::vector<ExtractorTypes> partial_sequence;
 
-    double prev_timestamp = 0.0;
+	double prev_timestamp = 0.0;
 
-    for (Turn &turn : dialogue) {
-      double current_timestamp = std::stod(turn[0][kStart]);
-      double delay = current_timestamp - prev_timestamp;
-      if (prev_timestamp == 0.0) // It's the first turn
-	delay = 0.0;
-      prev_timestamp = std::stod(turn.back()[kEnd]);
-      if (turn[0][kAct] == "//") {
-	continue;
-      }
+	for (Turn &turn : dialogue) {
+	  double current_timestamp = std::stod(turn[0][kStart]);
+	  double delay = current_timestamp - prev_timestamp;
+	  if (prev_timestamp == 0.0) // It's the first turn
+		delay = 0.0;
+	  prev_timestamp = std::stod(turn.back()[kEnd]);
+	  if (turn[0][kAct] == "//") {
+		continue;
+	  }
 
-      partial_sequence.push_back(delay);
-    }
+	  partial_sequence.push_back(delay);
+	}
 
-    return partial_sequence;
+	return partial_sequence;
   }
 }
diff --git a/src/modules/sequence_extractors.hh b/src/modules/sequence_extractors.hh
index 244a8cd..b9da9b7 100644
--- a/src/modules/sequence_extractors.hh
+++ b/src/modules/sequence_extractors.hh
@@ -14,6 +14,7 @@ namespace decoda {
   typedef std::vector<ExtractorTypes> (*Extractor)(Dialogue &dialogue);
   
   std::vector<ExtractorTypes> DialogueActs(Dialogue &dialogue);
+  std::vector<ExtractorTypes> TurnLength(Dialogue &dialogue);
   std::vector<ExtractorTypes> TurnMeanPolarity(Dialogue &dialogue);
   std::vector<ExtractorTypes> TurnMeanPolarityDiscrete(Dialogue &dialogue);
   std::vector<ExtractorTypes> Speaker(Dialogue &dialogue);
diff --git a/src/modules/similarity_functions.cc b/src/modules/similarity_functions.cc
index cce1c22..9b50410 100644
--- a/src/modules/similarity_functions.cc
+++ b/src/modules/similarity_functions.cc
@@ -13,16 +13,6 @@ namespace decoda {
     return -1.9;
   }
 
-  double response_time_similarity(ExtractorTypes &a, ExtractorTypes &b) {
-    double double_a = boost::get<double>(a);
-    double double_b = boost::get<double>(b);
-
-    if (std::abs(double_a - double_b) <= 0.1) {
-      return 2;
-    }
-    return -1;
-  }
-
   double continuous_similarity(ExtractorTypes &a, ExtractorTypes &b) {
     double double_a = boost::get<double>(a);
     double double_b = boost::get<double>(b);
-- 
GitLab