From f7290f57db96d86c44be792348e3f99e81fa9aa5 Mon Sep 17 00:00:00 2001
From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr>
Date: Thu, 14 Feb 2019 07:52:46 -0500
Subject: [PATCH] Test and time: save train/predict times and sparsity, keep step predictions out of the timed section

---
 .../Monoview/Additions/BoostUtils.py              |  2 ++
 .../Monoview/Additions/CQBoostUtils.py            | 15 ++++++++++++++-
 .../Monoview/Additions/QarBoostUtils.py           |  2 +-
 .../Monoview/MonoviewUtils.py                     |  1 -
 .../MonoviewClassifiers/Adaboost.py               | 15 +++++++++++++--
 5 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
index 90dca128..e7242ee6 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
@@ -760,5 +760,7 @@ def getInterpretBase(classifier, directory, classifier_name, weights,
                                        separator=',', suppress_small=True)
     np.savetxt(directory + "voters.csv", classifier.classification_matrix[:, classifier.chosen_columns_], delimiter=',')
     np.savetxt(directory + "weights.csv", classifier.weights_, delimiter=',')
+    np.savetxt(directory + "times.csv", np.array([classifier.train_time, classifier.predict_time]), delimiter=',')
+    np.savetxt(directory + "sparsity.csv", np.array([len(weights_sort)]), delimiter=',')
     get_accuracy_graph(classifier.train_metrics, classifier_name, directory + 'metrics.png', classifier.plotted_metric, classifier.bounds, "Boosting bound")
     return interpretString
diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py
index 18367d71..16821bd7 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py
@@ -43,6 +43,7 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
         self.chosen_columns_ = []
         self.n_total_hypotheses_ = n
         self.n_total_examples = m
+        self.train_shape = self.classification_matrix.shape
 
         y_kernel_matrix = np.multiply(y.reshape((len(y), 1)), self.classification_matrix)
 
@@ -103,14 +104,26 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost):
             X = np.array(X.todense())
 
         classification_matrix = self._binary_classification_matrix(X)
-
         margins = np.squeeze(np.asarray(np.dot(classification_matrix, self.weights_)))
+
         signs_array = np.array([int(x) for x in sign(margins)])
         signs_array[signs_array == -1] = 0
         end = time.time()
         self.predict_time = end-start
+        self.step_predict(classification_matrix)
         return signs_array
 
+    def step_predict(self, classification_matrix):  # stores per-step decisions and margins on self
+        if classification_matrix.shape != self.train_shape:  # NOTE(review): shape mismatch looks like a "not the training matrix" proxy - confirm
+            self.step_decisions = np.zeros(classification_matrix.shape)
+            self.step_prod = np.zeros(classification_matrix.shape)
+            for weight_index in range(self.weights_.shape[0]-1):  # NOTE(review): the last weight is never used here, its column stays 0 - confirm intent
+                margins = np.sum(classification_matrix[:, :weight_index+1]* self.weights_[:weight_index+1], axis=1)  # vote of the first weight_index+1 columns
+                signs_array = np.array([int(x) for x in sign(margins)])
+                signs_array[signs_array == -1] = 0  # recode -1 as 0, as predict() does
+                self.step_decisions[:, weight_index] = signs_array
+                self.step_prod[:, weight_index] = margins  # reuse the sum above instead of recomputing it (assumes sign() leaves its input untouched)
+
     def initialize(self):
         pass
 
diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
index bc2aac34..50eb7948 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
@@ -160,12 +160,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
             logging.warning('Converting sparse matrix to dense matrix.')
             X = np.array(X.todense())
         classification_matrix = self._binary_classification_matrix(X)
-        self.step_predict(classification_matrix)
         margins = np.sum(classification_matrix * self.weights_, axis=1)
         signs_array = np.array([int(x) for x in sign(margins)])
         signs_array[signs_array == -1] = 0
         end = time.time()
         self.predict_time = end - start
+        self.step_predict(classification_matrix)
         return signs_array
 
     def step_predict(self, classification_matrix):
diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py
index 0d4c7829..59260507 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py
@@ -31,7 +31,6 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierMo
             nIter = nb_possible_combinations
         randomSearch = RandomizedSearchCV(estimator, n_iter=nIter, param_distributions=params_dict, refit=True,
                                           n_jobs=nbCores, scoring=scorer, cv=KFolds, random_state=randomState)
-        print(X_train)
         detector = randomSearch.fit(X_train, y_train)
 
         bestParams = estimator.genBestParams(detector)
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py
index 4153a0ee..71d6a839 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py
@@ -1,6 +1,7 @@
 from sklearn.ensemble import AdaBoostClassifier
 from sklearn.tree import DecisionTreeClassifier
 import numpy as np
+import time
 from sklearn.metrics import accuracy_score
 
 from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier
@@ -32,7 +33,11 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
         self.step_predictions = None
 
     def fit(self, X, y, sample_weight=None):
+        begin = time.time()
         super(Adaboost, self).fit(X, y, sample_weight=sample_weight)
+        end = time.time()
+        self.train_time = end-begin
+        self.train_shape = X.shape
         self.base_predictions = np.array([estim.predict(X) for estim in self.estimators_])
         self.metrics = np.array([self.plotted_metric.score(pred, y) for pred in self.staged_predict(X)])
         self.bounds = np.array([np.prod(np.sqrt(1-4*np.square(0.5-self.estimator_errors_[:i+1]))) for i in range(self.estimator_errors_.shape[0])])
@@ -42,8 +47,13 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
         return True
 
     def predict(self, X):
-        super(Adaboost, self).predict(X)
-        self.step_predictions = np.array([step_pred for step_pred in self.staged_predict(X)])
+        begin = time.time()  # time only the parent-class predict call
+        pred = super(Adaboost, self).predict(X)
+        end = time.time()
+        self.pred_time = end - begin  # elapsed seconds; saved to times.csv together with train_time
+        if X.shape != self.train_shape:  # NOTE(review): shape mismatch looks like a "not the training set" proxy; same-shaped inputs are skipped - confirm
+            self.step_predictions = np.array([step_pred for step_pred in self.staged_predict(X)])  # one row of predictions per boosting stage
+        return pred  # hand the parent's predictions back to the caller
 
     def getInterpret(self, directory, y_test):
         interpretString = ""
@@ -56,6 +66,7 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
         get_accuracy_graph(self.metrics, "Adaboost", directory+"metrics.png", self.plotted_metric_name, bounds=list(self.bounds))
         np.savetxt(directory + "test_metrics.csv", step_test_metrics, delimiter=',')
         np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',')
+        np.savetxt(directory + "times.csv", np.array([self.train_time, self.pred_time]), delimiter=',')
         return interpretString
 
 
-- 
GitLab