From f7290f57db96d86c44be792348e3f99e81fa9aa5 Mon Sep 17 00:00:00 2001 From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr> Date: Thu, 14 Feb 2019 07:52:46 -0500 Subject: [PATCH] Test and time --- .../Monoview/Additions/BoostUtils.py | 2 ++ .../Monoview/Additions/CQBoostUtils.py | 15 ++++++++++++++- .../Monoview/Additions/QarBoostUtils.py | 2 +- .../Monoview/MonoviewUtils.py | 1 - .../MonoviewClassifiers/Adaboost.py | 15 +++++++++++++-- 5 files changed, 30 insertions(+), 5 deletions(-) diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py index 90dca128..e7242ee6 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py @@ -760,5 +760,7 @@ def getInterpretBase(classifier, directory, classifier_name, weights, separator=',', suppress_small=True) np.savetxt(directory + "voters.csv", classifier.classification_matrix[:, classifier.chosen_columns_], delimiter=',') np.savetxt(directory + "weights.csv", classifier.weights_, delimiter=',') + np.savetxt(directory + "times.csv", np.array([classifier.train_time, classifier.predict_time]), delimiter=',') + np.savetxt(directory + "sparsity.csv", np.array([len(weights_sort)]), delimiter=',') get_accuracy_graph(classifier.train_metrics, classifier_name, directory + 'metrics.png', classifier.plotted_metric, classifier.bounds, "Boosting bound") return interpretString diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py index 18367d71..16821bd7 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/CQBoostUtils.py @@ -43,6 +43,7 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): self.chosen_columns_ = [] self.n_total_hypotheses_ = n self.n_total_examples = m + self.train_shape = self.classification_matrix.shape y_kernel_matrix = np.multiply(y.reshape((len(y), 1)), self.classification_matrix) @@ -103,14 +104,26 @@ class ColumnGenerationClassifier(BaseEstimator, ClassifierMixin, BaseBoost): X = np.array(X.todense()) classification_matrix = self._binary_classification_matrix(X) - margins = np.squeeze(np.asarray(np.dot(classification_matrix, self.weights_))) + signs_array = np.array([int(x) for x in sign(margins)]) signs_array[signs_array == -1] = 0 end = time.time() self.predict_time = end-start + self.step_predict(classification_matrix) return signs_array + def step_predict(self, classification_matrix): + if classification_matrix.shape != self.train_shape: + self.step_decisions = np.zeros(classification_matrix.shape) + self.step_prod = np.zeros(classification_matrix.shape) + for weight_index in range(self.weights_.shape[0]-1): + margins = np.sum(classification_matrix[:, :weight_index+1]* self.weights_[:weight_index+1], axis=1) + signs_array = np.array([int(x) for x in sign(margins)]) + signs_array[signs_array == -1] = 0 + self.step_decisions[:, weight_index] = signs_array + self.step_prod[:, weight_index] = np.sum(classification_matrix[:, :weight_index+1]* self.weights_[:weight_index+1], axis=1) + def initialize(self): pass diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py index bc2aac34..50eb7948 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py @@ -160,12 +160,12 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): logging.warning('Converting sparse matrix to dense matrix.') X = np.array(X.todense()) classification_matrix = self._binary_classification_matrix(X) - self.step_predict(classification_matrix) margins = np.sum(classification_matrix * self.weights_, axis=1) signs_array = np.array([int(x) for x in sign(margins)]) signs_array[signs_array == -1] = 0 end = time.time() self.predict_time = end - start + self.step_predict(classification_matrix) return signs_array def step_predict(self, classification_matrix): diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py index 0d4c7829..59260507 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py @@ -31,7 +31,6 @@ def randomizedSearch(X_train, y_train, randomState, outputFileName, classifierMo nIter = nb_possible_combinations randomSearch = RandomizedSearchCV(estimator, n_iter=nIter, param_distributions=params_dict, refit=True, n_jobs=nbCores, scoring=scorer, cv=KFolds, random_state=randomState) - print(X_train) detector = randomSearch.fit(X_train, y_train) bestParams = estimator.genBestParams(detector) diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py index 4153a0ee..71d6a839 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -1,6 +1,7 @@ from sklearn.ensemble import AdaBoostClassifier from sklearn.tree import DecisionTreeClassifier import numpy as np +import time from sklearn.metrics import accuracy_score from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier @@ -32,7 +33,11 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): self.step_predictions = None def fit(self, X, y, sample_weight=None): + begin = time.time() super(Adaboost, self).fit(X, y, sample_weight=sample_weight) + end = time.time() + self.train_time = end-begin + self.train_shape = X.shape self.base_predictions = np.array([estim.predict(X) for estim in self.estimators_]) self.metrics = np.array([self.plotted_metric.score(pred, y) for pred in self.staged_predict(X)]) self.bounds = np.array([np.prod(np.sqrt(1-4*np.square(0.5-self.estimator_errors_[:i+1]))) for i in range(self.estimator_errors_.shape[0])]) @@ -42,8 +47,13 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): return True def predict(self, X): - super(Adaboost, self).predict(X) - self.step_predictions = np.array([step_pred for step_pred in self.staged_predict(X)]) + begin = time.time() + pred = super(Adaboost, self).predict(X) + end = time.time() + self.pred_time = end - begin + if X.shape != self.train_shape: + self.step_predictions = np.array([step_pred for step_pred in self.staged_predict(X)]) + return pred def getInterpret(self, directory, y_test): interpretString = "" @@ -56,6 +66,7 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): get_accuracy_graph(self.metrics, "Adaboost", directory+"metrics.png", self.plotted_metric_name, bounds=list(self.bounds)) np.savetxt(directory + "test_metrics.csv", step_test_metrics, delimiter=',') np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',') + np.savetxt(directory + "times.csv", np.array([self.train_time, self.pred_time]), delimiter=',') return interpretString -- GitLab