diff --git a/config_files/config_cuisine.yml b/config_files/config_cuisine.yml index a60012a801e17433020480cc1b47170c0dba9c54..5a9161460377d8e9b461acc0d4d81ea38a12f0c4 100644 --- a/config_files/config_cuisine.yml +++ b/config_files/config_cuisine.yml @@ -1,14 +1,14 @@ # The base configuration of the benchmark log: True -name: ["test_boules"] -label: "_1_3" +name: ["ionosphere", "abalone", "australian", "balance", "bupa", "cylinder", "hepatitis", "pima", "yeast", "zoo"] +label: "comp_1" file_type: ".hdf5" views: -pathf: "/home/baptiste/Documents/Clouded/short_projects/latent_space_study/" +pathf: "/home/baptiste/Documents/Datasets/UCI/both/" nice: 0 random_state: 42 nb_cores: 1 -full: False +full: True debug: True add_noise: False noise_std: 0.0 @@ -17,74 +17,125 @@ track_tracebacks: False # All the classification-realted configuration options multiclass_method: "oneVersusOne" -split: 0.10 +split: 0.50 nb_folds: 5 -nb_class: 4 +nb_class: 2 classes: -type: ["multiview","monoview"] -algos_monoview: ["cb_boost", "decision_tree", 'random_forest'] -algos_multiview: ["mv_cb_boost", "weighted_linear_late_fusion","weighted_linear_early_fusion","mumbo" ] +type: ["monoview",] +algos_monoview: ["cb_boost", "self_opt_cb", "adaboost", "cq_boost", "min_cq", "adaboost_pregen", "self_opt_cb_pseudo", "self_opt_cb_root"] +algos_multiview: ["mv_cb_boost","early_fusion_dt", "early_fusion_cb", "early_fusion_rf","mumbo", "early_fusion_svm" ] stats_iter: 5 metrics: accuracy_score: {} f1_score: average: 'micro' metric_princ: "accuracy_score" -hps_type: "Random" +hps_type: "None" hps_args: - n_iter: 10 + n_iter: 30 equivalent_draws: True +svm_rbf: + C: 0.7 + cb_boost: - n_stumps: 30 - n_max_iterations: 20 - estimators_generator: "Trees" - max_depth: 1 + n_stumps: 1 + n_max_iterations: 10 + estimators_generator: "Stumps" + +cq_boost: + n_max_iterations: 10 + n_stumps: 1 + +min_cq: + n_stumps_per_attribute: 1 + +adaboost: + n_estimators: 10 + +adaboost_pregen: + n_estimators: 10 + n_stumps: 1 
+ decision_tree: max_depth: 2 + mumbo: - base_estimator: decision_tree - base_estimator__max_depth: 1 - n_estimators: 80 + base_estimator: + - svm_rbf: + C: 0.001 + - svm_rbf: + C: 0.001 + - decision_tree: + max_depth: 1 + - decision_tree: + max_depth: 1 + n_estimators: 100 mv_cb_boost: - n_max_iterations: 80 - n_stumps: 30 - estimators_generator: "Trees" - max_depth: 1 + n_estimators: 100 + base_estimator: ["Stumps", "Stumps", "Stumps", "Stumps"] + base_estimator__n_stumps: [50, 50, 50, 50] + base_estimator__check_diff: False + base_estimator__C: 0.001 + base_estimator__kernel: "rbf" + base_estimator__max_depth: 2 + base_estimator__distribution_type: "uniform" + base_estimator__low: 0 + base_estimator__high: 10 + base_estimator__attributes_ratio: 0.5 + base_estimator__examples_ratio: 0.55 -pb_mv_boost: - num_iterations: 20 - decision_tree_depth: 1 -weighted_linear_early_fusion: - monoview_classifier_name: "cb_boost" +early_fusion_cb: monoview_classifier_config: cb_boost: - n_stumps: 30 - n_max_iterations: 20 - estimators_generator: "Trees" + n_estimators: 100 + base_estimator__max_depth: 1 +early_fusion_dt: + monoview_classifier_config: + decision_tree: + max_depth: 2 +early_fusion_rf: + monoview_classifier_config: + random_forest: + n_estimators: 100 max_depth: 1 -weighted_linear_late_fusion: - classifiers_names: ["cb_boost", "cb_boost", "cb_boost", "cb_boost"] - classifier_configs: - - cb_boost: - n_stumps: 30 - n_max_iterations: 20 - estimators_generator: "Trees" - max_depth: 1 - - cb_boost: - n_stumps: 30 - n_max_iterations: 20 - estimators_generator: "Trees" - max_depth: 1 - - cb_boost: - n_stumps: 30 - n_max_iterations: 20 - estimators_generator: "Trees" - max_depth: 1 - - cb_boost: - n_stumps: 30 - n_max_iterations: 20 - estimators_generator: "Trees" - max_depth: 1 - +early_fusion_svm: + monoview_classifier_config: + svm_rbf: + C: 0.7 +#pb_mv_boost: +# num_iterations: 20 +# decision_tree_depth: 1 +#weighted_linear_early_fusion: +# monoview_classifier_name: 
"cb_boost" +# monoview_classifier_config: +# cb_boost: +# n_stumps: 30 +# n_max_iterations: 20 +# estimators_generator: "Trees" +# max_depth: 1 +#weighted_linear_late_fusion: +# classifiers_names: ["cb_boost", "cb_boost", "cb_boost", "cb_boost"] +# classifier_configs: +# - cb_boost: +# n_stumps: 30 +# n_max_iterations: 20 +# estimators_generator: "Trees" +# max_depth: 1 +# - cb_boost: +# n_stumps: 30 +# n_max_iterations: 20 +# estimators_generator: "Trees" +# max_depth: 1 +# - cb_boost: +# n_stumps: 30 +# n_max_iterations: 20 +# estimators_generator: "Trees" +# max_depth: 1 +# - cb_boost: +# n_stumps: 30 +# n_max_iterations: 20 +# estimators_generator: "Trees" +# max_depth: 1 +# diff --git a/summit/multiview_platform/monoview_classifiers/adaboost_pregen.py b/summit/multiview_platform/monoview_classifiers/adaboost_pregen.py index 604da04328de6191300cb1efffd0fe3f1fd368a2..f0fbd9551070b99db9276a5899d3502b99204b36 100644 --- a/summit/multiview_platform/monoview_classifiers/adaboost_pregen.py +++ b/summit/multiview_platform/monoview_classifiers/adaboost_pregen.py @@ -129,6 +129,7 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier, np.sqrt(1 - 4 * np.square(0.5 - self.estimator_errors_[:i + 1]))) for i in range(self.estimator_errors_.shape[0])]) + self.feature_importances_ = np.ones(X.shape[1]) return self # def canProbas(self): @@ -172,32 +173,35 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier, # self.n_stumps_per_attribute = params["n_tumps"] # return self - def getInterpret(self, directory, y_test): - interpretString = "" - # interpretString += self.getFeatureImportance(directory) - # interpretString += "\n\n Estimator error | Estimator weight\n" - # interpretString += "\n".join( - # [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for - # error, weight in - # zip(self.estimator_errors_, self.estimator_weights_)]) - # step_test_metrics = np.array( - # [self.plotted_metric.score(y_test, step_pred) for step_pred in - # 
self.step_predictions]) - # get_accuracy_graph(step_test_metrics, "AdaboostPregen", - # directory + "test_metrics.png", - # self.plotted_metric_name, set="test") - # # get_accuracy_graph(self.metrics, "AdaboostPregen", - # # directory + "metrics.png", self.plotted_metric_name, - # # bounds=list(self.bounds), - # # bound_name="boosting bound") - # np.savetxt(directory + "test_metrics.csv", step_test_metrics, - # delimiter=',') - # np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',') - # np.savetxt(directory + "times.csv", - # np.array([self.train_time, self.pred_time]), delimiter=',') - # np.savetxt(directory + "times_iter.csv", - # np.array([self.train_time, len(self.estimator_weights_)]), delimiter=',') - return interpretString + # def getInterpret(self, directory, y_test): + # # interpretString = "" + # # interpretString += self.getFeatureImportance(directory) + # # interpretString += "\n\n Estimator error | Estimator weight\n" + # # interpretString += "\n".join( + # # [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for + # # error, weight in + # # zip(self.estimator_errors_, self.estimator_weights_)]) + # # step_test_metrics = np.array( + # # [self.plotted_metric.score(y_test, step_pred) for step_pred in + # # self.step_predictions]) + # # get_accuracy_graph(step_test_metrics, "AdaboostPregen", + # # directory + "test_metrics.png", + # # self.plotted_metric_name, set="test") + # # # get_accuracy_graph(self.metrics, "AdaboostPregen", + # # # directory + "metrics.png", self.plotted_metric_name, + # # # bounds=list(self.bounds), + # # # bound_name="boosting bound") + # # np.savetxt(directory + "test_metrics.csv", step_test_metrics, + # # delimiter=',') + # # np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',') + # # np.savetxt(directory + "times.csv", + # # np.array([self.train_time, self.pred_time]), delimiter=',') + # # np.savetxt(directory + "times_iter.csv", + # # np.array([self.train_time, 
len(self.estimator_weights_)]), delimiter=',') + # return interpretString + + def feature_importances_(self, value): + self._feature_importances_ = value # def formatCmdArgs(args): # """Used to format kwargs for the parsed args""" @@ -207,10 +211,10 @@ class AdaboostPregen(AdaBoostClassifier, BaseMonoviewClassifier, # return kwargsDict -def paramsToSet(nIter, random_state): - """Used for weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({"n_estimators": random_state.randint(1, 500), - "base_estimator": None}) - return paramsSet +# def paramsToSet(nIter, random_state): +# """Used for weighted linear early fusion to generate random search sets""" +# paramsSet = [] +# for _ in range(nIter): +# paramsSet.append({"n_estimators": random_state.randint(1, 500), +# "base_estimator": None}) +# return paramsSet diff --git a/summit/multiview_platform/monoview_classifiers/additions/CBBoostUtils.py b/summit/multiview_platform/monoview_classifiers/additions/CBBoostUtils.py index 44e34d86d8f825da308bf30a193015425db7d2c7..8d473e08dc916d7b0cd5b83b8a0a5c5eb4a6831d 100644 --- a/summit/multiview_platform/monoview_classifiers/additions/CBBoostUtils.py +++ b/summit/multiview_platform/monoview_classifiers/additions/CBBoostUtils.py @@ -15,6 +15,7 @@ from ...monoview.monoview_utils import change_label_to_minus from ... 
import metrics + # Used for CBBoost class CBBoostClassifier(BaseEstimator, ClassifierMixin, BaseBoost): diff --git a/summit/multiview_platform/monoview_classifiers/additions/SelOptCB.py b/summit/multiview_platform/monoview_classifiers/additions/SelOptCB.py new file mode 100644 index 0000000000000000000000000000000000000000..3e5a5c708111d9b24a9b701d9646fc83e3c93beb --- /dev/null +++ b/summit/multiview_platform/monoview_classifiers/additions/SelOptCB.py @@ -0,0 +1,567 @@ +import logging +import math +import time +import os + +import numpy as np +import numpy.ma as ma +import scipy +from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin +from sklearn.utils.validation import check_is_fitted +from sklearn.metrics import zero_one_loss +from sklearn.preprocessing import LabelEncoder +from sklearn.tree import DecisionTreeClassifier +from ...monoview.monoview_utils import BaseMonoviewClassifier + +def change_label_to_minus(y): + """ + Change the label 0 to minus one + + Parameters + ---------- + y : + + Returns + ------- + label y with -1 instead of 0 + + """ + minus_y = np.copy(y) + minus_y[np.where(y == 0)] = -1 + return minus_y + +class BaseBoost(object): + + def _collect_probas(self, X, sub_sampled=False): + if self.estimators_generator.__class__.__name__ == "TreeClassifiersGenerator": + return np.asarray([clf.predict_proba(X[:, attribute_indices]) for + clf, attribute_indices in + zip(self.estimators_generator.estimators_, + self.estimators_generator.attribute_indices)]) + else: + return np.asarray([clf.predict_proba(X) for clf in + self.estimators_generator.estimators_]) + + def _binary_classification_matrix(self, X): + probas = self._collect_probas(X) + predicted_labels = np.argmax(probas, axis=2) + predicted_labels[predicted_labels == 0] = -1 + values = np.max(probas, axis=2) + return (predicted_labels * values).T + + def _initialize_alphas(self, n_examples): + raise NotImplementedError( + "Alpha weights initialization function is not 
implemented.") + + def check_opposed_voters(self, ): + nb_opposed = 0 + oppposed = [] + for column in self.classification_matrix[:, + self.chosen_columns_].transpose(): + for chosen_col in self.chosen_columns_: + if (-column.reshape((self.n_total_examples, + 1)) == self.classification_matrix[:, + chosen_col].reshape( + (self.n_total_examples, 1))).all(): + nb_opposed += 1 + break + return int(nb_opposed / 2) + +def sign(array): + """Computes the elementwise sign of all elements of an array. The sign function returns -1 if x <=0 and 1 if x > 0. + Note that numpy's sign function can return 0, which is not desirable in most cases in Machine Learning algorithms. + + Parameters + ---------- + array : array-like + Input values. + + Returns + ------- + ndarray + An array with the signs of input elements. + + """ + signs = np.sign(array) + + signs[array == 0] = -1 + return signs + +class CboundStumpBuilder(BaseEstimator, ClassifierMixin): + + def __init__(self, col_ind=0): + super(CboundStumpBuilder, self).__init__() + self.col_ind = col_ind + + def fit(self, X, y, base_vote=None, it_ind=0): + self.n_samples = X.shape[0] + X = X[:, self.col_ind].reshape((self.n_samples, 1)) + self.sorted_inds = np.argsort(X, axis=0).reshape(self.n_samples) + ord_X = X[self.sorted_inds, :] + intervals = np.zeros((self.n_samples-1, 2)) + intervals[:, 0] = ord_X[:-1, 0] + intervals[:, 1] = ord_X[1:, 0] + thresholds = np.mean(intervals, axis=1) + preds = np.array([np.array([(X[i] - th) / np.abs(X[i] - th) for th in thresholds]) for i in range(X.shape[0])])[:,:,0] + # print(preds.shape) + gg = np.sum(preds * y, axis=0) / self.n_samples + t = np.sum(preds * base_vote, axis=0)/self.n_samples + ng = np.sum(preds * preds, axis=0)/self.n_samples + base_margin = np.sum(base_vote * y) + base_norm = np.sum(np.square(base_vote)) + nf = base_norm / self.n_samples + gf = base_margin / self.n_samples + weights = (-4 * gf * gg * t + 4 * np.square( + gg) * nf) / ( + 4 * gf * gg * ng - 4 * np.square( + gg) * 
t) + cb2 = 1 - (gg * weights + gf) ** 2 / ( + nf + 2 * weights * t + ng * weights ** 2) + cb2[np.isnan(cb2)] = np.inf + self.th = thresholds[np.argmin(cb2)] + self.weight = weights[np.argmin(cb2)] + self.cb = cb2[np.argmin(cb2)] + + def predict(self, X, ): + X = X[:, self.col_ind] + return np.transpose(np.array([(X[i] - self.th) / np.abs(X[i] - self.th) if X[i]!=self.th else 0 for i in range(X.shape[0])])) + + +class CboundPseudoStumpBuilder(BaseEstimator, ClassifierMixin): + + def __init__(self, col_ind=0): + super(CboundPseudoStumpBuilder, self).__init__() + self.col_ind = col_ind + + def fit(self, X, y, base_vote=None, it_ind=0): + self.n_samples = X.shape[0] + X = X[:, self.col_ind].reshape((self.n_samples, 1)) + self.sorted_inds = np.argsort(X, axis=0).reshape(self.n_samples) + ord_X = X[self.sorted_inds, :] + intervals = np.zeros((self.n_samples-1, 2)) + intervals[:, 0] = ord_X[:-1, 0] + intervals[:, 1] = ord_X[1:, 0] + m1 = np.mean(X) + 2 * np.std(X) + m2 = np.mean(X) - 2 * np.std(X) + thresholds = np.mean(intervals, axis=1) + preds = np.array([np.array([(X[i] - th) / (m1-th) if X[i] > th else (X[i] - th) / (th-m2) for th in thresholds]) for i in range(X.shape[0])])[:,:,0] + # print(preds.shape) + gg = np.sum(preds * y, axis=0) / self.n_samples + t = np.sum(preds * base_vote, axis=0)/self.n_samples + ng = np.sum(preds * preds, axis=0)/self.n_samples + base_margin = np.sum(base_vote * y) + base_norm = np.sum(np.square(base_vote)) + nf = base_norm / self.n_samples + gf = base_margin / self.n_samples + weights = (-4 * gf * gg * t + 4 * np.square( + gg) * nf) / ( + 4 * gf * gg * ng - 4 * np.square( + gg) * t) + cb2 = 1 - (gg * weights + gf) ** 2 / ( + nf + 2 * weights * t + ng * weights ** 2) + cb2[np.isnan(cb2)] = np.inf + self.m1 = m1 + self.m2 = m2 + self.th = thresholds[np.argmin(cb2)] + self.weight = weights[np.argmin(cb2)] + self.cb = cb2[np.argmin(cb2)] + + def predict(self, X, ): + X = X[:, self.col_ind] + return np.transpose(np.array([(X[i] - self.th) / 
(self.m1-self.th) if X[i] > self.th else (X[i] - self.th) / (self.th-self.m2) for i in range(X.shape[0])])) + + +class CBoundThresholdFinder(BaseEstimator, ClassifierMixin): + + def __init__(self, col_ind=0): + super(CBoundThresholdFinder, self).__init__() + self.col_ind = col_ind + + def fit(self, X, y, base_vote=None, it_ind=0): + if len(np.unique(X[:, self.col_ind])) == 1: + self.th = X[0, self.col_ind] + self.cb = 1.0 + self.weight = 0.0 + self.no_th=True + return self + + self.n_samples = X.shape[0] + X = X[:, self.col_ind].reshape((self.n_samples,1)) + self.sorted_inds = np.argsort(X, axis=0).reshape(self.n_samples) + self.intervals = np.zeros(self.n_samples + 1) + self.intervals[0] = -np.inf + self.intervals[-1] = np.inf + self.intervals[1:-1] = X[self.sorted_inds[1:], 0] + ord_X = X[self.sorted_inds, :] + ord_y = y[self.sorted_inds, :] + ord_base_vote = base_vote[self.sorted_inds, :] + infs = np.transpose(np.tri(self.n_samples, dtype=int)) + sups = np.tri(self.n_samples, k=1, dtype=int) + e11 = np.sum(sups * ord_y, axis=0) / self.n_samples + e12 = np.sum(ord_y.reshape((self.n_samples, 1)) * infs, + axis=0) / self.n_samples + e21 = np.sum(ord_base_vote * sups, axis=0) / self.n_samples + e22 = np.sum(ord_base_vote * infs, axis=0) / self.n_samples + e31 = -np.sum(2 * ord_X * sups, axis=0) / self.n_samples + e32 = -np.sum(2 * ord_X * infs, axis=0) / self.n_samples + d11 = np.sum((ord_X * ord_y) * sups, axis=0) / self.n_samples + d12 = np.sum((ord_X * ord_y) * infs, axis=0) / self.n_samples + d21 = np.sum((ord_X * ord_base_vote) * sups, + axis=0) / self.n_samples + d22 = np.sum((ord_X * ord_base_vote) * infs, + axis=0) / self.n_samples + d31 = np.sum((ord_X * ord_X) * sups, axis=0) / self.n_samples + d32 = np.sum((ord_X * ord_X) * infs, axis=0) / self.n_samples + + f31 = np.sum(sups, axis=0) / self.n_samples + f32 = np.sum(infs, axis=0) / self.n_samples + + base_margin = np.sum(ord_base_vote * ord_y) + base_norm = np.sum(np.square(ord_base_vote)) + nf = 
base_norm / self.n_samples + gf = base_margin / self.n_samples + m1 = np.mean(X) + 2 * np.std(X) + m2 = np.mean(X) - 2 * np.std(X) + # # + a_ = (-2 * d11 * e21 ** 2 + 4 * d11 * e21 * e22 - 2 * d11 * e22 ** 2 + 2 * d11 * f31 * nf + 2 * d11 * f32 * nf + 2 * d12 * e21 ** 2 - 4 * d12 * e21 * e22 + 2 * d12 * e22 ** 2 - 2 * d12 * f31 * nf - 2 * d12 * f32 * nf + 2 * d21 * e11 * e21 - 2 * d21 * e11 * e22 - 2 * d21 * e12 * e21 + 2 * d21 * e12 * e22 - 2 * d21 * f31 * gf - 2 * d21 * f32 * gf - 2 * d22 * e11 * e21 + 2 * d22 * e11 * e22 + 2 * d22 * e12 * e21 - 2 * d22 * e12 * e22 + 2 * d22 * f31 * gf + 2 * d22 * f32 * gf - 2 * e11 * e21 * e22 * m1 + 2 * e11 * e21 * e22 * m2 + 2 * e11 * e22 ** 2 * m1 - 2 * e11 * e22 ** 2 * m2 + e11 * e31 * nf + e11 * e32 * nf - 2 * e11 * f32 * m1 * nf + 2 * e11 * f32 * m2 * nf + 2 * e12 * e21 ** 2 * m1 - 2 * e12 * e21 ** 2 * m2 - 2 * e12 * e21 * e22 * m1 + 2 * e12 * e21 * e22 * m2 - e12 * e31 * nf - e12 * e32 * nf - 2 * e12 * f31 * m1 * nf + 2 * e12 * f31 * m2 * nf - e21 * e31 * gf - e21 * e32 * gf + 2 * e21 * f32 * gf * m1 - 2 * e21 * f32 * gf * m2 + e22 * e31 * gf + e22 * e32 * gf + 2 * e22 * f31 * gf * m1 - 2 * e22 * f31 * gf * m2) + b_ = (2 * d11 * d21 * e21 - 2 * d11 * d21 * e22 - 2 * d11 * d22 * e21 + 2 * d11 * d22 * e22 + 6 * d11 * e21 ** 2 * m2 - 2 * d11 * e21 * e22 * m1 - 10 * d11 * e21 * e22 * m2 + 2 * d11 * e22 ** 2 * m1 + 4 * d11 * e22 ** 2 * m2 + d11 * e31 * nf + d11 * e32 * nf - 6 * d11 * f31 * m2 * nf - 2 * d11 * f32 * m1 * nf - 4 * d11 * f32 * m2 * nf - 2 * d12 * d21 * e21 + 2 * d12 * d21 * e22 + 2 * d12 * d22 * e21 - 2 * d12 * d22 * e22 - 4 * d12 * e21 ** 2 * m1 - 2 * d12 * e21 ** 2 * m2 + 10 * d12 * e21 * e22 * m1 + 2 * d12 * e21 * e22 * m2 - 6 * d12 * e22 ** 2 * m1 - d12 * e31 * nf - d12 * e32 * nf + 4 * d12 * f31 * m1 * nf + 2 * d12 * f31 * m2 * nf + 6 * d12 * f32 * m1 * nf - 2 * d21 ** 2 * e11 + 2 * d21 ** 2 * e12 + 4 * d21 * d22 * e11 - 4 * d21 * d22 * e12 - 6 * d21 * e11 * e21 * m2 + 4 * d21 * e11 * e22 * m1 + 2 * d21 * 
e11 * e22 * m2 - 2 * d21 * e12 * e21 * m1 + 8 * d21 * e12 * e21 * m2 - 2 * d21 * e12 * e22 * m1 - 4 * d21 * e12 * e22 * m2 - d21 * e31 * gf - d21 * e32 * gf + 6 * d21 * f31 * gf * m2 + 2 * d21 * f32 * gf * m1 + 4 * d21 * f32 * gf * m2 - 2 * d22 ** 2 * e11 + 2 * d22 ** 2 * e12 + 4 * d22 * e11 * e21 * m1 + 2 * d22 * e11 * e21 * m2 - 8 * d22 * e11 * e22 * m1 + 2 * d22 * e11 * e22 * m2 - 2 * d22 * e12 * e21 * m1 - 4 * d22 * e12 * e21 * m2 + 6 * d22 * e12 * e22 * m1 + d22 * e31 * gf + d22 * e32 * gf - 4 * d22 * f31 * gf * m1 - 2 * d22 * f31 * gf * m2 - 6 * d22 * f32 * gf * m1 + 2 * d31 * e11 * nf - 2 * d31 * e12 * nf - 2 * d31 * e21 * gf + 2 * d31 * e22 * gf + 2 * d32 * e11 * nf - 2 * d32 * e12 * nf - 2 * d32 * e21 * gf + 2 * d32 * e22 * gf + 2 * e11 * e21 * e22 * m1 * m2 - 2 * e11 * e21 * e22 * m2 ** 2 - 2 * e11 * e22 ** 2 * m1 ** 2 + 2 * e11 * e22 ** 2 * m1 * m2 - 3 * e11 * e31 * m2 * nf - 4 * e11 * e32 * m1 * nf + e11 * e32 * m2 * nf + 2 * e11 * f32 * m1 ** 2 * nf - 2 * e11 * f32 * m1 * m2 * nf - 2 * e12 * e21 ** 2 * m1 * m2 + 2 * e12 * e21 ** 2 * m2 ** 2 + 2 * e12 * e21 * e22 * m1 ** 2 - 2 * e12 * e21 * e22 * m1 * m2 - e12 * e31 * m1 * nf + 4 * e12 * e31 * m2 * nf + 3 * e12 * e32 * m1 * nf + 2 * e12 * f31 * m1 * m2 * nf - 2 * e12 * f31 * m2 ** 2 * nf + 3 * e21 * e31 * gf * m2 + 4 * e21 * e32 * gf * m1 - e21 * e32 * gf * m2 - 2 * e21 * f32 * gf * m1 ** 2 + 2 * e21 * f32 * gf * m1 * m2 + e22 * e31 * gf * m1 - 4 * e22 * e31 * gf * m2 - 3 * e22 * e32 * gf * m1 - 2 * e22 * f31 * gf * m1 * m2 + 2 * e22 * f31 * gf * m2 ** 2) + c_ = (-6 * d11 * d21 * e21 * m2 + 6 * d11 * d21 * e22 * m2 + 6 * d11 * d22 * e21 * m2 - 6 * d11 * d22 * e22 * m2 - 6 * d11 * e21 ** 2 * m2 ** 2 + 6 * d11 * e21 * e22 * m1 * m2 + 6 * d11 * e21 * e22 * m2 ** 2 - 6 * d11 * e22 ** 2 * m1 * m2 - 3 * d11 * e31 * m2 * nf - 3 * d11 * e32 * m2 * nf + 6 * d11 * f31 * m2 ** 2 * nf + 6 * d11 * f32 * m1 * m2 * nf + 6 * d12 * d21 * e21 * m1 - 6 * d12 * d21 * e22 * m1 - 6 * d12 * d22 * e21 * m1 + 6 * d12 * d22 * 
e22 * m1 + 6 * d12 * e21 ** 2 * m1 * m2 - 6 * d12 * e21 * e22 * m1 ** 2 - 6 * d12 * e21 * e22 * m1 * m2 + 6 * d12 * e22 ** 2 * m1 ** 2 + 3 * d12 * e31 * m1 * nf + 3 * d12 * e32 * m1 * nf - 6 * d12 * f31 * m1 * m2 * nf - 6 * d12 * f32 * m1 ** 2 * nf + 6 * d21 ** 2 * e11 * m2 - 6 * d21 ** 2 * e12 * m2 - 6 * d21 * d22 * e11 * m1 - 6 * d21 * d22 * e11 * m2 + 6 * d21 * d22 * e12 * m1 + 6 * d21 * d22 * e12 * m2 + 6 * d21 * e11 * e21 * m2 ** 2 - 6 * d21 * e11 * e22 * m1 * m2 - 6 * d21 * e12 * e21 * m2 ** 2 + 6 * d21 * e12 * e22 * m1 * m2 + 3 * d21 * e31 * gf * m2 + 3 * d21 * e32 * gf * m2 - 6 * d21 * f31 * gf * m2 ** 2 - 6 * d21 * f32 * gf * m1 * m2 + 6 * d22 ** 2 * e11 * m1 - 6 * d22 ** 2 * e12 * m1 - 6 * d22 * e11 * e21 * m1 * m2 + 6 * d22 * e11 * e22 * m1 ** 2 + 6 * d22 * e12 * e21 * m1 * m2 - 6 * d22 * e12 * e22 * m1 ** 2 - 3 * d22 * e31 * gf * m1 - 3 * d22 * e32 * gf * m1 + 6 * d22 * f31 * gf * m1 * m2 + 6 * d22 * f32 * gf * m1 ** 2 - 6 * d31 * e11 * m2 * nf + 6 * d31 * e12 * m2 * nf + 6 * d31 * e21 * gf * m2 - 6 * d31 * e22 * gf * m2 - 6 * d32 * e11 * m1 * nf + 6 * d32 * e12 * m1 * nf + 6 * d32 * e21 * gf * m1 - 6 * d32 * e22 * gf * m1 + 3 * e11 * e31 * m2 ** 2 * nf + 3 * e11 * e32 * m1 ** 2 * nf - 3 * e12 * e31 * m2 ** 2 * nf - 3 * e12 * e32 * m1 ** 2 * nf - 3 * e21 * e31 * gf * m2 ** 2 - 3 * e21 * e32 * gf * m1 ** 2 + 3 * e22 * e31 * gf * m2 ** 2 + 3 * e22 * e32 * gf * m1 ** 2) + d_ = (2 * d11 * d21 * d22 * m1 - 2 * d11 * d21 * d22 * m2 + 6 * d11 * d21 * e21 * m2 ** 2 - 2 * d11 * d21 * e22 * m1 * m2 - 4 * d11 * d21 * e22 * m2 ** 2 - 2 * d11 * d22 ** 2 * m1 + 2 * d11 * d22 ** 2 * m2 - 2 * d11 * d22 * e21 * m1 * m2 - 4 * d11 * d22 * e21 * m2 ** 2 - 2 * d11 * d22 * e22 * m1 ** 2 + 8 * d11 * d22 * e22 * m1 * m2 + 2 * d11 * d32 * m1 * nf - 2 * d11 * d32 * m2 * nf + 2 * d11 * e21 ** 2 * m2 ** 3 - 4 * d11 * e21 * e22 * m1 * m2 ** 2 + 2 * d11 * e22 ** 2 * m1 ** 2 * m2 + 3 * d11 * e31 * m2 ** 2 * nf - d11 * e32 * m1 ** 2 * nf + 4 * d11 * e32 * m1 * m2 * nf - 2 * d11 * f31 
* m2 ** 3 * nf - 2 * d11 * f32 * m1 ** 2 * m2 * nf - 2 * d12 * d21 ** 2 * m1 + 2 * d12 * d21 ** 2 * m2 + 2 * d12 * d21 * d22 * m1 - 2 * d12 * d21 * d22 * m2 - 8 * d12 * d21 * e21 * m1 * m2 + 2 * d12 * d21 * e21 * m2 ** 2 + 4 * d12 * d21 * e22 * m1 ** 2 + 2 * d12 * d21 * e22 * m1 * m2 + 4 * d12 * d22 * e21 * m1 ** 2 + 2 * d12 * d22 * e21 * m1 * m2 - 6 * d12 * d22 * e22 * m1 ** 2 + 2 * d12 * d31 * m1 * nf - 2 * d12 * d31 * m2 * nf - 2 * d12 * e21 ** 2 * m1 * m2 ** 2 + 4 * d12 * e21 * e22 * m1 ** 2 * m2 - 2 * d12 * e22 ** 2 * m1 ** 3 - 4 * d12 * e31 * m1 * m2 * nf + d12 * e31 * m2 ** 2 * nf - 3 * d12 * e32 * m1 ** 2 * nf + 2 * d12 * f31 * m1 * m2 ** 2 * nf + 2 * d12 * f32 * m1 ** 3 * nf - 6 * d21 ** 2 * e11 * m2 ** 2 + 2 * d21 ** 2 * e12 * m1 * m2 + 4 * d21 ** 2 * e12 * m2 ** 2 + 10 * d21 * d22 * e11 * m1 * m2 + 2 * d21 * d22 * e11 * m2 ** 2 - 2 * d21 * d22 * e12 * m1 ** 2 - 10 * d21 * d22 * e12 * m1 * m2 - 2 * d21 * d32 * gf * m1 + 2 * d21 * d32 * gf * m2 - 2 * d21 * e11 * e21 * m2 ** 3 + 2 * d21 * e11 * e22 * m1 * m2 ** 2 + 2 * d21 * e12 * e21 * m1 * m2 ** 2 - 2 * d21 * e12 * e22 * m1 ** 2 * m2 - 3 * d21 * e31 * gf * m2 ** 2 + d21 * e32 * gf * m1 ** 2 - 4 * d21 * e32 * gf * m1 * m2 + 2 * d21 * f31 * gf * m2 ** 3 + 2 * d21 * f32 * gf * m1 ** 2 * m2 - 4 * d22 ** 2 * e11 * m1 ** 2 - 2 * d22 ** 2 * e11 * m1 * m2 + 6 * d22 ** 2 * e12 * m1 ** 2 - 2 * d22 * d31 * gf * m1 + 2 * d22 * d31 * gf * m2 + 2 * d22 * e11 * e21 * m1 * m2 ** 2 - 2 * d22 * e11 * e22 * m1 ** 2 * m2 - 2 * d22 * e12 * e21 * m1 ** 2 * m2 + 2 * d22 * e12 * e22 * m1 ** 3 + 4 * d22 * e31 * gf * m1 * m2 - d22 * e31 * gf * m2 ** 2 + 3 * d22 * e32 * gf * m1 ** 2 - 2 * d22 * f31 * gf * m1 * m2 ** 2 - 2 * d22 * f32 * gf * m1 ** 3 + 6 * d31 * e11 * m2 ** 2 * nf - 2 * d31 * e12 * m1 * m2 * nf - 4 * d31 * e12 * m2 ** 2 * nf - 6 * d31 * e21 * gf * m2 ** 2 + 2 * d31 * e22 * gf * m1 * m2 + 4 * d31 * e22 * gf * m2 ** 2 + 4 * d32 * e11 * m1 ** 2 * nf + 2 * d32 * e11 * m1 * m2 * nf - 6 * d32 * e12 * m1 ** 2 * nf - 4 * d32 
* e21 * gf * m1 ** 2 - 2 * d32 * e21 * gf * m1 * m2 + 6 * d32 * e22 * gf * m1 ** 2 - e11 * e31 * m2 ** 3 * nf - e11 * e32 * m1 ** 2 * m2 * nf + e12 * e31 * m1 * m2 ** 2 * nf + e12 * e32 * m1 ** 3 * nf + e21 * e31 * gf * m2 ** 3 + e21 * e32 * gf * m1 ** 2 * m2 - e22 * e31 * gf * m1 * m2 ** 2 - e22 * e32 * gf * m1 ** 3) + e_ = - 2 * d11 * d21 * d22 * m1 * m2 + 2 * d11 * d21 * d22 * m2 ** 2 - 2 * d11 * d21 * e21 * m2 ** 3 + 2 * d11 * d21 * e22 * m1 * m2 ** 2 + 2 * d11 * d22 ** 2 * m1 ** 2 - 2 * d11 * d22 ** 2 * m1 * m2 + 2 * d11 * d22 * e21 * m1 * m2 ** 2 - 2 * d11 * d22 * e22 * m1 ** 2 * m2 - 2 * d11 * d32 * m1 ** 2 * nf + 2 * d11 * d32 * m1 * m2 * nf - d11 * e31 * m2 ** 3 * nf - d11 * e32 * m1 ** 2 * m2 * nf + 2 * d12 * d21 ** 2 * m1 * m2 - 2 * d12 * d21 ** 2 * m2 ** 2 - 2 * d12 * d21 * d22 * m1 ** 2 + 2 * d12 * d21 * d22 * m1 * m2 + 2 * d12 * d21 * e21 * m1 * m2 ** 2 - 2 * d12 * d21 * e22 * m1 ** 2 * m2 - 2 * d12 * d22 * e21 * m1 ** 2 * m2 + 2 * d12 * d22 * e22 * m1 ** 3 - 2 * d12 * d31 * m1 * m2 * nf + 2 * d12 * d31 * m2 ** 2 * nf + d12 * e31 * m1 * m2 ** 2 * nf + d12 * e32 * m1 ** 3 * nf + 2 * d21 ** 2 * e11 * m2 ** 3 - 2 * d21 ** 2 * e12 * m1 * m2 ** 2 - 4 * d21 * d22 * e11 * m1 * m2 ** 2 + 4 * d21 * d22 * e12 * m1 ** 2 * m2 + 2 * d21 * d32 * gf * m1 ** 2 - 2 * d21 * d32 * gf * m1 * m2 + d21 * e31 * gf * m2 ** 3 + d21 * e32 * gf * m1 ** 2 * m2 + 2 * d22 ** 2 * e11 * m1 ** 2 * m2 - 2 * d22 ** 2 * e12 * m1 ** 3 + 2 * d22 * d31 * gf * m1 * m2 - 2 * d22 * d31 * gf * m2 ** 2 - d22 * e31 * gf * m1 * m2 ** 2 - d22 * e32 * gf * m1 ** 3 - 2 * d31 * e11 * m2 ** 3 * nf + 2 * d31 * e12 * m1 * m2 ** 2 * nf + 2 * d31 * e21 * gf * m2 ** 3 - 2 * d31 * e22 * gf * m1 * m2 ** 2 - 2 * d32 * e11 * m1 ** 2 * m2 * nf + 2 * d32 * e12 * m1 ** 3 * nf + 2 * d32 * e21 * gf * m1 ** 2 * m2 - 2 * d32 * e22 * gf * m1 ** 3 + # + roots = np.zeros((self.n_samples, 4)) + for ind, (c4, c3, c2, c1, c0) in enumerate(zip(a_, b_, c_, d_, e_)): + poly_roots = np.roots(np.array([c4, c3, c2, c1, c0])) + 
if poly_roots.shape[0]==4: + roots[ind] = np.roots(np.array([c4, c3, c2, c1, c0])) + ok_roots = [] + poly_inds = [] + for i in range(4): + is_sup = np.greater(roots[:, i], self.intervals[:-1]) + is_inf = np.less(roots[:, i], self.intervals[1:]) + avail_roots = np.where(is_inf*is_sup)[0] + + # for root, sup, inf, int_inf, int_sup, avail in zip(roots[:,i], is_sup, is_inf, self.intervals[:-1], self.intervals[1:], is_inf*is_sup): + # print("Root {} in inter [{}; {}], is {}".format(root, int_inf, int_sup, avail)) + # quit() + ok_roots += list(roots[avail_roots, i]) + poly_inds+=list(avail_roots) + ok_roots = np.array(ok_roots) + self.ths = X[self.sorted_inds, 0] + gg = (d11[poly_inds] - ok_roots * e11[poly_inds]) / (m1 - ok_roots) + ( + d12[poly_inds] - ok_roots * e12[poly_inds]) / (ok_roots - m2) + t = ((d21[poly_inds] - ok_roots * e21[poly_inds]) / (m1 - ok_roots) + ( + d22[poly_inds] - ok_roots * e22[poly_inds]) / (ok_roots - m2)) + ng = (d31[poly_inds] + e31[poly_inds] * ok_roots + f31[poly_inds] * ok_roots ** 2) / ( + m1 - ok_roots) ** 2 + ( + d32[poly_inds] + ok_roots * e32[poly_inds] + ok_roots ** 2 * f32[poly_inds]) / ( + ok_roots - m2) ** 2 + + # ths = np.linspace(m2, m1) + # preds = np.array([np.array([(X[i] - th) / m1 - th if + # X[i] > th else (X[i] - th) / ( + # th - m2) for th in ths]) for i in range(X.shape[0])])[:,:,0] + # gg = np.sum(preds*y, axis=0)/self.n_samples + # t = np.sum(preds * base_vote, axis=0)/self.n_samples + # ng = np.sum(preds * preds, axis=0)/self.n_samples + # gg = (d11 - ths * e11) / (m1 - ths) + ( + # d12 - ths * e12) / (ths - m2) + # t = ((d21 - ths * e21) / (m1 - ths) + ( + # d22 - ths * e22) / (ths - m2)) + # ng = (d31 + e31 * ths + f31 * ths ** 2) / ( + # m1 - ths) ** 2 + ( + # d32 + ths * e32 + ths ** 2 * f32) / ( + # ok_roots - m2) ** 2 + # if it_ind < len(plif) and self.col_ind == plif[it_ind][1]: + # self.th = plif[it_ind][0] + # self.to_choose=True + # else: + # self.th = 0 + # self.to_choose=False + # preds = 
np.array([(X[i] - self.th) / (m1 - self.th) if + # X[i] > self.th else (X[i] - self.th) / ( + # self.th - m2) for i in range(X.shape[0])]) + # gg = np.sum(preds[self.sorted_inds]*ord_y) + # t = np.sum(preds[self.sorted_inds]*base_vote[self.sorted_inds]) + # ng = np.sum(preds[self.sorted_inds]*preds[self.sorted_inds]) + weights = (-4 * base_margin * gg * t + 4 * np.square( + gg) * base_norm) / ( + 4 * base_margin * gg * ng - 4 * np.square( + gg) * t) + cb2 = 1 - (gg * weights + base_margin / (self.n_samples)) ** 2 / ( + base_norm / ( + self.n_samples) + 2 * weights * t + ng * weights ** 2) + + + self.th = ok_roots[np.argmin(cb2)] + self.m1 = m1 + self.m2 = m2 + self.no_th = False + # if self.col_ind ==57: + # print("OK roots : {}, cb2 : {}, th: {}".format(ok_roots, cb2, round(self.th, 2))) + # print(X[self.sorted_inds, :]) + self.cb = cb2[np.argmin(cb2)] + self.weight = weights[np.argmin(cb2)] + return self + + def predict(self, X): + X = X[:, self.col_ind] + if self.no_th: + return np.transpose(np.zeros(self.n_samples)) + else: + return np.sign(np.transpose(np.array([(X[i] - self.th) / (self.m1 - self.th) if + X[i] > self.th else (X[i] - self.th) / ( + self.th - self.m2) for i in range(X.shape[0])]))) + + +class RandomStump(BaseEstimator, ClassifierMixin): + + def __init__(self, rs=42): + self.rs = np.random.RandomState(rs) + + def fit(self, X, y): + self.col_ind = self.rs.choice(np.arange(X.shape[1]), 1)[0] + self.th = self.rs.uniform(np.min(self.col_ind), np.max(self.col_ind), 1)[0] + # print(np.sum(np.array([(X[i, self.col_ind] - self.th) / abs(X[i, self.col_ind] - self.th) for i in range(X.shape[0])]) *y)) + if np.sum(np.array([(X[i, self.col_ind] - self.th) / abs(X[i, self.col_ind] - self.th) for i in range(X.shape[0])]) *y) < 0: + self.reverse=-1 + else: + self.reverse=1 + return self + + def predict(self, X): + X = X[:, self.col_ind] + return self.reverse*np.transpose(np.array([(X[i] - self.th) / abs(X[i] - self.th) for i in range(X.shape[0])])) + + + + + +# 
Used for CBBoost + +class SelfOptCBBoostClassifier(BaseMonoviewClassifier): + def __init__(self, n_max_iterations=10, random_state=42, twice_the_same=True, + random_start=False, plotted_metric=zero_one_loss, save_train_data=True, + test_graph=True, base_estimator="BaseStump"): + super(SelfOptCBBoostClassifier, self).__init__() + r""" + + Parameters + ---------- + n_max_iterations : int + Maximum number of iterations for the boosting algorithm. + estimators_generator : object + Sk-learn classifier object used to generate the hypotheses with the data. + random_state : np.random.RandomState or int + The random state, used in order to be reproductible + self_complemented : bool + If True, in the hypotheses generation process, for each hypothesis, it's complement will be generated too. + twice_the_same : bool + If True, the algorithm will be allowed to select twice the same hypothesis in the boosting process. + c_bound_choice : bool + If True, the C-Bound will be used to select the hypotheses. If False, the margin will be the criterion. + n_stumps_per_attribute : int + The number of hypotheses generated by data attribute + use_r : bool + If True, uses edge to compute the performance of a voter. If False, use the error instead. + plotted_metric : Metric module + The metric that will be plotted for each iteration of boosting. 
+ """ + if type(random_state) is int: + self.random_state = np.random.RandomState(random_state) + else: + self.random_state = random_state + self.train_time = 0 + self.train_shape = None + self.step_decisions = None + self.step_prod = None + self.n_max_iterations = n_max_iterations + self.twice_the_same = twice_the_same + self.random_start = random_start + self.plotted_metric = plotted_metric + self.save_train_data = save_train_data + self.test_graph = test_graph + self.printed_args_name_list = ["n_max_iterations" + "twice_the_same", + "random_start",] + self.param_names = [] + self.classed_params = [] + self.distribs = [] + self.weird_strings = {} + self.base_estimator = base_estimator + + def fit(self, X, y): + self.n_features = X.shape[1] + formatted_X, formatted_y = self.format_X_y(X, y) + + self.init_info_containers() + + # Initialize the weak classifiers ensemble + m, n = formatted_X.shape + + start = time.time() + self.n_total_hypotheses_ = n + self.n_total_examples = m + + # Initialize the majority vote + self.init_boosting(formatted_X, formatted_y) + + self.break_cause = " the maximum number of iterations was attained." + + for k in range(self.n_max_iterations):# - 1 if self.n_max_iterations is not None else np.inf)): + + # Print dynamically the step and the error of the current classifier + self.it = k+1 + + # Find the best (weight, voter) couple. 
+ self._find_new_voter(formatted_X, formatted_y) + + end = time.time() + self.train_time = end - start + return self + + def predict_proba(self, X): + start = time.time() + check_is_fitted(self, 'weights_') + if scipy.sparse.issparse(X): + logging.warning('Converting sparse matrix to dense matrix.') + X = np.array(X.todense()) + + votes = np.array([voter.predict(X) for voter in self.voters]) + vote = np.average(votes, weights=self.weights_, axis=1) + proba = np.array([np.array([(1 - vote_sample)/2, (1 + vote_sample)/2]) for vote_sample in vote]) + return proba + + def predict(self, X): + return self._iter_predict(X, self.n_max_iterations) + + def _iter_predict(self, X, iter_index=1): + start = time.time() + check_is_fitted(self, 'weights_') + if scipy.sparse.issparse(X): + logging.warning('Converting sparse matrix to dense matrix.') + X = np.array(X.todense()) + + votes = np.array( + [voter.predict(X) for voter in self.voters]).transpose() + vote = np.sum(votes[:, :iter_index] * self.weights_[:iter_index], axis=1) + + signs_array = np.array([int(x) for x in sign(vote)]) + signs_array[signs_array == -1] = 0 + + end = time.time() + self.predict_time = end - start + + # Predict for each step of the boosting process + + return signs_array + + + def init_boosting(self, X, y): + """THis initialization corressponds to the first round of boosting with equal weights for each examples and the voter chosen by it's margin.""" + + if self.random_start: + voter = RandomStump().fit(X, y) + else: + voter = DecisionTreeClassifier(max_depth=1).fit(X, y) + self.voters.append(voter) + + self.previous_vote = voter.predict(X).astype(np.float64) + self.q = 1 + self.weights_.append(self.q) + + + def format_X_y(self, X, y): + """Formats the data : X -the examples- and y -the labels- to be used properly by the algorithm """ + if scipy.sparse.issparse(X): + logging.info('Converting to dense matrix.') + X = np.array(X.todense()) + # Initialization + y_neg = change_label_to_minus(y) + y_neg = 
y_neg.reshape((y.shape[0], 1)) + return X, y_neg + + + def init_info_containers(self): + """Initialize the containers that will be collected at each iteration for the analysis""" + self.weights_ = [] + self.voters = [] + self.chosen_features = [] + self.chosen_columns_ = [] + self.fobidden_columns = [] + self.c_bounds = [] + self.voter_perfs = [] + self.example_weights_ = [] + self.train_metrics = [] + self.bounds = [] + self.disagreements = [] + self.margins = [] + self.previous_votes = [] + self.previous_margins = [] + self.respected_bound = True + self.selected_margins = [] + self.tau = [] + self.norm = [] + self.mincq_train_metrics = [] + self.mincq_c_bounds = [] + self.mincq_weights = [] + self.mincq_learners = [] + self.mincq_step_decisions = [] + + + + + def _find_new_voter(self, X, y): + """Here, we solve the two_voters_mincq_problem for each potential new voter, + and select the one that has the smallest minimum""" + + m, n = X.shape + prop_cols = np.zeros((m, n*2)) + possible_clfs = [] + for col_ind in range(n): + if self.base_estimator == "BaseStump": + clf = CboundStumpBuilder(col_ind=col_ind) + elif self.base_estimator == "PseudoLinearStump": + clf = CboundPseudoStumpBuilder(col_ind=col_ind) + elif self.base_estimator == "LinearStump": + clf = CBoundThresholdFinder(col_ind=col_ind) + else: + raise AttributeError("Wrong base estimator.") + clf.fit(X, y, self.previous_vote.reshape((m, 1)), it_ind=self.it) + prop_cols[:, col_ind] = clf.predict(X) + prop_cols[:, col_ind+n] = -clf.predict(X) + possible_clfs.append(clf) + margins = np.sum(prop_cols * y, axis=0) + # print(margins) + norms = np.sum(np.square(prop_cols), axis=0) + base_margin = np.sum(self.previous_vote.reshape((m, 1))*y, axis=0) + + tau = np.sum(prop_cols * self.previous_vote.reshape((m, 1)), axis=0) + + base_norm = np.sum(np.square(self.previous_vote)) + + weights = (-4 * base_margin * margins * tau + 4 * np.square( + margins) * base_norm) / ( + 4 * base_margin * margins * norms - 4 * 
np.square( + margins) * tau) + print(weights, [clf.weight for clf in possible_clfs]) + # print(weights) + cbs = 1 - (1/(m))*(margins * weights + base_margin )**2/(base_norm + 2*weights*tau + norms*(weights**2)) + # print(cbs) + cbs[np.isnan(cbs)] = np.inf + cbs[np.isnan(weights)] = np.inf + # quit() + best_ind = np.argmin(cbs) + # print(possible_clfs[best_ind].to_choose) + # print(best_ind) + if best_ind<n: + self.weights_.append(weights[best_ind]) + self.voters.append(possible_clfs[best_ind]) + else: + self.weights_.append(weights[best_ind]) + self.voters.append(possible_clfs[best_ind-n]) + print(weights[best_ind-n], weights[best_ind]) + # print([plif for plif in zip(y, prop_cols[:, best_ind])]) + # print(weights[best_ind]) + # print("Best CB : {} for col : {}, weighted : {}, with th : {}".format(round(cbs[best_ind], 2), best_ind,round(weights[best_ind], 2), round(possible_clfs[best_ind].th,2))) + self.previous_vote += weights[best_ind]*prop_cols[:, best_ind] diff --git a/summit/multiview_platform/monoview_classifiers/cb_boost.py b/summit/multiview_platform/monoview_classifiers/cb_boost.py index 2374d68ea605a8b6f8bad0f7409dfef244685bc7..f2ca8a3a3195079fa12d79ae38382714533aaacd 100644 --- a/summit/multiview_platform/monoview_classifiers/cb_boost.py +++ b/summit/multiview_platform/monoview_classifiers/cb_boost.py @@ -45,7 +45,7 @@ class CBBoost(CBBoostClassifier, BaseMonoviewClassifier): mincq_tracking=False ) self.param_names = ["n_max_iterations", "n_stumps", "random_state"] - self.distribs = [CustomRandint(low=2, high=500), [n_stumps], + self.distribs = [CustomRandint(low=2, high=500), [1,2,10], [random_state]] self.classed_params = [] self.weird_strings = {} diff --git a/summit/multiview_platform/monoview_classifiers/cq_boost.py b/summit/multiview_platform/monoview_classifiers/cq_boost.py index 2cea12d2b907cfe6840e369133c768a7d2814209..202829ebd01be9c3f9253b1058ff4bc4aff86c02 100644 --- a/summit/multiview_platform/monoview_classifiers/cq_boost.py +++ 
b/summit/multiview_platform/monoview_classifiers/cq_boost.py @@ -39,23 +39,23 @@ class CQBoost(ColumnGenerationClassifier, BaseMonoviewClassifier): # """Used to know if the classifier can return label probabilities""" # return False - def get_interpretation(self, directory, y_test, multi_class=False): - np.savetxt(directory + "train_metrics.csv", self.train_metrics, - delimiter=',') - np.savetxt(directory + "c_bounds.csv", self.c_bounds, - delimiter=',') - np.savetxt(directory + "y_test_step.csv", self.step_decisions, - delimiter=',') - step_metrics = [] - for step_index in range(self.step_decisions.shape[1] - 1): - step_metrics.append(self.plotted_metric.score(y_test, - self.step_decisions[:, - step_index])) - step_metrics = np.array(step_metrics) - np.savetxt(directory + "step_test_metrics.csv", step_metrics, - delimiter=',') - return getInterpretBase(self, directory, "CQBoost", self.weights_, - y_test) + # def get_interpretation(self, directory, y_test, multi_class=False): + # # np.savetxt(directory + "train_metrics.csv", self.train_metrics, + # # delimiter=',') + # # np.savetxt(directory + "c_bounds.csv", self.c_bounds, + # # delimiter=',') + # # np.savetxt(directory + "y_test_step.csv", self.step_decisions, + # # delimiter=',') + # # step_metrics = [] + # # for step_index in range(self.step_decisions.shape[1] - 1): + # # step_metrics.append(self.plotted_metric.score(y_test, + # # self.step_decisions[:, + # # step_index])) + # # step_metrics = np.array(step_metrics) + # # np.savetxt(directory + "step_test_metrics.csv", step_metrics, + # # delimiter=',') + # return getInterpretBase(self, directory, "CQBoost", self.weights_, + # y_test) # def formatCmdArgs(args): diff --git a/summit/multiview_platform/monoview_classifiers/min_cq.py b/summit/multiview_platform/monoview_classifiers/min_cq.py index 6741cabad98d988f4540020040777bc1d830f2d9..dfc294af52a9843b4fe89dc14816ebb67fba7a2a 100644 --- a/summit/multiview_platform/monoview_classifiers/min_cq.py +++ 
b/summit/multiview_platform/monoview_classifiers/min_cq.py @@ -616,27 +616,27 @@ class MinCQ(MinCqLearner, BaseMonoviewClassifier): # """Used to know if the classifier can return label probabilities""" # return True - def set_params(self, **params): - self.mu = params["mu"] - self.random_state = params["random_state"] - self.n_stumps_per_attribute = params["n_stumps_per_attribute"] - return self - - def get_params(self, deep=True): - return {"random_state": self.random_state, "mu": self.mu, - "n_stumps_per_attribute": self.n_stumps_per_attribute} - - def getInterpret(self, directory, y_test): - interpret_string = "Train C_bound value : " + str(self.cbound_train) - y_rework = np.copy(y_test) - y_rework[np.where(y_rework == 0)] = -1 - interpret_string += "\n Test c_bound value : " + str( - self.majority_vote.cbound_value(self.x_test, y_rework)) - np.savetxt(directory+"times.csv", np.array([self.train_time, 0])) - return interpret_string - - def get_name_for_fusion(self): - return "MCQ" + # def set_params(self, **params): + # self.mu = params["mu"] + # self.random_state = params["random_state"] + # self.n_stumps_per_attribute = params["n_stumps_per_attribute"] + # return self + + # def get_params(self, deep=True): + # return {"random_state": self.random_state, "mu": self.mu, + # "n_stumps_per_attribute": self.n_stumps_per_attribute} + # # + # def getInterpret(self, directory, y_test): + # interpret_string = "Train C_bound value : " + str(self.cbound_train) + # y_rework = np.copy(y_test) + # y_rework[np.where(y_rework == 0)] = -1 + # interpret_string += "\n Test c_bound value : " + str( + # self.majority_vote.cbound_value(self.x_test, y_rework)) + # np.savetxt(directory+"times.csv", np.array([self.train_time, 0])) + # return interpret_string + + # def get_name_for_fusion(self): + # return "MCQ" # # def formatCmdArgs(args): @@ -646,9 +646,9 @@ class MinCQ(MinCqLearner, BaseMonoviewClassifier): # return kwargsDict -def paramsToSet(nIter, randomState): - """Used for 
weighted linear early fusion to generate random search sets""" - paramsSet = [] - for _ in range(nIter): - paramsSet.append({}) - return paramsSet +# def paramsToSet(nIter, randomState): +# """Used for weighted linear early fusion to generate random search sets""" +# paramsSet = [] +# for _ in range(nIter): +# paramsSet.append({}) +# return paramsSet diff --git a/summit/multiview_platform/monoview_classifiers/self_opt_cb.py b/summit/multiview_platform/monoview_classifiers/self_opt_cb.py new file mode 100644 index 0000000000000000000000000000000000000000..711e2629923c16b64eb21543115939acd3aabd75 --- /dev/null +++ b/summit/multiview_platform/monoview_classifiers/self_opt_cb.py @@ -0,0 +1,13 @@ +from .additions.SelOptCB import SelfOptCBBoostClassifier + +classifier_class_name = "SelfOptCBBoostBaseStump" + +class SelfOptCBBoostBaseStump(SelfOptCBBoostClassifier): + def __init__(self, n_max_iterations=10, random_state=42, twice_the_same=True, + random_start=False, save_train_data=True, + test_graph=True, base_estimator="BaseStump"): + SelfOptCBBoostClassifier.__init__(self, n_max_iterations=n_max_iterations, random_state=random_state, twice_the_same=twice_the_same, + random_start=random_start, save_train_data=save_train_data, + test_graph=test_graph, base_estimator=base_estimator) + + diff --git a/summit/multiview_platform/monoview_classifiers/self_opt_cb_pseudo.py b/summit/multiview_platform/monoview_classifiers/self_opt_cb_pseudo.py new file mode 100644 index 0000000000000000000000000000000000000000..18f028c8d4912c86f8d0b5d3629c23b502fbf290 --- /dev/null +++ b/summit/multiview_platform/monoview_classifiers/self_opt_cb_pseudo.py @@ -0,0 +1,13 @@ +from .additions.SelOptCB import SelfOptCBBoostClassifier + +classifier_class_name = "SelfOptCBBoostBaseStump" + +class SelfOptCBBoostBaseStump(SelfOptCBBoostClassifier): + def __init__(self, n_max_iterations=10, random_state=42, twice_the_same=True, + random_start=False, save_train_data=True, + test_graph=True, 
base_estimator="PseudoLinearStump"): + SelfOptCBBoostClassifier.__init__(self, n_max_iterations=n_max_iterations, random_state=random_state, twice_the_same=twice_the_same, + random_start=random_start, save_train_data=save_train_data, + test_graph=test_graph, base_estimator=base_estimator) + + diff --git a/summit/multiview_platform/monoview_classifiers/self_opt_cb_root.py b/summit/multiview_platform/monoview_classifiers/self_opt_cb_root.py new file mode 100644 index 0000000000000000000000000000000000000000..d7f671f0341c2d8a7fa76eaa377f16352fe44630 --- /dev/null +++ b/summit/multiview_platform/monoview_classifiers/self_opt_cb_root.py @@ -0,0 +1,13 @@ +from .additions.SelOptCB import SelfOptCBBoostClassifier + +classifier_class_name = "SelfOptCBBoostBaseStump" + +class SelfOptCBBoostBaseStump(SelfOptCBBoostClassifier): + def __init__(self, n_max_iterations=10, random_state=42, twice_the_same=True, + random_start=False, save_train_data=True, + test_graph=True, base_estimator="LinearStump"): + SelfOptCBBoostClassifier.__init__(self, n_max_iterations=n_max_iterations, random_state=random_state, twice_the_same=twice_the_same, + random_start=random_start, save_train_data=save_train_data, + test_graph=test_graph, base_estimator=base_estimator) + + diff --git a/summit/multiview_platform/multiview/multiview_utils.py b/summit/multiview_platform/multiview/multiview_utils.py index 99a9d26dd1995750a2e72cb6648aca036837a536..adf9cc1933fc1050c7d629ba0f3bcc6f910e2e1a 100644 --- a/summit/multiview_platform/multiview/multiview_utils.py +++ b/summit/multiview_platform/multiview/multiview_utils.py @@ -35,6 +35,7 @@ class BaseMultiviewClassifier(BaseClassifier): self.used_views = None def set_base_estim_from_dict(self, base_estim_dict, **kwargs): + print(base_estim_dict) if base_estim_dict is None: base_estimator = DecisionTreeClassifier() elif isinstance(base_estim_dict, str) and kwargs is not None: diff --git 
a/summit/multiview_platform/multiview_classifiers/additions/mv_cb_boost_adapt.py b/summit/multiview_platform/multiview_classifiers/additions/mv_cb_boost_adapt.py index b2b405924f4fce47ff230c5419e65d758f688d4e..a03fcd9ead5837cb4b75e5378f63ba2f4f3fc767 100644 --- a/summit/multiview_platform/multiview_classifiers/additions/mv_cb_boost_adapt.py +++ b/summit/multiview_platform/multiview_classifiers/additions/mv_cb_boost_adapt.py @@ -32,7 +32,8 @@ class MultiviewCBoundBoostingAdapt(BaseMultiviewClassifier, MultiviewCBoundBoost weight_update=weight_update, use_previous_voters=use_previous_voters, full_combination=full_combination, min_cq_pred=min_cq_pred, min_cq_mu=min_cq_mu, - sig_mult=sig_mult, sig_offset=sig_offset, + sig_mult=sig_mult, sig_offset=sig_offset, only_zero_one_weights=False, + update_only_chosen=False, **kwargs) BaseMultiviewClassifier.__init__(self, random_state) self.param_names = ["n_estimators","random_state"] diff --git a/summit/multiview_platform/multiview_classifiers/early_fusion_adaboost.py b/summit/multiview_platform/multiview_classifiers/early_fusion_adaboost.py new file mode 100644 index 0000000000000000000000000000000000000000..f5b4ae28bc396817558624a97da2ae7fcbc31db9 --- /dev/null +++ b/summit/multiview_platform/multiview_classifiers/early_fusion_adaboost.py @@ -0,0 +1,103 @@ +import numpy as np + +from .additions.fusion_utils import BaseFusionClassifier +from ..multiview.multiview_utils import get_available_monoview_classifiers, \ + BaseMultiviewClassifier, ConfigGenerator +from ..utils.dataset import get_samples_views_indices +from ..utils.multiclass import get_mc_estim, MultiClassWrapper + +# from ..utils.dataset import get_v + +classifier_class_name = "EarlyFusionAdaboost" + + +class EarlyFusionAdaboost(BaseMultiviewClassifier, BaseFusionClassifier): + """ + Builds a monoview dataset by concatenating the views (with a weight if + needed) and learns a monoview classifier on the concatenation + """ + + def __init__(self, random_state=None, 
view_weights=None, + monoview_classifier_name="adaboost", + monoview_classifier_config={}): + BaseMultiviewClassifier.__init__(self, random_state=random_state) + self.view_weights = view_weights + self.monoview_classifier_name = monoview_classifier_name + self.short_name = "early_fusion" + if monoview_classifier_name in monoview_classifier_config: + self.monoview_classifier_config = monoview_classifier_config[ + monoview_classifier_name] + self.monoview_classifier_config = monoview_classifier_config + self.monoview_classifier = self.init_monoview_estimator( + monoview_classifier_name, monoview_classifier_config) + self.param_names = ["monoview_classifier_config"] + self.distribs = [get_available_monoview_classifiers(), + ConfigGenerator(get_available_monoview_classifiers())] + self.classed_params = [] + self.weird_strings = {} + + def set_params(self, monoview_classifier_name="adaboost", + monoview_classifier_config={}, **params): + self.monoview_classifier_name = monoview_classifier_name + self.monoview_classifier = self.init_monoview_estimator( + monoview_classifier_name, + monoview_classifier_config) + self.monoview_classifier_config = self.monoview_classifier.get_params() + self.short_name = "early_fusion_adaboost" + return self + + def get_params(self, deep=True): + return {"random_state": self.random_state, + "view_weights": self.view_weights, + "monoview_classifier_name": self.monoview_classifier_name, + "monoview_classifier_config": self.monoview_classifier_config} + + def fit(self, X, y, train_indices=None, view_indices=None): + train_indices, X = self.transform_data_to_monoview(X, train_indices, + view_indices) + self.used_views = view_indices + if np.unique(y[train_indices]).shape[0] > 2 and \ + not (isinstance(self.monoview_classifier, MultiClassWrapper)): + self.monoview_classifier = get_mc_estim(self.monoview_classifier, + self.random_state, + multiview=False, + y=y[train_indices]) + self.monoview_classifier.fit(X, y[train_indices]) + 
self.monoview_classifier_config = self.monoview_classifier.get_params() + return self + + def predict(self, X, sample_indices=None, view_indices=None): + _, X = self.transform_data_to_monoview(X, sample_indices, view_indices) + self._check_views(self.view_indices) + predicted_labels = self.monoview_classifier.predict(X) + return predicted_labels + + def transform_data_to_monoview(self, dataset, sample_indices, + view_indices): + """Here, we extract the data from the HDF5 dataset file and store all + the concatenated views in one variable""" + sample_indices, self.view_indices = get_samples_views_indices(dataset, + sample_indices, + view_indices) + if self.view_weights is None: + self.view_weights = np.ones(len(self.view_indices), dtype=float) + else: + self.view_weights = np.array(self.view_weights) + self.view_weights /= float(np.sum(self.view_weights)) + + X = self.hdf5_to_monoview(dataset, sample_indices) + return sample_indices, X + + def hdf5_to_monoview(self, dataset, samples): + """Here, we concatenate the views for the asked samples """ + monoview_data = np.concatenate( + [dataset.get_v(view_idx, samples) + for view_weight, (index, view_idx) + in zip(self.view_weights, enumerate(self.view_indices))], axis=1) + return monoview_data + + # def set_monoview_classifier_config(self, monoview_classifier_name, monoview_classifier_config): + # if monoview_classifier_name in monoview_classifier_config: + # self.monoview_classifier.set_params(**monoview_classifier_config[monoview_classifier_name]) + # else: + # self.monoview_classifier.set_params(**monoview_classifier_config) diff --git a/summit/multiview_platform/multiview_classifiers/early_fusion_cb.py b/summit/multiview_platform/multiview_classifiers/early_fusion_cb.py new file mode 100644 index 0000000000000000000000000000000000000000..7fe2f034d813be94e6b0e06c8b1be18cfc90479a --- /dev/null +++ b/summit/multiview_platform/multiview_classifiers/early_fusion_cb.py @@ -0,0 +1,103 @@ +import numpy as np + +from 
.additions.fusion_utils import BaseFusionClassifier +from ..multiview.multiview_utils import get_available_monoview_classifiers, \ + BaseMultiviewClassifier, ConfigGenerator +from ..utils.dataset import get_samples_views_indices +from ..utils.multiclass import get_mc_estim, MultiClassWrapper + +# from ..utils.dataset import get_v + +classifier_class_name = "EarlyFusionCB" + + +class EarlyFusionCB(BaseMultiviewClassifier, BaseFusionClassifier): + """ + Builds a monoview dataset by concatenating the views (with a weight if + needed) and learns a monoview classifier on the concatenation + """ + + def __init__(self, random_state=None, view_weights=None, + monoview_classifier_name="cb_boost", + monoview_classifier_config={}): + BaseMultiviewClassifier.__init__(self, random_state=random_state) + self.view_weights = view_weights + self.monoview_classifier_name = monoview_classifier_name + self.short_name = "early_fusion_cb" + if monoview_classifier_name in monoview_classifier_config: + self.monoview_classifier_config = monoview_classifier_config[ + monoview_classifier_name] + self.monoview_classifier_config = monoview_classifier_config + self.monoview_classifier = self.init_monoview_estimator( + monoview_classifier_name, monoview_classifier_config) + self.param_names = ["monoview_classifier_config"] + self.distribs = [get_available_monoview_classifiers(), + ConfigGenerator(get_available_monoview_classifiers())] + self.classed_params = [] + self.weird_strings = {} + + def set_params(self, monoview_classifier_name="cb_boost", + monoview_classifier_config={}, **params): + self.monoview_classifier_name = monoview_classifier_name + self.monoview_classifier = self.init_monoview_estimator( + monoview_classifier_name, + monoview_classifier_config) + self.monoview_classifier_config = self.monoview_classifier.get_params() + self.short_name = "early_fusion_cb" + return self + + def get_params(self, deep=True): + return {"random_state": self.random_state, + "view_weights": 
self.view_weights, + "monoview_classifier_name": self.monoview_classifier_name, + "monoview_classifier_config": self.monoview_classifier_config} + + def fit(self, X, y, train_indices=None, view_indices=None): + train_indices, X = self.transform_data_to_monoview(X, train_indices, + view_indices) + self.used_views = view_indices + if np.unique(y[train_indices]).shape[0] > 2 and \ + not (isinstance(self.monoview_classifier, MultiClassWrapper)): + self.monoview_classifier = get_mc_estim(self.monoview_classifier, + self.random_state, + multiview=False, + y=y[train_indices]) + self.monoview_classifier.fit(X, y[train_indices]) + self.monoview_classifier_config = self.monoview_classifier.get_params() + return self + + def predict(self, X, sample_indices=None, view_indices=None): + _, X = self.transform_data_to_monoview(X, sample_indices, view_indices) + self._check_views(self.view_indices) + predicted_labels = self.monoview_classifier.predict(X) + return predicted_labels + + def transform_data_to_monoview(self, dataset, sample_indices, + view_indices): + """Here, we extract the data from the HDF5 dataset file and store all + the concatenated views in one variable""" + sample_indices, self.view_indices = get_samples_views_indices(dataset, + sample_indices, + view_indices) + if self.view_weights is None: + self.view_weights = np.ones(len(self.view_indices), dtype=float) + else: + self.view_weights = np.array(self.view_weights) + self.view_weights /= float(np.sum(self.view_weights)) + + X = self.hdf5_to_monoview(dataset, sample_indices) + return sample_indices, X + + def hdf5_to_monoview(self, dataset, samples): + """Here, we concatenate the views for the asked samples """ + monoview_data = np.concatenate( + [dataset.get_v(view_idx, samples) + for view_weight, (index, view_idx) + in zip(self.view_weights, enumerate(self.view_indices))], axis=1) + return monoview_data + + # def set_monoview_classifier_config(self, monoview_classifier_name, monoview_classifier_config): + # if 
monoview_classifier_name in monoview_classifier_config: + # self.monoview_classifier.set_params(**monoview_classifier_config[monoview_classifier_name]) + # else: + # self.monoview_classifier.set_params(**monoview_classifier_config) diff --git a/summit/multiview_platform/multiview_classifiers/early_fusion_dt.py b/summit/multiview_platform/multiview_classifiers/early_fusion_dt.py new file mode 100644 index 0000000000000000000000000000000000000000..6c79c78416157105bb158629a781fb00d44d6e8b --- /dev/null +++ b/summit/multiview_platform/multiview_classifiers/early_fusion_dt.py @@ -0,0 +1,103 @@ +import numpy as np + +from .additions.fusion_utils import BaseFusionClassifier +from ..multiview.multiview_utils import get_available_monoview_classifiers, \ + BaseMultiviewClassifier, ConfigGenerator +from ..utils.dataset import get_samples_views_indices +from ..utils.multiclass import get_mc_estim, MultiClassWrapper + +# from ..utils.dataset import get_v + +classifier_class_name = "EarlyFusionDT" + + +class EarlyFusionDT(BaseMultiviewClassifier, BaseFusionClassifier): + """ + Builds a monoview dataset by concatenating the views (with a weight if + needed) and learns a monoview classifier on the concatenation + """ + + def __init__(self, random_state=None, view_weights=None, + monoview_classifier_name="decision_tree", + monoview_classifier_config={"max_depth":100}): + BaseMultiviewClassifier.__init__(self, random_state=random_state) + self.view_weights = view_weights + self.monoview_classifier_name = monoview_classifier_name + self.short_name = "early_fusion_dt" + if monoview_classifier_name in monoview_classifier_config: + self.monoview_classifier_config = monoview_classifier_config[ + monoview_classifier_name] + self.monoview_classifier_config = monoview_classifier_config + self.monoview_classifier = self.init_monoview_estimator( + monoview_classifier_name, monoview_classifier_config) + self.param_names = ["monoview_classifier_config"] + self.distribs = 
[get_available_monoview_classifiers(), + ConfigGenerator(get_available_monoview_classifiers())] + self.classed_params = [] + self.weird_strings = {} + + def set_params(self, monoview_classifier_name="decision_tree", + monoview_classifier_config={}, **params): + self.monoview_classifier_name = monoview_classifier_name + self.monoview_classifier = self.init_monoview_estimator( + monoview_classifier_name, + monoview_classifier_config) + self.monoview_classifier_config = self.monoview_classifier.get_params() + self.short_name = "early_fusion_dt" + return self + + def get_params(self, deep=True): + return {"random_state": self.random_state, + "view_weights": self.view_weights, + "monoview_classifier_name": self.monoview_classifier_name, + "monoview_classifier_config": self.monoview_classifier_config} + + def fit(self, X, y, train_indices=None, view_indices=None): + train_indices, X = self.transform_data_to_monoview(X, train_indices, + view_indices) + self.used_views = view_indices + if np.unique(y[train_indices]).shape[0] > 2 and \ + not (isinstance(self.monoview_classifier, MultiClassWrapper)): + self.monoview_classifier = get_mc_estim(self.monoview_classifier, + self.random_state, + multiview=False, + y=y[train_indices]) + self.monoview_classifier.fit(X, y[train_indices]) + self.monoview_classifier_config = self.monoview_classifier.get_params() + return self + + def predict(self, X, sample_indices=None, view_indices=None): + _, X = self.transform_data_to_monoview(X, sample_indices, view_indices) + self._check_views(self.view_indices) + predicted_labels = self.monoview_classifier.predict(X) + return predicted_labels + + def transform_data_to_monoview(self, dataset, sample_indices, + view_indices): + """Here, we extract the data from the HDF5 dataset file and store all + the concatenated views in one variable""" + sample_indices, self.view_indices = get_samples_views_indices(dataset, + sample_indices, + view_indices) + if self.view_weights is None: + self.view_weights = 
np.ones(len(self.view_indices), dtype=float) + else: + self.view_weights = np.array(self.view_weights) + self.view_weights /= float(np.sum(self.view_weights)) + + X = self.hdf5_to_monoview(dataset, sample_indices) + return sample_indices, X + + def hdf5_to_monoview(self, dataset, samples): + """Here, we concatenate the views for the asked samples """ + monoview_data = np.concatenate( + [dataset.get_v(view_idx, samples) + for view_weight, (index, view_idx) + in zip(self.view_weights, enumerate(self.view_indices))], axis=1) + return monoview_data + + # def set_monoview_classifier_config(self, monoview_classifier_name, monoview_classifier_config): + # if monoview_classifier_name in monoview_classifier_config: + # self.monoview_classifier.set_params(**monoview_classifier_config[monoview_classifier_name]) + # else: + # self.monoview_classifier.set_params(**monoview_classifier_config) diff --git a/summit/multiview_platform/multiview_classifiers/early_fusion_rf.py b/summit/multiview_platform/multiview_classifiers/early_fusion_rf.py new file mode 100644 index 0000000000000000000000000000000000000000..e90ff96a087b729f7441169eacffc6b58145f527 --- /dev/null +++ b/summit/multiview_platform/multiview_classifiers/early_fusion_rf.py @@ -0,0 +1,103 @@ +import numpy as np + +from .additions.fusion_utils import BaseFusionClassifier +from ..multiview.multiview_utils import get_available_monoview_classifiers, \ + BaseMultiviewClassifier, ConfigGenerator +from ..utils.dataset import get_samples_views_indices +from ..utils.multiclass import get_mc_estim, MultiClassWrapper + +# from ..utils.dataset import get_v + +classifier_class_name = "EarlyFusionRF" + + +class EarlyFusionRF(BaseMultiviewClassifier, BaseFusionClassifier): + """ + Builds a monoview dataset by concatenating the views (with a weight if + needed) and learns a monoview classifier on the concatenation + """ + + def __init__(self, random_state=None, view_weights=None, + monoview_classifier_name="random_forest", + 
monoview_classifier_config={}): + BaseMultiviewClassifier.__init__(self, random_state=random_state) + self.view_weights = view_weights + self.monoview_classifier_name = monoview_classifier_name + self.short_name = "early_fusion_rf" + if monoview_classifier_name in monoview_classifier_config: + self.monoview_classifier_config = monoview_classifier_config[ + monoview_classifier_name] + self.monoview_classifier_config = monoview_classifier_config + self.monoview_classifier = self.init_monoview_estimator( + monoview_classifier_name, monoview_classifier_config) + self.param_names = ["monoview_classifier_config"] + self.distribs = [get_available_monoview_classifiers(), + ConfigGenerator(get_available_monoview_classifiers())] + self.classed_params = [] + self.weird_strings = {} + + def set_params(self, monoview_classifier_name="random_forest", + monoview_classifier_config={}, **params): + self.monoview_classifier_name = monoview_classifier_name + self.monoview_classifier = self.init_monoview_estimator( + monoview_classifier_name, + monoview_classifier_config) + self.monoview_classifier_config = self.monoview_classifier.get_params() + self.short_name = "early_fusion_rf" + return self + + def get_params(self, deep=True): + return {"random_state": self.random_state, + "view_weights": self.view_weights, + "monoview_classifier_name": self.monoview_classifier_name, + "monoview_classifier_config": self.monoview_classifier_config} + + def fit(self, X, y, train_indices=None, view_indices=None): + train_indices, X = self.transform_data_to_monoview(X, train_indices, + view_indices) + self.used_views = view_indices + if np.unique(y[train_indices]).shape[0] > 2 and \ + not (isinstance(self.monoview_classifier, MultiClassWrapper)): + self.monoview_classifier = get_mc_estim(self.monoview_classifier, + self.random_state, + multiview=False, + y=y[train_indices]) + self.monoview_classifier.fit(X, y[train_indices]) + self.monoview_classifier_config = self.monoview_classifier.get_params() + 
return self + + def predict(self, X, sample_indices=None, view_indices=None): + _, X = self.transform_data_to_monoview(X, sample_indices, view_indices) + self._check_views(self.view_indices) + predicted_labels = self.monoview_classifier.predict(X) + return predicted_labels + + def transform_data_to_monoview(self, dataset, sample_indices, + view_indices): + """Here, we extract the data from the HDF5 dataset file and store all + the concatenated views in one variable""" + sample_indices, self.view_indices = get_samples_views_indices(dataset, + sample_indices, + view_indices) + if self.view_weights is None: + self.view_weights = np.ones(len(self.view_indices), dtype=float) + else: + self.view_weights = np.array(self.view_weights) + self.view_weights /= float(np.sum(self.view_weights)) + + X = self.hdf5_to_monoview(dataset, sample_indices) + return sample_indices, X + + def hdf5_to_monoview(self, dataset, samples): + """Here, we concatenate the views for the asked samples """ + monoview_data = np.concatenate( + [dataset.get_v(view_idx, samples) + for view_weight, (index, view_idx) + in zip(self.view_weights, enumerate(self.view_indices))], axis=1) + return monoview_data + + # def set_monoview_classifier_config(self, monoview_classifier_name, monoview_classifier_config): + # if monoview_classifier_name in monoview_classifier_config: + # self.monoview_classifier.set_params(**monoview_classifier_config[monoview_classifier_name]) + # else: + # self.monoview_classifier.set_params(**monoview_classifier_config) diff --git a/summit/multiview_platform/multiview_classifiers/early_fusion_svm.py b/summit/multiview_platform/multiview_classifiers/early_fusion_svm.py new file mode 100644 index 0000000000000000000000000000000000000000..802038a860be6e8c6c6f4f584864832ae0e8e6e4 --- /dev/null +++ b/summit/multiview_platform/multiview_classifiers/early_fusion_svm.py @@ -0,0 +1,103 @@ +import numpy as np + +from .additions.fusion_utils import BaseFusionClassifier +from 
from ..multiview.multiview_utils import get_available_monoview_classifiers, \
    BaseMultiviewClassifier, ConfigGenerator
from ..utils.dataset import get_samples_views_indices
from ..utils.multiclass import get_mc_estim, MultiClassWrapper

# from ..utils.dataset import get_v

classifier_class_name = "EarlyFusionSVM"


class EarlyFusionSVM(BaseMultiviewClassifier, BaseFusionClassifier):
    """
    Early-fusion multiview classifier.

    Builds a monoview dataset by concatenating the requested views
    feature-wise and learns a single monoview classifier ("svm_rbf" by
    default) on the concatenation.

    NOTE(review): ``view_weights`` are normalised in
    ``transform_data_to_monoview`` but never multiplied into the data in
    ``hdf5_to_monoview`` -- confirm that unweighted concatenation is intended.
    """

    def __init__(self, random_state=None, view_weights=None,
                 monoview_classifier_name="svm_rbf",
                 monoview_classifier_config={}):
        """
        Store the hyper-parameters and build the underlying monoview estimator.

        Parameters
        ----------
        random_state : forwarded to ``BaseMultiviewClassifier.__init__``.
        view_weights : optional sequence with one weight per view; ``None``
            means uniform weights (resolved lazily at fit/predict time).
        monoview_classifier_name : name handed to
            ``init_monoview_estimator`` to select the monoview classifier.
        monoview_classifier_config : configuration handed unchanged to
            ``init_monoview_estimator``. The default dict is shared between
            calls; this class only stores and forwards it, never mutates it.
        """
        BaseMultiviewClassifier.__init__(self, random_state=random_state)
        self.view_weights = view_weights
        self.monoview_classifier_name = monoview_classifier_name
        self.short_name = "early_fusion_svm"
        # The configuration is stored exactly as received. A former
        # conditional assignment of the per-classifier sub-dictionary was
        # unconditionally overwritten by this line, so it was dead code and
        # has been removed without changing what is stored.
        self.monoview_classifier_config = monoview_classifier_config
        self.monoview_classifier = self.init_monoview_estimator(
            monoview_classifier_name, monoview_classifier_config)
        # NOTE(review): one parameter name but two distributions are
        # declared here -- confirm how the hyper-parameter search pairs them.
        self.param_names = ["monoview_classifier_config"]
        self.distribs = [get_available_monoview_classifiers(),
                         ConfigGenerator(get_available_monoview_classifiers())]
        self.classed_params = []
        self.weird_strings = {}

    def set_params(self, monoview_classifier_name="svm_rbf",
                   monoview_classifier_config={}, **params):
        """
        Rebuild the monoview estimator from a new name/configuration.

        Any extra keyword arguments in ``params`` are accepted and ignored.
        After the call, ``monoview_classifier_config`` holds the parameters
        reported by the freshly built estimator.

        Returns
        -------
        self
        """
        self.monoview_classifier_name = monoview_classifier_name
        self.monoview_classifier = self.init_monoview_estimator(
            monoview_classifier_name,
            monoview_classifier_config)
        self.monoview_classifier_config = self.monoview_classifier.get_params()
        self.short_name = "early_fusion_svm"
        return self

    def get_params(self, deep=True):
        """Return the constructor parameters as a dict (``deep`` is unused)."""
        return {"random_state": self.random_state,
                "view_weights": self.view_weights,
                "monoview_classifier_name": self.monoview_classifier_name,
                "monoview_classifier_config": self.monoview_classifier_config}

    def fit(self, X, y, train_indices=None, view_indices=None):
        """
        Fit the monoview classifier on the concatenated training views.

        Parameters
        ----------
        X : multiview dataset object exposing ``get_v(view_index, samples)``.
        y : label array, indexed with the resolved ``train_indices``.
        train_indices : samples to train on; resolved by
            ``get_samples_views_indices`` when ``None``.
        view_indices : views to use; resolved the same way when ``None``.

        Returns
        -------
        self
        """
        train_indices, X = self.transform_data_to_monoview(X, train_indices,
                                                           view_indices)
        # Stores the argument as passed by the caller (possibly None), not
        # the resolved indices kept in self.view_indices.
        self.used_views = view_indices
        train_labels = y[train_indices]
        # With more than two classes, wrap the estimator once in a
        # multiclass adapter unless it already is one.
        if np.unique(train_labels).shape[0] > 2 and \
                not isinstance(self.monoview_classifier, MultiClassWrapper):
            self.monoview_classifier = get_mc_estim(self.monoview_classifier,
                                                    self.random_state,
                                                    multiview=False,
                                                    y=train_labels)
        self.monoview_classifier.fit(X, train_labels)
        self.monoview_classifier_config = self.monoview_classifier.get_params()
        return self

    def predict(self, X, sample_indices=None, view_indices=None):
        """Concatenate the requested views and return the predicted labels."""
        _, X = self.transform_data_to_monoview(X, sample_indices, view_indices)
        self._check_views(self.view_indices)
        predicted_labels = self.monoview_classifier.predict(X)
        return predicted_labels

    def transform_data_to_monoview(self, dataset, sample_indices,
                                   view_indices):
        """
        Resolve the sample/view indices, normalise the view weights and
        return the concatenated views.

        Side effects: sets ``self.view_indices`` and replaces
        ``self.view_weights`` with a float array that sums to one.

        Returns
        -------
        (sample_indices, X) : the resolved sample indices and the
            concatenated data returned by ``hdf5_to_monoview``.
        """
        sample_indices, self.view_indices = get_samples_views_indices(
            dataset, sample_indices, view_indices)
        if self.view_weights is None:
            weights = np.ones(len(self.view_indices), dtype=float)
        else:
            # Force a float dtype: integer weights (e.g. [1, 2]) previously
            # produced an integer array on which the in-place true division
            # raised a casting error.
            weights = np.asarray(self.view_weights, dtype=float)
        # Out-of-place division so a caller-owned array is never modified.
        self.view_weights = weights / float(np.sum(weights))

        X = self.hdf5_to_monoview(dataset, sample_indices)
        return sample_indices, X

    def hdf5_to_monoview(self, dataset, samples):
        """Concatenate, along axis 1, the views of the asked samples."""
        # The zip with the weights is kept only so that the number of
        # concatenated views stays bounded by the number of weights, as
        # before; the weight itself is not applied to the data.
        monoview_data = np.concatenate(
            [dataset.get_v(view_idx, samples)
             for _view_weight, view_idx
             in zip(self.view_weights, self.view_indices)], axis=1)
        return monoview_data

    # def set_monoview_classifier_config(self, monoview_classifier_name, monoview_classifier_config):
    #         if monoview_classifier_name in
monoview_classifier_config: + # self.monoview_classifier.set_params(**monoview_classifier_config[monoview_classifier_name]) + # else: + # self.monoview_classifier.set_params(**monoview_classifier_config) diff --git a/summit/multiview_platform/multiview_classifiers/mucombo.py b/summit/multiview_platform/multiview_classifiers/mucombo.py index ac2e4fe131a436ccb9f777de164673e3d92d2851..ad5268e6e052ce0b61bbbfb7c171b96facd9f1ad 100644 --- a/summit/multiview_platform/multiview_classifiers/mucombo.py +++ b/summit/multiview_platform/multiview_classifiers/mucombo.py @@ -1,7 +1,7 @@ from sklearn.tree import DecisionTreeClassifier -from multimodal.boosting.cumbo import MuCumboClassifier +from multimodal.boosting.combo import MuComboClassifier from ..multiview.multiview_utils import BaseMultiviewClassifier from ..utils.hyper_parameter_search import CustomRandint from ..utils.dataset import get_samples_views_indices @@ -10,14 +10,14 @@ from ..utils.base import base_boosting_estimators classifier_class_name = "MuCumbo" -class MuCumbo(BaseMultiviewClassifier, MuCumboClassifier): +class MuCumbo(BaseMultiviewClassifier, MuComboClassifier): def __init__(self, base_estimator=None, n_estimators=50, random_state=None,**kwargs): BaseMultiviewClassifier.__init__(self, random_state) base_estimator = self.set_base_estim_from_dict(base_estimator, **kwargs) - MuCumboClassifier.__init__(self, base_estimator=base_estimator, + MuComboClassifier.__init__(self, base_estimator=base_estimator, n_estimators=n_estimators, random_state=random_state,) self.param_names = ["base_estimator", "n_estimators", "random_state",] @@ -31,7 +31,7 @@ class MuCumbo(BaseMultiviewClassifier, MuCumboClassifier): self.used_views = view_indices numpy_X, view_limits = X.to_numpy_array(sample_indices=train_indices, view_indices=view_indices) - return MuCumboClassifier.fit(self, numpy_X, y[train_indices], + return MuComboClassifier.fit(self, numpy_X, y[train_indices], view_limits) def predict(self, X, sample_indices=None, 
view_indices=None): @@ -41,7 +41,7 @@ class MuCumbo(BaseMultiviewClassifier, MuCumboClassifier): self._check_views(view_indices) numpy_X, view_limits = X.to_numpy_array(sample_indices=sample_indices, view_indices=view_indices) - return MuCumboClassifier.predict(self, numpy_X) + return MuComboClassifier.predict(self, numpy_X) def get_interpretation(self, directory, base_file_name, labels, multiclass=False): diff --git a/summit/multiview_platform/multiview_classifiers/mumbo.py b/summit/multiview_platform/multiview_classifiers/mumbo.py index 4b77ab6ec6cadede645272bf2d89e1f9598f658d..e631cbc2ff0c053b82b055595e977d6a9844bc74 100644 --- a/summit/multiview_platform/multiview_classifiers/mumbo.py +++ b/summit/multiview_platform/multiview_classifiers/mumbo.py @@ -1,4 +1,5 @@ from sklearn.tree import DecisionTreeClassifier +from sklearn.base import BaseEstimator import numpy as np import os @@ -20,7 +21,15 @@ class Mumbo(BaseMultiviewClassifier, MumboClassifier): random_state=None, best_view_mode="edge", **kwargs): BaseMultiviewClassifier.__init__(self, random_state) - base_estimator = self.set_base_estim_from_dict(base_estimator, **kwargs) + if type(base_estimator) is list: + if type(base_estimator[0]) is dict: + base_estimator = [self.set_base_estim_from_dict(estim, **kwargs) for estim in base_estimator] + elif isinstance(base_estimator[0], BaseEstimator): + base_estimator = base_estimator + else: + raise ValueError("base_estimator should ba a list of dict or a sklearn classifier list") + else: + base_estimator = self.set_base_estim_from_dict(base_estimator, **kwargs) MumboClassifier.__init__(self, base_estimator=base_estimator, n_estimators=n_estimators, random_state=random_state, @@ -103,3 +112,7 @@ class Mumbo(BaseMultiviewClassifier, MumboClassifier): interpret_string +="\n The boosting process selected views : \n" + ", ".join(map(str, self.best_views_)) interpret_string+="\n\n With estimator weights : \n"+ 
"\n".join(map(str,self.estimator_weights_/np.sum(self.estimator_weights_))) return interpret_string + + def accepts_multi_class(self, random_state, n_samples=10, dim=2, + n_classes=3, n_views=2): + return True diff --git a/summit/multiview_platform/multiview_classifiers/mv_cb_boost.py b/summit/multiview_platform/multiview_classifiers/mv_cb_boost.py index a4062ba0e13b4111fe9396c065caca7a7a77504f..2b399ba4a0c3ea052d7c9f205ed464d3ef7d7961 100644 --- a/summit/multiview_platform/multiview_classifiers/mv_cb_boost.py +++ b/summit/multiview_platform/multiview_classifiers/mv_cb_boost.py @@ -8,9 +8,9 @@ class MVCBBoost(MultiviewCBoundBoostingAdapt): self_complemented=True, twice_the_same=False, random_start=False, - n_stumps=10, + n_stumps=100, c_bound_sol=True, - base_estimator="Trees", + base_estimator="Stumps", max_depth=1, mincq_tracking=False, weight_add=3, @@ -21,7 +21,7 @@ class MVCBBoost(MultiviewCBoundBoostingAdapt): min_cq_mu=10e-3, sig_mult=15, sig_offset=5, - use_previous_voters=False, **kwargs): + use_previous_voters=True, **kwargs): MultiviewCBoundBoostingAdapt.__init__(self, n_estimators=n_estimators, random_state=random_state, self_complemented=self_complemented, twice_the_same=twice_the_same, random_start=random_start, n_stumps=n_stumps, c_bound_sol=c_bound_sol, max_depth=max_depth,