diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py index 3f32dbaae718bc407956e958b8473ca1e6119679..d63dd748e940dd1bb5e788aeb127f405a2c3227c 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py @@ -191,7 +191,7 @@ class ClassifiersGenerator(BaseEstimator, TransformerMixin): class TreeClassifiersGenerator(ClassifiersGenerator): - def __init__(self, random_state, max_depth=2, self_complemented=True, criterion="gini", splitter="best", n_trees=100, distribution_type="uniform", low=0, high=10): + def __init__(self, random_state, max_depth=2, self_complemented=True, criterion="gini", splitter="best", n_trees=100, distribution_type="uniform", low=0, high=10, attributes_ratio=0.6, examples_ratio=0.95): super(TreeClassifiersGenerator, self).__init__(self_complemented) self.max_depth=max_depth self.criterion=criterion @@ -201,17 +201,32 @@ class TreeClassifiersGenerator(ClassifiersGenerator): self.distribution_type = distribution_type self.low = low self.high = high + self.attributes_ratio = attributes_ratio + self.examples_ratio = examples_ratio def fit(self, X, y=None): estimators_ = [] - self.distributions = np.zeros((self.n_trees, X.shape[0])) - distrib_method = getattr(self.random_state, self.distribution_type) + self.attribute_indices = [self.sub_sample_attributes(X) for _ in range(self.n_trees)] + self.example_indices = [self.sub_sample_examples(X) for _ in range(self.n_trees)] for i in range(self.n_trees): - self.distributions[i,:] = distrib_method(self.low, self.high, size=X.shape[0]) - estimators_.append(DecisionTreeClassifier(criterion=self.criterion, splitter=self.splitter, max_depth=self.max_depth).fit(X, y, sample_weight=self.distributions[i,:])) + estimators_.append(DecisionTreeClassifier(criterion=self.criterion, splitter=self.splitter, max_depth=self.max_depth).fit(X[:, self.attribute_indices[i]][self.example_indices[i], :], y[self.example_indices[i]])) self.estimators_ = np.asarray(estimators_) return self + def sub_sample_attributes(self, X): + n_attributes = X.shape[1] + attributes_indices = np.arange(n_attributes) + kept_indices = self.random_state.choice(attributes_indices, size=int(self.attributes_ratio*n_attributes), replace=True) + return kept_indices + + def sub_sample_examples(self, X): + n_examples = X.shape[0] + examples_indices = np.arange(n_examples) + kept_indices = self.random_state.choice(examples_indices, size=int(self.examples_ratio*n_examples), replace=True) + return kept_indices + + + class StumpsClassifiersGenerator(ClassifiersGenerator): """Decision Stump Voters transformer. @@ -753,8 +768,11 @@ def get_accuracy_graph(plotted_data, classifier_name, file_name, name="Accuracie class BaseBoost(object): - def _collect_probas(self, X): - return np.asarray([clf.predict_proba(X) for clf in self.estimators_generator.estimators_]) + def _collect_probas(self, X, sub_sampled=False): + if self.estimators_generator.__class__.__name__ == "TreeClassifiersGenerator": + return np.asarray([clf.predict_proba(X[:,attribute_indices]) for clf, attribute_indices in zip(self.estimators_generator.estimators_, self.estimators_generator.attribute_indices)]) + else: + return np.asarray([clf.predict_proba(X) for clf in self.estimators_generator.estimators_]) def _binary_classification_matrix(self, X): probas = self._collect_probas(X) diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py index 9156bf763014a37f89778a1e693b6db564a09bcc..c0d43c74edb4e61c6214ea96a687c85c75680338 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/ExecClassifMonoView.py @@ -82,7 +82,6 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol logging.debug("Start:\t Predicting") y_train_pred = classifier.predict(X_train) y_test_pred = classifier.predict(X_test) - print(np.unique(y_test_pred)) full_labels_pred = np.zeros(Y.shape, dtype=int)-100 for trainIndex, index in enumerate(classificationIndices[0]): full_labels_pred[index] = y_train_pred[trainIndex] diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py index 7a9fa525c6ab7c01d977cb31e7f60c557772c7a7..5e80bbc76f20a29ae8e44f1847ec0fde2f919092 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py @@ -146,7 +146,6 @@ class BaseMonoviewClassifier(object): else: return self.weird_strings[param_name](self.get_params()[param_name]) else: - print(self.get_params()) return str(self.get_params()[param_name]) def getFeatureImportance(self, directory, nb_considered_feats=50): diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregenTree.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregenTree.py index 16378a77d9b32d5ea02f61d071fbc81576f6862e..6f5aaf497956fa831fc044d3ea825882e955e4b4 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregenTree.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/AdaboostPregenTree.py @@ -18,7 +18,7 @@ __status__ = "Prototype" # Production, Development, Prototype class AdaboostPregenTree(AdaBoostClassifier, BaseMonoviewClassifier, PregenClassifier): def __init__(self, random_state=None, n_estimators=50, - base_estimator=None, n_stumps=1, self_complemeted=True, max_depth=2 , **kwargs): + base_estimator=None, n_stumps=1, self_complemeted=True, max_depth=2, **kwargs): super(AdaboostPregenTree, self).__init__( random_state=random_state, n_estimators=n_estimators, @@ -40,7 +40,7 @@ class AdaboostPregenTree(AdaBoostClassifier, BaseMonoviewClassifier, PregenClass def fit(self, X, y, sample_weight=None): begin = time.time() - pregen_X, pregen_y = self.pregen_voters(X, y) + pregen_X, pregen_y = self.pregen_voters(X, y, generator=self.estimators_generator) super(AdaboostPregenTree, self).fit(pregen_X, pregen_y, sample_weight=sample_weight) end = time.time() self.train_time = end-begin diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregen.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregen.py index b92cd0445867a269dacc2e094ab8f93d1f60bdf1..dd6224cff74b0f63b5147d550d21e7b756290eec 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregen.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregen.py @@ -34,7 +34,7 @@ class SCMPregen(scm, BaseMonoviewClassifier, PregenClassifier): def fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params): pregen_X, _ = self.pregen_voters(X, y) list_files = os.listdir(".") - a = int(np.random.randint(0, 10000)) + a = int(self.random_state.randint(0, 10000)) if "pregen_x"+str(a)+".csv" in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" @@ -51,13 +51,14 @@ class SCMPregen(scm, BaseMonoviewClassifier, PregenClassifier): def predict(self, X): pregen_X, _ = self.pregen_voters(X) list_files = os.listdir(".") - if "pregen_x.csv" in list_files: - i = 0 - file_name = "pregen_x" + str(i) + ".csv" + a = int(self.random_state.randint(0, 10000)) + if "pregen_x"+str(a)+".csv" in list_files: + a = int(np.random.randint(0, 10000)) + file_name = "pregen_x" + str(a) + ".csv" while file_name in list_files: - i += 1 + a = int(np.random.randint(0, 10000)) else: - file_name = "pregen_x.csv" + file_name = "pregen_x"+str(a)+".csv" np.savetxt(file_name, pregen_X, delimiter=',') place_holder = np.genfromtxt(file_name, delimiter=',') os.remove(file_name) diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregenTree.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregenTree.py index 9ce31788d118bc99c3c2c0d46deaa5b929268d2d..fd48a5ff5510edf5cad948704198f3e4fecfdd9a 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregenTree.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMPregenTree.py @@ -36,7 +36,7 @@ class SCMPregenTree(scm, BaseMonoviewClassifier, PregenClassifier): def fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params): pregen_X, _ = self.pregen_voters(X, y, generator="Trees") list_files = os.listdir(".") - a = int(np.random.randint(0, 10000)) + a = int(self.random_state.randint(0, 10000)) if "pregen_x"+str(a)+".csv" in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" @@ -51,9 +51,10 @@ class SCMPregenTree(scm, BaseMonoviewClassifier, PregenClassifier): return self def predict(self, X): - pregen_X, _ = self.pregen_voters(X, generator="Trees") + pregen_X, _ = self.pregen_voters(X,) list_files = os.listdir(".") - a = int(np.random.randint(0, 10000)) + print(list_files) + a = int(self.random_state.randint(0, 10000)) if "pregen_x"+str(a)+".csv" in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsity.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsity.py index 62b8ee8ebaa16ea046810f448231ad0c9484a874..27745be1513fbe62cbb3f90bd0a22b36e5a04456 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsity.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsity.py @@ -43,7 +43,7 @@ class SCMSparsity(BaseMonoviewClassifier, PregenClassifier): def fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params): pregen_X, _ = self.pregen_voters(X, y) list_files = os.listdir(".") - a = int(np.random.randint(0, 10000)) + a = int(self.random_state.randint(0, 10000)) if "pregen_x"+str(a)+".csv" in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" @@ -65,13 +65,14 @@ class SCMSparsity(BaseMonoviewClassifier, PregenClassifier): def predict(self, X): pregen_X, _ = self.pregen_voters(X,) list_files = os.listdir(".") - if "pregen_x.csv" in list_files: - i = 0 - file_name = "pregen_x" + str(i) + ".csv" + a = int(self.random_state.randint(0, 10000)) + if "pregen_x"+str(a)+".csv" in list_files: + a = int(np.random.randint(0, 10000)) + file_name = "pregen_x" + str(a) + ".csv" while file_name in list_files: - i += 1 + a = int(np.random.randint(0, 10000)) else: - file_name="pregen_x.csv" + file_name = "pregen_x"+str(a)+".csv" np.savetxt(file_name, pregen_X, delimiter=',') place_holder = np.genfromtxt(file_name, delimiter=',') os.remove(file_name) diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsityTree.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsityTree.py index bd883c61db2c039791f77788f6d8b8f238d7473b..e28a99f1c8faec17f7bac255812d0f0982ea6160 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsityTree.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/SCMSparsityTree.py @@ -43,7 +43,7 @@ class SCMSparsityTree(BaseMonoviewClassifier, PregenClassifier): def fit(self, X, y, tiebreaker=None, iteration_callback=None, **fit_params): pregen_X, _ = self.pregen_voters(X, y, generator="Trees") list_files = os.listdir(".") - a = int(np.random.randint(0, 10000)) + a = int(self.random_state.randint(0, 10000)) if "pregen_x"+str(a)+".csv" in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv" @@ -65,7 +65,7 @@ class SCMSparsityTree(BaseMonoviewClassifier, PregenClassifier): def predict(self, X): pregen_X, _ = self.pregen_voters(X, generator="Trees") list_files = os.listdir(".") - a = int(np.random.randint(0, 10000)) + a = int(self.random_state.randint(0, 10000)) if "pregen_x"+str(a)+".csv" in list_files: a = int(np.random.randint(0, 10000)) file_name = "pregen_x" + str(a) + ".csv"