Commit acc0dd23 authored by Baptiste Bauvin's avatar Baptiste Bauvin

Cuisine

parent d60441fd
@@ -640,9 +640,9 @@ def exec_classif(arguments):  # pragma: no cover
    k_folds = execution.gen_k_folds(stats_iter, args["nb_folds"],
                                    stats_iter_random_states)
-    dataset_files = dataset.init_multiple_datasets(args["pathf"],
-                                                   args["name"],
-                                                   nb_cores)
+    # dataset_files = dataset.init_multiple_datasets(args["pathf"],
+    #                                                args["name"],
+    #                                                nb_cores)
    views, views_indices, all_views = execution.init_views(dataset_var,
                                                           args[
......
from sklearn.metrics import make_scorer
from sklearn.metrics import confusion_matrix as metric

# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype"  # Production, Development, Prototype


def score(y_true, y_pred, **kwargs):
    # Specificity: TN / (TN + FP), read from the negative-class row of the
    # confusion matrix; guard against an empty row.
    conf_mat = metric(y_true, y_pred, **kwargs)
    if conf_mat[0, 0] + conf_mat[0, 1] != 0:
        return conf_mat[0, 0] / (conf_mat[0, 0] + conf_mat[0, 1])
    else:
        return 0


def get_scorer(**kwargs):
    return make_scorer(score, greater_is_better=True, **kwargs)


def get_config(**kwargs):
    return "Specificity score (higher is better)"
\ No newline at end of file
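For context, a minimal usage sketch of the scorer defined above. The synthetic labels and the DecisionTreeClassifier are illustrative assumptions; only score and get_scorer come from this file, and the sketch assumes it runs in the same namespace.

import numpy as np
from sklearn.tree import DecisionTreeClassifier

# confusion_matrix([0, 0, 1, 1], [0, 1, 1, 1]) = [[1, 1], [0, 2]],
# so specificity = TN / (TN + FP) = 1 / (1 + 1) = 0.5.
print(score(np.array([0, 0, 1, 1]), np.array([0, 1, 1, 1])))  # 0.5

X = np.random.rand(20, 3)
y = np.random.randint(0, 2, 20)
clf = DecisionTreeClassifier().fit(X, y)
scorer = get_scorer()  # make_scorer wrapper, callable as scorer(clf, X, y)
print(scorer(clf, X, y))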
@@ -120,6 +120,14 @@ def exec_monoview(directory, X, Y, database_name, labels_names,
    test_pred = classifier.predict(X_test)
    pred_duration = time.monotonic() - pred_beg
+    ### ROC CURVE ADDITION ###
+    from sklearn.metrics import roc_curve
+    # Assumes the classifier exposes predict_proba; note that np.savetxt
+    # writes plain text despite the ".npy" suffix of the output files.
+    fpr, tpr, _ = roc_curve(y_test, classifier.predict_proba(X_test)[:, 1])
+    np.savetxt(os.path.join(directory, classifier_class_name + "-fpr.npy"), fpr)
+    np.savetxt(os.path.join(directory, classifier_class_name + "-tpr.npy"), tpr)
+    ### END ROC ###
    # Filling the full prediction in the right order
    full_pred = np.zeros(Y.shape, dtype=int) - 100
    for train_index, index in enumerate(classification_indices[0]):
......
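A hedged sketch of consuming the two files written above. The directory and classifier name are placeholders, and matplotlib is only one way to render the curve.

import os
import numpy as np
import matplotlib.pyplot as plt

directory = "results/example/"        # placeholder path
classifier_class_name = "Adaboost"    # placeholder classifier name
# np.loadtxt mirrors the np.savetxt calls in the hunk above.
fpr = np.loadtxt(os.path.join(directory, classifier_class_name + "-fpr.npy"))
tpr = np.loadtxt(os.path.join(directory, classifier_class_name + "-tpr.npy"))
plt.plot(fpr, tpr, label=classifier_class_name)
plt.xlabel("False positive rate")
plt.ylabel("True positive rate")
plt.legend()
plt.savefig(os.path.join(directory, classifier_class_name + "-roc.png"))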
from imblearn.ensemble import BalancedBaggingClassifier
from sklearn.tree import DecisionTreeClassifier
import numpy as np
from ..monoview.monoview_utils import BaseMonoviewClassifier
from ..utils.base import base_boosting_estimators
@@ -27,5 +28,14 @@ class ImbalanceBagging(BaseMonoviewClassifier, BalancedBaggingClassifier):
        self.weird_strings = {"base_estimator": "class_name"}
        self.base_estimator_config = base_estimator_config

+    def fit(self, X, y):
+        BalancedBaggingClassifier.fit(self, X, y)
+        # Each fitted estimator is an imblearn Pipeline whose "classifier"
+        # step holds the base learner; sum the importances of those that
+        # expose feature_importances_, then normalize them to sum to 1.
+        self.feature_importances_ = np.zeros(X.shape[1])
+        for estim in self.estimators_:
+            if hasattr(estim['classifier'], 'feature_importances_'):
+                self.feature_importances_ += estim['classifier'].feature_importances_
+        if np.sum(self.feature_importances_) != 0:
+            self.feature_importances_ /= np.sum(self.feature_importances_)
+        return self
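A quick sanity-check sketch for the aggregated importances. Constructing ImbalanceBagging with its defaults is an assumption about this class's __init__; the toy data is synthetic.

import numpy as np

X = np.random.rand(100, 5)
y = np.array([0] * 90 + [1] * 10)     # imbalanced toy labels
clf = ImbalanceBagging().fit(X, y)    # assumes a usable default constructor
# Importances are summed over the bagged "classifier" steps and then
# normalized, so they should sum to 1 when any base learner exposes them.
print(clf.feature_importances_.sum())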
@@ -33,6 +33,7 @@ class SamBAClf(NeighborHoodClassifier, BaseMonoviewClassifier):
                 pred_train=False,
                 forced_diversity=False,
                 normalize_dists=False,
+                 class_weight="balanced",
                 **kwargs):
        """
@@ -54,18 +55,22 @@ class SamBAClf(NeighborHoodClassifier, BaseMonoviewClassifier):
                                       normalizer=normalizer,
                                       forced_diversity=forced_diversity,
                                       b=b, a=a, pred_train=pred_train,
-                                       normalize_dists=normalize_dists)
-        self.param_names = ["n_estimators", "relevance", "distance",
+                                       normalize_dists=normalize_dists,
+                                       class_weight=class_weight)
+        self.param_names = ["n_estimators",
+                            "relevance",
+                            "distance",
                            "train_weighting", "b", "pred_train", "normalizer",
-                            "normalize_dists", "a"]
+                            "normalize_dists", "a", "class_weight"]
        self.distribs = [CustomRandint(low=1, high=70),
                         [ExpRelevance()],
-                         [EuclidianDist(), PolarDist(), ExpEuclidianDist()],
+                         [EuclidianDist(), PolarDist(), ExpEuclidianDist(), Jaccard()],
                         [ExpTrainWeighting()],
                         CustomUniform(0.1, 6,),
                         [True, False],
                         [RobustScaler()],
-                         [True], CustomRandint(0, 10, 'e-')]
+                         [True], CustomRandint(0, 10, 'e-'),
+                         ["balanced", None]]
        self.classed_params = []
        self.weird_strings = {}
......
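On the new class_weight option: assuming SamBA follows the usual scikit-learn "balanced" convention, each class is weighted by n_samples / (n_classes * count). A quick check with sklearn's helper:

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

y = np.array([0] * 90 + [1] * 10)
# class 0 -> 100 / (2 * 90) ~= 0.56, class 1 -> 100 / (2 * 10) = 5.0
print(compute_class_weight("balanced", classes=np.array([0, 1]), y=y))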
from scm_bagging.scm_bagging_classifier import ScmBaggingClassifier
+from randomscm.randomscm import RandomScmClassifier
from ..monoview.monoview_utils import BaseMonoviewClassifier
@@ -16,7 +17,8 @@ from six import iteritems
MAX_INT = np.iinfo(np.int32).max


-class ScmBaggingMinCq(ScmBaggingClassifier, BaseMonoviewClassifier):
+class ScmBaggingMinCq(RandomScmClassifier, BaseMonoviewClassifier):
    """A bagging classifier for SetCoveringMachineClassifier().

    The base estimators are built on subsets of both samples
    and features.
......
@@ -140,7 +140,8 @@ def plot_feature_relevance(file_name, feature_importance,
    for score in score_df.columns:
        if len(score.split("-")) > 1:
            algo, view = score.split("-")
-            feature_importance[algo].loc[[ind for ind in feature_importance.index if ind.startswith(view)]] *= score_df[score]['test']
+            list_ind = [ind for ind in feature_importance.index if ind.startswith(view)]
+            feature_importance[algo].loc[list_ind] *= 2 * (score_df[score]['test'] - 0.5)
        else:
            feature_importance[score] *= score_df[score]['test']
    file_name += "_relevance"
......
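The new rescaling presumably zeroes the contribution of views whose test score sits at chance level, instead of still crediting them with half their raw importance. The mapping is quick to check:

# 2 * (score - 0.5) maps [0.5, 1.0] onto [0.0, 1.0]:
for s in (0.5, 0.75, 1.0):
    print(s, "->", 2 * (s - 0.5))   # 0.5 -> 0.0, 0.75 -> 0.5, 1.0 -> 1.0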
@@ -253,8 +253,10 @@ class ResultAnalyser():
            metric_module = getattr(metrics, metric)
        else:
            metric_module = getattr(metrics, metric[:-1])
        class_train_scores = []
        class_test_scores = []
+        if metric not in ["roc_auc_score", "specificity_score"]:
            for label_value in np.unique(self.labels):
                train_sample_indices = self.train_indices[
                    np.where(self.labels[self.train_indices] == label_value)[0]]
@@ -268,6 +270,10 @@ class ResultAnalyser():
                    metric_module.score(y_true=self.labels[test_sample_indices],
                                        y_pred=self.pred[test_sample_indices],
                                        **metric_kwargs))
+        else:
+            # These metrics are undefined on single-class slices, so fill
+            # placeholder zeros to keep the per-class score lists aligned.
+            for _ in np.unique(self.labels):
+                class_train_scores.append(0)
+                class_test_scores.append(0)
        train_score = metric_module.score(
            y_true=self.labels[self.train_indices],
            y_pred=self.pred[self.train_indices],
......
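Why these two metrics are excluded from the per-class loop: on a slice containing a single label value, roc_auc_score raises and the confusion matrix degenerates to a single cell. A small check:

import numpy as np
from sklearn.metrics import roc_auc_score

y_single = np.zeros(5, dtype=int)     # a one-class slice
try:
    roc_auc_score(y_single, np.random.rand(5))
except ValueError as err:
    print(err)  # only one class present in y_true: ROC AUC is undefined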
@@ -503,7 +503,7 @@ class HDF5Dataset(Dataset):
            selected labels' names
        """
        selected_labels = self.get_labels(sample_indices)
-        if decode:
+        if isinstance(self.dataset["Labels"].attrs["names"][0], bytes):
            return [label_name.decode("utf-8")
                    for label, label_name in
                    enumerate(self.dataset["Labels"].attrs["names"])
@@ -619,10 +619,14 @@ class HDF5Dataset(Dataset):
        view_names = self.init_view_names(view_names)
        new_dataset_file["Metadata"].attrs["nbView"] = len(view_names)
+        # Stash the kept views' feature ids in memory first: deleting and
+        # re-linking the HDF5 keys in place can clobber data when old and
+        # new view indices overlap.
+        feature_ids = [new_dataset_file["Metadata"][
+                           "feature_ids-View{}".format(self.view_dict[view_name])][()]
+                       for view_name in view_names]
+        for view_index in range(len(self.view_dict)):
+            del new_dataset_file["Metadata"]["feature_ids-View{}".format(view_index)]
        for new_index, view_name in enumerate(view_names):
+            new_dataset_file["Metadata"]["feature_ids-View{}".format(new_index)] = feature_ids[new_index]
            self.copy_view(target_dataset=new_dataset_file,
                           source_view_name=view_name,
                           target_view_index=new_index,
                           sample_indices=sample_indices)
        new_dataset_file.close()
        self.update_hdf5_dataset(dataset_file_path)
......
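Background for the stash-first rewrite above: in h5py, assigning an existing dataset to a new key creates a hard link, and del only unlinks a name, so naive in-place renames lose data exactly when a slot is deleted before its content has been re-read. A throwaway demo (the file name is hypothetical):

import h5py
import numpy as np

with h5py.File("demo.h5", "w") as f:   # hypothetical throwaway file
    f["a"] = np.arange(3)
    f["b"] = f["a"]                    # hard link, not a copy
    del f["a"]                         # data survives through "b"
    print(f["b"][()])                  # [0 1 2]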