Commit 5cc4a555 authored by Baptiste Bauvin

name changed

parent 4d630a8a
Showing with 0 additions and 1166 deletions
import warnings
from sklearn.metrics import fbeta_score as metric
from sklearn.metrics import make_scorer
warnings.warn("the fbeta_score module is deprecated", DeprecationWarning,
stacklevel=2)
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def score(y_true, y_pred, beta=2.0, average="micro", **kwargs):
score = metric(y_true, y_pred, beta=beta, average=average, **kwargs)
return score
def get_scorer(beta=2.0, average="micro", **kwargs):
return make_scorer(metric, greater_is_better=True, beta=beta,
average=average, **kwargs)
def get_config(beta=2.0, average="micro", **kwargs):
config_string = "F-beta score using beta: {}, average: {}, {} (higher is better)".format(
beta, average, kwargs)
return config_string
import warnings
from sklearn.metrics import hamming_loss as metric
from sklearn.metrics import make_scorer
warnings.warn("the hamming_loss module is deprecated", DeprecationWarning,
stacklevel=2)
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def score(y_true, y_pred, multiclass=False, **kwargs):
score = metric(y_true, y_pred, **kwargs)
return score
def get_scorer(**kwargs):
return make_scorer(metric, greater_is_better=False, **kwargs)
def get_config(**kwargs):
config_string = "Hamming loss using {} (lower is better)".format(kwargs)
return config_string
import warnings
from sklearn.metrics import jaccard_score as metric
from sklearn.metrics import make_scorer
warnings.warn("the jaccard_similarity_score module is deprecated",
DeprecationWarning,
stacklevel=2)
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def score(y_true, y_pred, multiclass=False, **kwargs):
score = metric(y_true, y_pred, **kwargs)
return score
def get_scorer(**kwargs):
return make_scorer(metric, greater_is_better=True,
**kwargs)
def get_config(**kwargs):
config_string = "Jaccard_similarity score using {} (higher is better)".format(
kwargs)
return config_string
import warnings
from sklearn.metrics import log_loss as metric
from sklearn.metrics import make_scorer
warnings.warn("the log_loss module is deprecated", DeprecationWarning,
stacklevel=2)
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def score(y_true, y_pred, multiclass=False, **kwargs):
score = metric(y_true, y_pred, **kwargs)
return score
def get_scorer(**kwargs):
return make_scorer(metric, greater_is_better=False,
**kwargs)
def get_config(**kwargs):
config_string = "Log loss using {} (lower is better)".format(kwargs)
return config_string
import warnings
from sklearn.metrics import make_scorer
from sklearn.metrics import matthews_corrcoef as metric
warnings.warn("the matthews_corrcoef module is deprecated", DeprecationWarning,
stacklevel=2)
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def score(y_true, y_pred, multiclass=False, **kwargs):
score = metric(y_true, y_pred)
return score
def get_scorer(**kwargs):
return make_scorer(metric, greater_is_better=True)
def get_config(**kwargs):
config_string = "Matthews correlation coefficient (higher is better)"
return config_string
import warnings
from sklearn.metrics import make_scorer
from sklearn.metrics import precision_score as metric
warnings.warn("the precision_score module is deprecated", DeprecationWarning,
stacklevel=2)
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def score(y_true, y_pred, average='micro', multiclass=False, **kwargs):
score = metric(y_true, y_pred, average=average, **kwargs)
return score
def get_scorer(average='micro', **kwargs):
return make_scorer(metric, greater_is_better=True,
average=average, **kwargs)
def get_config(average='micro', **kwargs):
config_string = "Precision score using average: {}, {} (higher is better)".format(
average, kwargs)
return config_string
import warnings
from sklearn.metrics import make_scorer
from sklearn.metrics import recall_score as metric
warnings.warn("the recall_score module is deprecated", DeprecationWarning,
stacklevel=2)
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def score(y_true, y_pred, average='micro', **kwargs):
score = metric(y_true, y_pred, average=average, **kwargs)
return score
def get_scorer(average='micro', **kwargs):
return make_scorer(metric, greater_is_better=True,
average=average, **kwargs)
def get_config(average="micro", **kwargs):
configString = "Recall score using average: {}, {} (higher is better)".format(
average, kwargs)
return configString
import warnings
from sklearn.metrics import make_scorer
from sklearn.metrics import roc_auc_score as metric
from sklearn.preprocessing import MultiLabelBinarizer
warnings.warn("the roc_auc_score module is deprecated", DeprecationWarning,
stacklevel=2)
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def score(y_true, y_pred, multiclass=False, **kwargs):
score = metric(y_true, y_pred, **kwargs)
return score
def get_scorer(**kwargs):
return make_scorer(metric, greater_is_better=True,
**kwargs)
def get_config(**kwargs):
configString = "ROC_AUC score using {}".format(kwargs)
return configString
import warnings
from sklearn.metrics import make_scorer
from sklearn.metrics import zero_one_loss as metric
warnings.warn("the zero_one_loss module is deprecated", DeprecationWarning,
stacklevel=2)
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def score(y_true, y_pred, multiclass=False, **kwargs):
score = metric(y_true, y_pred, **kwargs)
return score
def get_scorer(**kwargs):
return make_scorer(metric, greater_is_better=False,
**kwargs)
def get_config(**kwargs):
configString = "Zero_one loss using {} (lower is better)".format(kwargs)
return configString
# from . import ExecClassifMonoView, MonoviewUtils, analyzeResult
#!/usr/bin/env python
""" Execution: Script to perform a MonoView classification """
import logging # To create Log-Files
# Import built-in modules
import os # to get the path of the running script
import time # for time calculations
import h5py
# Import 3rd party modules
import numpy as np # for array and matrix operations
from .monoview_utils import MonoviewResult, MonoviewResultAnalyzer
# Import own modules
from .. import monoview_classifiers
from ..utils import hyper_parameter_search
from ..utils.dataset import extract_subset, HDF5Dataset
from ..utils.multiclass import get_mc_estim
from ..utils.organization import secure_file_path
# Author-Info
__author__ = "Nikolas Huelsmann, Baptiste BAUVIN"
__status__ = "Prototype" # Production, Development, Prototype
# __date__ = 2016 - 03 - 25
def exec_monoview_multicore(directory, name, labels_names,
classification_indices,
k_folds, dataset_file_index, database_type,
path, random_state, labels,
hyper_param_search="randomized_search",
metrics=[["accuracy_score", None]], n_iter=30,
**args): # pragma: no cover
dataset_var = HDF5Dataset(
hdf5_file=h5py.File(path + name + str(dataset_file_index) + ".hdf5",
"r"))
needed_view_index = args["view_index"]
X = dataset_var.get_v(needed_view_index)
Y = labels
return exec_monoview(directory, X, Y, name, labels_names,
classification_indices, k_folds, 1, database_type,
path,
random_state, hyper_param_search=hyper_param_search,
metrics=metrics, n_iter=n_iter,
view_name=dataset_var.get_view_name(
args["view_index"]),
**args)
def exec_monoview(directory, X, Y, database_name, labels_names, classification_indices,
k_folds, nb_cores, databaseType, path,
random_state, hyper_param_search="Random",
metrics={"accuracy_score*":{}}, n_iter=30, view_name="",
hps_kwargs={}, **args):
logging.debug("Start:\t Loading data")
kwargs, \
t_start, \
view_name, \
classifier_name, \
X, \
learningRate, \
labelsString, \
output_file_name,\
directory,\
base_file_name = init_constants(args, X, classification_indices,
labels_names,
database_name, directory, view_name, )
logging.debug("Done:\t Loading data")
logging.debug(
"Info:\t Classification - Database:" + str(database_name) + " View:" + str(
view_name) + " train ratio:"
+ str(learningRate) + ", CrossValidation k-folds: " + str(
k_folds.n_splits) + ", cores:"
+ str(nb_cores) + ", algorithm : " + classifier_name)
logging.debug("Start:\t Determine Train/Test split")
X_train, y_train, X_test, y_test = init_train_test(X, Y,
classification_indices)
logging.debug("Info:\t Shape X_train:" + str(
X_train.shape) + ", Length of y_train:" + str(len(y_train)))
logging.debug("Info:\t Shape X_test:" + str(
X_test.shape) + ", Length of y_test:" + str(len(y_test)))
logging.debug("Done:\t Determine Train/Test split")
logging.debug("Start:\t Generate classifier args")
classifier_module = getattr(monoview_classifiers, classifier_name)
classifier_class_name = classifier_module.classifier_class_name
hyper_param_beg = time.monotonic()
cl_kwargs = get_hyper_params(classifier_module, hyper_param_search,
classifier_name,
classifier_class_name,
X_train, y_train,
random_state, output_file_name,
k_folds, nb_cores, metrics, kwargs,
**hps_kwargs)
hyper_param_duration = time.monotonic() - hyper_param_beg
logging.debug("Done:\t Generate classifier args")
logging.debug("Start:\t Training")
classifier = get_mc_estim(getattr(classifier_module,
classifier_class_name)
(random_state, **cl_kwargs),
random_state,
y=Y)
fit_beg = time.monotonic()
classifier.fit(X_train, y_train) # NB_CORES=nbCores,
fit_duration = time.monotonic() - fit_beg
logging.debug("Done:\t Training")
logging.debug("Start:\t Predicting")
train_pred = classifier.predict(X_train)
pred_beg = time.monotonic()
test_pred = classifier.predict(X_test)
pred_duration = time.monotonic() - pred_beg
# Filling the full prediction in the right order
full_pred = np.zeros(Y.shape, dtype=int) - 100
for trainIndex, index in enumerate(classification_indices[0]):
full_pred[index] = train_pred[trainIndex]
for testIndex, index in enumerate(classification_indices[1]):
full_pred[index] = test_pred[testIndex]
logging.debug("Done:\t Predicting")
whole_duration = time.monotonic() - t_start
logging.debug(
"Info:\t Duration for training and predicting: " + str(whole_duration) + "[s]")
logging.debug("Start:\t Getting results")
result_analyzer = MonoviewResultAnalyzer(view_name=view_name,
classifier_name=classifier_name,
shape=X.shape,
classifier=classifier,
classification_indices=classification_indices,
k_folds=k_folds,
hps_method=hyper_param_search,
metrics_dict=metrics,
n_iter=n_iter,
class_label_names=labels_names,
pred=full_pred,
directory=directory,
base_file_name=base_file_name,
labels=Y,
database_name=database_name,
nb_cores=nb_cores,
duration=whole_duration)
string_analysis, images_analysis, metrics_scores, class_metrics_scores, \
confusion_matrix = result_analyzer.analyze()
logging.debug("Done:\t Getting results")
logging.debug("Start:\t Saving preds")
save_results(string_analysis, output_file_name, full_pred, train_pred,
y_train, images_analysis, y_test, confusion_matrix)
logging.info("Done:\t Saving results")
view_index = args["view_index"]
return MonoviewResult(view_index, classifier_name, view_name,
metrics_scores, full_pred, cl_kwargs,
classifier, X_train.shape[1],
hyper_param_duration, fit_duration, pred_duration, class_metrics_scores)
def init_constants(args, X, classification_indices, labels_names,
name, directory, view_name):
try:
kwargs = args["args"]
except KeyError:
kwargs = args
t_start = time.monotonic()
cl_type = kwargs["classifier_name"]
learning_rate = float(len(classification_indices[0])) / (
len(classification_indices[0]) + len(classification_indices[1]))
labels_string = "-".join(labels_names)
cl_type_string = cl_type
directory = os.path.join(directory, cl_type_string, view_name,)
base_file_name = cl_type_string + '-' + name + "-" + view_name + "-"
output_file_name = os.path.join(directory, base_file_name)
secure_file_path(output_file_name)
return kwargs, t_start, view_name, cl_type, X, learning_rate, labels_string, output_file_name, directory, base_file_name
def init_train_test(X, Y, classification_indices):
train_indices, test_indices = classification_indices
X_train = extract_subset(X, train_indices)
X_test = extract_subset(X, test_indices)
y_train = Y[train_indices]
y_test = Y[test_indices]
return X_train, y_train, X_test, y_test
def get_hyper_params(classifier_module, search_method, classifier_module_name,
classifier_class_name, X_train, y_train,
random_state,
output_file_name, k_folds, nb_cores, metrics, kwargs,
**hps_kwargs):
if search_method != "None":
logging.debug(
"Start:\t " + search_method + " best settings for " + classifier_module_name)
classifier_hp_search = getattr(hyper_parameter_search, search_method)
estimator = getattr(classifier_module, classifier_class_name)(
random_state=random_state,
**kwargs[classifier_module_name])
estimator = get_mc_estim(estimator, random_state,
multiview=False, y=y_train)
hps = classifier_hp_search(estimator, scoring=metrics, cv=k_folds,
random_state=random_state,
framework="monoview", n_jobs=nb_cores,
**hps_kwargs)
hps.fit(X_train, y_train, **kwargs[classifier_module_name])
cl_kwargs = hps.get_best_params()
hps.gen_report(output_file_name)
logging.debug("Done:\t " + search_method + " best settings")
else:
cl_kwargs = kwargs[classifier_module_name]
return cl_kwargs
def save_results(string_analysis, output_file_name, full_labels_pred,
y_train_pred,
y_train, images_analysis, y_test, confusion_matrix): # pragma: no cover
logging.info(string_analysis)
output_text_file = open(output_file_name + 'summary.txt', 'w', encoding="utf-8")
output_text_file.write(string_analysis)
output_text_file.close()
np.savetxt(output_file_name+"confusion_matrix.csv", confusion_matrix,
delimiter=', ')
np.savetxt(output_file_name + "full_pred.csv",
full_labels_pred.astype(np.int16), delimiter=",")
np.savetxt(output_file_name + "train_pred.csv",
y_train_pred.astype(np.int16),
delimiter=",")
np.savetxt(output_file_name + "train_labels.csv", y_train.astype(np.int16),
delimiter=",")
np.savetxt(output_file_name + "test_labels.csv", y_test.astype(np.int16),
delimiter=",")
if images_analysis is not None:
for image_name in images_analysis:
if os.path.isfile(output_file_name + image_name + ".png"):
for i in range(1, 20):
test_file_name = output_file_name + image_name + "-" + str(
i) + ".png"
if not os.path.isfile(test_file_name):
images_analysis[image_name].savefig(test_file_name,
transparent=True)
break
images_analysis[image_name].savefig(
output_file_name + image_name + '.png', transparent=True)
import pickle
import os
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FuncFormatter
from scipy.stats import uniform, randint
from ..utils.base import BaseClassifier, ResultAnalyser
from ..utils.hyper_parameter_search import CustomRandint, CustomUniform
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
# __date__ = 2016 - 03 - 25
def change_label_to_minus(y):
"""
Change the label 0 to minus one
Parameters
----------
y :
Returns
-------
label y with -1 instead of 0
"""
minus_y = np.copy(y)
minus_y[np.where(y == 0)] = -1
return minus_y
def change_label_to_zero(y):
"""
Change the label -1 to 0
Parameters
----------
y
Returns
-------
label y with 0 instead of -1
"""
zeroed_y = np.copy(y)
zeroed_y[np.where(y == -1)] = 0
return zeroed_y
def compute_possible_combinations(params_dict):
n_possibs = np.ones(len(params_dict)) * np.inf
for value_index, value in enumerate(params_dict.values()):
if type(value) == list:
n_possibs[value_index] = len(value)
elif isinstance(value, CustomRandint):
n_possibs[value_index] = value.get_nb_possibilities()
return n_possibs
def gen_test_folds_preds(X_train, y_train, KFolds, estimator):
test_folds_preds = []
train_index = np.arange(len(y_train))
folds = KFolds.split(train_index, y_train)
fold_lengths = np.zeros(KFolds.n_splits, dtype=int)
for fold_index, (train_indices, test_indices) in enumerate(folds):
fold_lengths[fold_index] = len(test_indices)
estimator.fit(X_train[train_indices], y_train[train_indices])
test_folds_preds.append(estimator.predict(X_train[test_indices]))
min_fold_length = fold_lengths.min()
test_folds_preds = np.array(
[test_fold_preds[:min_fold_length] for test_fold_preds in
test_folds_preds])
return test_folds_preds
# class CustomRandint:
# """Used as a distribution returning a integer between low and high-1.
# It can be used with a multiplier agrument to be able to perform more complex generation
# for example 10 e -(randint)"""
#
# def __init__(self, low=0, high=0, multiplier=""):
# self.randint = randint(low, high)
# self.multiplier = multiplier
#
# def rvs(self, random_state=None):
# randinteger = self.randint.rvs(random_state=random_state)
# if self.multiplier == "e-":
# return 10 ** -randinteger
# else:
# return randinteger
#
# def get_nb_possibilities(self):
# return self.randint.b - self.randint.a
#
#
# class CustomUniform:
# """Used as a distribution returning a float between loc and loc + scale..
# It can be used with a multiplier agrument to be able to perform more complex generation
# for example 10 e -(float)"""
#
# def __init__(self, loc=0, state=1, multiplier=""):
# self.uniform = uniform(loc, state)
# self.multiplier = multiplier
#
# def rvs(self, random_state=None):
# unif = self.uniform.rvs(random_state=random_state)
# if self.multiplier == 'e-':
# return 10 ** -unif
# else:
# return unif
class BaseMonoviewClassifier(BaseClassifier):
def get_feature_importance(self, directory, base_file_name, nb_considered_feats=50):
"""Used to generate a graph and a pickle dictionary representing
feature importances"""
feature_importances = self.feature_importances_
sorted_args = np.argsort(-feature_importances)
feature_importances_sorted = feature_importances[sorted_args][
:nb_considered_feats]
feature_indices_sorted = sorted_args[:nb_considered_feats]
fig, ax = plt.subplots()
x = np.arange(len(feature_indices_sorted))
formatter = FuncFormatter(percent)
ax.yaxis.set_major_formatter(formatter)
plt.bar(x, feature_importances_sorted)
plt.title("Importance depending on feature")
fig.savefig(os.path.join(directory, base_file_name + "feature_importances.png")
, transparent=True)
plt.close()
features_importances_dict = dict((featureIndex, featureImportance)
for featureIndex, featureImportance in
enumerate(feature_importances)
if featureImportance != 0)
with open(os.path.join(directory, 'feature_importances.pickle'), 'wb') as handle:
pickle.dump(features_importances_dict, handle)
interpret_string = "Feature importances : \n"
for featureIndex, featureImportance in zip(feature_indices_sorted,
feature_importances_sorted):
if featureImportance > 0:
interpret_string += "- Feature index : " + str(featureIndex) + \
", feature importance : " + str(
featureImportance) + "\n"
return interpret_string
def get_name_for_fusion(self):
return self.__class__.__name__[:4]
def percent(x, pos):
"""Used to print percentage of importance on the y axis"""
return '%1.1f %%' % (x * 100)
class MonoviewResult(object):
def __init__(self, view_index, classifier_name, view_name, metrics_scores,
full_labels_pred, classifier_config,
classifier, n_features, hps_duration, fit_duration,
pred_duration, class_metric_scores):
self.view_index = view_index
self.classifier_name = classifier_name
self.view_name = view_name
self.metrics_scores = metrics_scores
self.full_labels_pred = full_labels_pred
self.classifier_config = classifier_config
self.clf = classifier
self.n_features = n_features
self.hps_duration = hps_duration
self.fit_duration = fit_duration
self.pred_duration = pred_duration
self.class_metric_scores = class_metric_scores
def get_classifier_name(self):
return self.classifier_name + "-" + self.view_name
def get_accuracy_graph(plotted_data, classifier_name, file_name,
name="Accuracies", bounds=None, bound_name=None,
boosting_bound=None, set="train", zero_to_one=True): # pragma: no cover
if type(name) is not str:
name = " ".join(name.getConfig().strip().split(" ")[:2])
f, ax = plt.subplots(nrows=1, ncols=1)
if zero_to_one:
ax.set_ylim(bottom=0.0, top=1.0)
ax.set_title(name + " during " + set + " for " + classifier_name)
x = np.arange(len(plotted_data))
scat = ax.scatter(x, np.array(plotted_data), marker=".")
if bounds:
if boosting_bound:
scat2 = ax.scatter(x, boosting_bound, marker=".")
scat3 = ax.scatter(x, np.array(bounds), marker=".", )
ax.legend((scat, scat2, scat3),
(name, "Boosting bound", bound_name))
else:
scat2 = ax.scatter(x, np.array(bounds), marker=".", )
ax.legend((scat, scat2),
(name, bound_name))
# plt.tight_layout()
else:
ax.legend((scat,), (name,))
f.savefig(file_name, transparent=True)
plt.close()
class MonoviewResultAnalyzer(ResultAnalyser):
def __init__(self, view_name, classifier_name, shape, classifier,
classification_indices, k_folds, hps_method, metrics_dict,
n_iter, class_label_names, pred,
directory, base_file_name, labels, database_name, nb_cores, duration):
ResultAnalyser.__init__(self, classifier, classification_indices,
k_folds, hps_method, metrics_dict, n_iter,
class_label_names, pred,
directory, base_file_name, labels,
database_name, nb_cores, duration)
self.view_name = view_name
self.classifier_name = classifier_name
self.shape = shape
def get_base_string(self):
return "Classification on {} for {} with {}.\n\n".format(
self.database_name, self.view_name, self.classifier_name
)
def get_view_specific_info(self):
return "\t- View name : {}\t View shape : {}\n".format(self.view_name,
self.shape)
import os
for module in os.listdir(os.path.dirname(os.path.realpath(__file__))):
if module == '__init__.py' or module[-3:] != '.py':
continue
__import__(module[:-3], locals(), globals(), [], 1)
del module
del os
"""
To be able to add a monoview Classifier to the benchmark, one has to :
Create a .py file named after the classifier
Define a canProbas function returning True or False depending on whether the classifier is able to predict class probabilities
Define a fit function
Input :
DATASET : The data matrix used to fit the classifier
CLASS_LABELS : The labels' array of the training set
NB_CORES : The number of cores the classifier can use to train
kwargs : Any argument specific to the classifier
Output :
classifier : A classifier object, similar to the sk-learn classifier object
Define a ***Search function that searches hyper-parameters for the algorithm. Check the HP optimization methods to get all the
different functions to provide (returning the parameters in the order of the kwargs dict for the fit function)
Define a getKWARGS function
Input :
KWARGSList : The list of all the arguments as written in the argument parser
Output :
KWARGSDict : a dictionary of arguments matching the kwargs needed in train
Define a getConfig function that returns a string explaining the algorithm's config using a config dict or list
Add the arguments to configure the classifier in the parser in exec_classif.py
"""
import time
import os
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from .. import metrics
from ..monoview.monoview_utils import CustomRandint, BaseMonoviewClassifier, \
get_accuracy_graph
from ..utils.base import base_boosting_estimators
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
classifier_class_name = "Adaboost"
class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
"""
This class implements a classifier based on the AdaBoost algorithm,
inheriting from sklearn's AdaBoostClassifier.
Parameters
----------
random_state : int seed, RandomState instance, or None (default=None)
The seed of the pseudo random number generator to use when
shuffling the data.
n_estimators : int number of estimators
base_estimator :
kwargs : other arguments
Attributes
----------
param_names :
classed_params :
distribs :
weird_strings :
plotted_metric : selection of metric to plot
plotted_metric_name : name of the metric to plot
step_predictions :
"""
def __init__(self, random_state=None, n_estimators=50,
base_estimator=None, base_estimator_config=None, **kwargs):
base_estimator = BaseMonoviewClassifier.get_base_estimator(self,
base_estimator,
base_estimator_config)
AdaBoostClassifier.__init__(self,
random_state=random_state,
n_estimators=n_estimators,
base_estimator=base_estimator,
algorithm="SAMME"
)
self.param_names = ["n_estimators", "base_estimator"]
self.classed_params = ["base_estimator"]
self.distribs = [CustomRandint(low=1, high=500),
base_boosting_estimators]
self.weird_strings = {"base_estimator": "class_name"}
self.plotted_metric = metrics.zero_one_loss
self.plotted_metric_name = "zero_one_loss"
self.step_predictions = None
def fit(self, X, y, sample_weight=None):
"""
Fit adaboost model
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
y : array-like, shape (n_samples,)
Target values (class labels in classification)
sample_weight :
Returns
-------
self : object
Returns self.
"""
begin = time.time()
AdaBoostClassifier.fit(self, X, y, sample_weight=sample_weight)
end = time.time()
self.train_time = end - begin
self.train_shape = X.shape
self.base_predictions = np.array(
[estim.predict(X) for estim in self.estimators_])
self.metrics = np.array([self.plotted_metric.score(pred, y) for pred in
self.staged_predict(X)])
return self
def predict(self, X):
"""
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Input samples, where n_samples is the number of samples
and n_features is the number of features.
Returns
-------
predictions : ndarray of shape (n_samples, )
The estimated labels.
"""
begin = time.time()
pred = AdaBoostClassifier.predict(self, X)
end = time.time()
self.pred_time = end - begin
self.step_predictions = np.array(
[step_pred for step_pred in self.staged_predict(X)])
return pred
def get_interpretation(self, directory, base_file_name, y_test, multi_class=False): # pragma: no cover
interpretString = ""
interpretString += self.get_feature_importance(directory, base_file_name)
interpretString += "\n\n Estimator error | Estimator weight\n"
interpretString += "\n".join(
[str(error) + " | " + str(weight / sum(self.estimator_weights_)) for
error, weight in
zip(self.estimator_errors_, self.estimator_weights_)])
step_test_metrics = np.array(
[self.plotted_metric.score(y_test, step_pred) for step_pred in
self.step_predictions])
get_accuracy_graph(step_test_metrics, "Adaboost",
os.path.join(directory, base_file_name +"test_metrics.png"),
self.plotted_metric_name, set="test")
np.savetxt(os.path.join(directory, base_file_name + "test_metrics.csv"),
step_test_metrics,
delimiter=',')
np.savetxt(os.path.join(directory, base_file_name + "train_metrics.csv"),
self.metrics, delimiter=',')
np.savetxt(os.path.join(directory, base_file_name + "times.csv"),
np.array([self.train_time, self.pred_time]), delimiter=',')
return interpretString
from sklearn.svm import SVC
class SVCClassifier(SVC):
def __init__(self, random_state=None, kernel='rbf', C=1.0, degree=3,
**kwargs):
super(SVCClassifier, self).__init__(
C=C,
kernel=kernel,
degree=degree,
probability=True,
max_iter=1000,
random_state=random_state
)
self.classed_params = []
self.weird_strings = {}
from sklearn.tree import DecisionTreeClassifier
from ..monoview.monoview_utils import CustomRandint, BaseMonoviewClassifier
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
classifier_class_name = "DecisionTree"
class DecisionTree(DecisionTreeClassifier, BaseMonoviewClassifier):
def __init__(self, random_state=None, max_depth=None,
criterion='gini', splitter='best', **kwargs):
DecisionTreeClassifier.__init__(self,
max_depth=max_depth,
criterion=criterion,
splitter=splitter,
random_state=random_state
)
self.param_names = ["max_depth", "criterion", "splitter",
'random_state']
self.classed_params = []
self.distribs = [CustomRandint(low=1, high=300),
["gini", "entropy"],
["best", "random"], [random_state]]
self.weird_strings = {}
def get_interpretation(self, directory, base_file_name, y_test,
multiclass=False):
interpretString = "First featrue : \n\t{} <= {}\n".format(
self.tree_.feature[0],
self.tree_.threshold[0])
interpretString += self.get_feature_importance(directory, base_file_name)
return interpretString
import time
import os
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from .. import metrics
from ..monoview.monoview_utils import CustomRandint, BaseMonoviewClassifier, \
get_accuracy_graph
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
classifier_class_name = "GradientBoosting"
class CustomDecisionTreeGB(DecisionTreeClassifier):
def predict(self, X, check_input=True):
y_pred = DecisionTreeClassifier.predict(self, X,
check_input=check_input)
return y_pred.reshape((y_pred.shape[0], 1)).astype(float)
class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier):
def __init__(self, random_state=None, loss="exponential", max_depth=1,
n_estimators=100,
init=CustomDecisionTreeGB(max_depth=1),
**kwargs):
GradientBoostingClassifier.__init__(self,
loss=loss,
max_depth=max_depth,
n_estimators=n_estimators,
init=init,
random_state=random_state
)
self.param_names = ["n_estimators", "max_depth"]
self.classed_params = []
self.distribs = [CustomRandint(low=50, high=500),
CustomRandint(low=1, high=10),]
self.weird_strings = {}
self.plotted_metric = metrics.zero_one_loss
self.plotted_metric_name = "zero_one_loss"
self.step_predictions = None
def fit(self, X, y, sample_weight=None, monitor=None):
begin = time.time()
GradientBoostingClassifier.fit(self, X, y, sample_weight=sample_weight)
end = time.time()
self.train_time = end - begin
self.train_shape = X.shape
self.base_predictions = np.array(
[estim[0].predict(X) for estim in self.estimators_])
self.metrics = np.array(
[self.plotted_metric.score(pred, y) for pred in
self.staged_predict(X)])
# self.bounds = np.array([np.prod(
# np.sqrt(1 - 4 * np.square(0.5 - self.estimator_errors_[:i + 1]))) for i
# in range(self.estimator_errors_.shape[0])])
return self
def predict(self, X):
begin = time.time()
pred = GradientBoostingClassifier.predict(self, X)
end = time.time()
self.pred_time = end - begin
if X.shape != self.train_shape:
self.step_predictions = np.array(
[step_pred for step_pred in self.staged_predict(X)])
return pred
def get_interpretation(self, directory, base_file_name, y_test, multi_class=False):
interpretString = ""
if multi_class:
return interpretString
else:
interpretString += self.get_feature_importance(directory, base_file_name)
step_test_metrics = np.array(
[self.plotted_metric.score(y_test, step_pred) for step_pred in
self.step_predictions])
get_accuracy_graph(step_test_metrics, "GradientBoosting",
os.path.join(directory, base_file_name + "test_metrics.png"),
self.plotted_metric_name, set="test")
get_accuracy_graph(self.metrics, "GradientBoosting",
os.path.join(directory, base_file_name + "metrics.png"),
self.plotted_metric_name)
np.savetxt(os.path.join(directory, base_file_name + "test_metrics.csv"), step_test_metrics,
delimiter=',')
np.savetxt(os.path.join(directory, base_file_name + "train_metrics.csv"), self.metrics,
delimiter=',')
np.savetxt(os.path.join(directory, base_file_name + "times.csv"),
np.array([self.train_time, self.pred_time]),
delimiter=',')
return interpretString
from sklearn.neighbors import KNeighborsClassifier
from ..monoview.monoview_utils import CustomRandint, BaseMonoviewClassifier
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
classifier_class_name = "KNN"
class KNN(KNeighborsClassifier, BaseMonoviewClassifier):
"""
Implements an extension of sklearn's KNeighborsClassifier
for use within the multiview platform.
Parameters
----------
random_state
n_neighbors
weights
algorithm
p
kwargs
"""
def __init__(self, random_state=None, n_neighbors=5,
weights='uniform', algorithm='auto', p=2, **kwargs):
KNeighborsClassifier.__init__(self,
n_neighbors=n_neighbors,
weights=weights,
algorithm=algorithm,
p=p
)
self.param_names = ["n_neighbors", "weights", "algorithm", "p",
"random_state", ]
self.classed_params = []
self.distribs = [CustomRandint(low=1, high=10), ["uniform", "distance"],
["auto", "ball_tree", "kd_tree", "brute"], [1, 2],
[random_state]]
self.weird_strings = {}
self.random_state = random_state
import numpy as np
from sklearn.linear_model import Lasso as LassoSK
from ..monoview.monoview_utils import CustomRandint, CustomUniform, \
BaseMonoviewClassifier
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
classifier_class_name = "Lasso"
class Lasso(LassoSK, BaseMonoviewClassifier):
"""
Parameters
----------
random_state :
alpha : float, optional
Constant that multiplies the L1 term. Defaults to 1.0.
``alpha = 0`` is equivalent to an ordinary least square, solved
by the :class:`LinearRegression` object. For numerical
reasons, using ``alpha = 0`` with the Lasso object is
not advised and you should prefer the LinearRegression object.
max_iter : int The maximum number of iterations (default : 10)
warm_start : bool, optional
When set to True, reuse the solution of the previous call to fit as
initialization, otherwise, just erase the previous solution.
kwargs : others arguments
Attributes
----------
param_names :
classed_params :
distribs :
weird_strings :
"""
def __init__(self, random_state=None, alpha=1.0,
max_iter=10, warm_start=False, **kwargs):
LassoSK.__init__(self,
alpha=alpha,
max_iter=max_iter,
warm_start=warm_start,
random_state=random_state
)
self.param_names = ["max_iter", "alpha", "random_state"]
self.classed_params = []
self.distribs = [CustomRandint(low=1, high=300),
CustomUniform(), [random_state]]
self.weird_strings = {}
def fit(self, X, y, check_input=True):
neg_y = np.copy(y)
neg_y[np.where(neg_y == 0)] = -1
LassoSK.fit(self, X, neg_y)
# self.feature_importances_ = self.coef_/np.sum(self.coef_)
return self
def predict(self, X):
prediction = LassoSK.predict(self, X)
signed = np.sign(prediction)
signed[np.where(signed == -1)] = 0
return signed
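# A hedged usage sketch for the Lasso wrapper above: binary labels given in {0, 1}
# are remapped to {-1, 1} before the regression fit, and predictions are mapped
# back to {0, 1} by their sign. The synthetic data and parameter values below are
# illustrative assumptions only.
if __name__ == "__main__":
    import numpy as np
    rng = np.random.RandomState(42)
    X = rng.rand(40, 6)
    y = (X[:, 0] > 0.5).astype(int)  # labels in {0, 1}
    clf = Lasso(alpha=0.01, max_iter=1000, random_state=rng)
    clf.fit(X, y)
    print(clf.predict(X[:5]))  # thresholded predictions, back in {0, 1}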