Skip to content
Snippets Groups Projects
Unverified Commit 2c36a873 authored by Dominique Benielli's avatar Dominique Benielli Committed by GitHub
Browse files

Merge pull request #5 from kossi-kossivi/modifs

Fix bug in some Multimodal learning methods
parents 2e5930aa ecb1b89c
Branches
Tags
No related merge requests found
......@@ -12,4 +12,4 @@ plotly>=4.2.1
matplotlib>=3.1.1
tabulate>=0.8.6
pyscm-ml>=1.0.0
imbalanced-learn
\ No newline at end of file
imbalanced-learn>=0.10.1
\ No newline at end of file
......@@ -82,7 +82,7 @@ class ScmBagging(RandomScmClassifier, BaseMonoviewClassifier):
max_samples=max_samples,
max_features=max_features,
max_rules=max_rules,
p_options=p_options,
p=p_options,
model_type=model_type,
random_state=random_state)
self.param_names = ["n_estimators", "max_rules", "max_samples", "max_features", "model_type", "p_options", "random_state"]
......@@ -94,7 +94,7 @@ class ScmBagging(RandomScmClassifier, BaseMonoviewClassifier):
def set_params(self, p_options=[0.316], **kwargs):
if not isinstance(p_options, list):
p_options = [p_options]
kwargs["p_options"] = p_options
kwargs["p"] = p_options
for parameter, value in iteritems(kwargs):
setattr(self, parameter, value)
return self
......
......@@ -7,20 +7,20 @@ from ..utils.hyper_parameter_search import CustomRandint
from ..utils.dataset import get_samples_views_indices
from ..utils.base import base_boosting_estimators
classifier_class_name = "MuCumbo"
classifier_class_name = "MuCombo"
class MuCombo(BaseMultiviewClassifier, MuComboClassifier):
def __init__(self, estimator=None,
def __init__(self, base_estimator=None,
n_estimators=50,
random_state=None,**kwargs):
BaseMultiviewClassifier.__init__(self, random_state)
estimator = self.set_base_estim_from_dict(estimator, **kwargs)
MuComboClassifier.__init__(self, estimator=estimator,
base_estimator = self.set_base_estim_from_dict(base_estimator, **kwargs)
MuComboClassifier.__init__(self, base_estimator=base_estimator,
n_estimators=n_estimators,
random_state=random_state,)
self.param_names = ["estimator", "n_estimators", "random_state",]
self.param_names = ["base_estimator", "n_estimators", "random_state",]
self.distribs = [base_boosting_estimators,
CustomRandint(5,200), [random_state],]
......@@ -43,6 +43,12 @@ class MuCombo(BaseMultiviewClassifier, MuComboClassifier):
view_indices=view_indices)
return MuComboClassifier.predict(self, numpy_X)
def get_interpretation(self, directory, base_file_name, labels,
multiclass=False):
def get_interpretation(self, directory, base_file_name, y_test, feature_ids,
multi_class=False):
return ""
def set_base_estim_from_dict(self, dict, **kwargs):
    """Build the base estimator described by a one-entry configuration mapping.

    Parameters
    ----------
    dict : mapping, estimator or None
        Either a mapping whose first item is ``(estimator_key, init_args)``,
        or a value without an ``items`` method (``None`` or, presumably, an
        already-built estimator), which is returned unchanged.
        NOTE: the parameter name shadows the ``dict`` builtin; it is kept
        so existing callers are not broken.
    **kwargs
        Accepted and ignored, because ``__init__`` forwards its own extra
        keyword arguments to this method.

    Returns
    -------
    The estimator built from the mapping, the input itself when it is not
    a mapping, or ``None`` when the estimator key is not recognised.
    """
    # ``base_estimator`` defaults to None in ``__init__``; calling ``.items()``
    # on it would raise AttributeError, so non-mapping values pass through.
    if not hasattr(dict, "items"):
        return dict
    key, args = list(dict.items())[0]
    if key == "decision_tree":
        return DecisionTreeClassifier(**args)
    # Unknown keys keep the historical behaviour of yielding None.
    return None
\ No newline at end of file
......@@ -13,19 +13,20 @@ from .. import monoview_classifiers
classifier_class_name = "Mumbo"
class Mumbo(BaseMultiviewClassifier, MumboClassifier):
def __init__(self, estimator=None,
def __init__(self, base_estimator=None,
n_estimators=50,
random_state=None,
best_view_mode="edge", **kwargs):
BaseMultiviewClassifier.__init__(self, random_state)
base_estimator = self.set_base_estim_from_dict(estimator, **kwargs)
MumboClassifier.__init__(self, base_estimator=estimator,
base_estimator = self.set_base_estim_from_dict(base_estimator)
MumboClassifier.__init__(self, base_estimator=base_estimator,
n_estimators=n_estimators,
random_state=random_state,
best_view_mode=best_view_mode)
self.param_names = ["estimator", "n_estimators", "random_state", "best_view_mode"]
self.param_names = ["base_estimator", "n_estimators", "random_state", "best_view_mode"]
self.distribs = [base_boosting_estimators,
CustomRandint(5, 200), [random_state], ["edge", "error"]]
......@@ -42,8 +43,7 @@ class Mumbo(BaseMultiviewClassifier, MumboClassifier):
self.base_estimator = self.set_base_estim_from_dict(estimator)
MumboClassifier.set_params(self, **params)
else:
MumboClassifier.set_params(self, estimator=estimator, **params)
MumboClassifier.set_params(self, base_estimator=estimator, **params)
def fit(self, X, y, train_indices=None, view_indices=None):
train_indices, view_indices = get_samples_views_indices(X,
......@@ -69,7 +69,8 @@ class Mumbo(BaseMultiviewClassifier, MumboClassifier):
view_indices=view_indices)
return MumboClassifier.predict(self, numpy_X)
def get_interpretation(self, directory, base_file_name, labels, multiclass=False):
def get_interpretation(self, directory, base_file_name, y_test, feature_ids,
multi_class=False):
self.view_importances = np.zeros(len(self.used_views))
self.feature_importances_ = [np.zeros(view_shape)
for view_shape in self.view_shapes]
......@@ -89,6 +90,9 @@ class Mumbo(BaseMultiviewClassifier, MumboClassifier):
np.savetxt(os.path.join(directory, "feature_importances",
base_file_name + view_name + "-feature_importances.csv"),
feature_importances, delimiter=',')
# CHANGE: Concatenate self.feature_importances_ into a single array so that it is easy to use in
# summit.multiview_platform.result_analysis.feature_importances.get_feature_importances
self.feature_importances_ = np.concatenate(self.feature_importances_)
self.view_importances /= np.sum(self.view_importances)
np.savetxt(os.path.join(directory, base_file_name + "view_importances.csv"), self.view_importances,
delimiter=',')
......@@ -101,5 +105,11 @@ class Mumbo(BaseMultiviewClassifier, MumboClassifier):
self.view_names[view_index],
self.view_importances[view_index])
interpret_string += "\n The boosting process selected views : \n" + ", ".join(map(str, self.best_views_))
interpret_string+="\n\n With estimator weights : \n"+ "\n".join(map(str,self.estimator_weights_/np.sum(self.estimator_weights_)))
interpret_string += "\n\n With estimator weights : \n" + "\n".join(
map(str, self.estimator_weights_ / np.sum(self.estimator_weights_)))
return interpret_string
def set_base_estim_from_dict(self, dict):
    """Build the base estimator described by a one-entry configuration mapping.

    Parameters
    ----------
    dict : mapping, estimator or None
        Either a mapping whose first item is ``(estimator_key, init_args)``,
        or a value without an ``items`` method (``None`` or, presumably, an
        already-built estimator), which is returned unchanged.
        NOTE: the parameter name shadows the ``dict`` builtin; it is kept
        so existing callers are not broken.

    Returns
    -------
    The estimator built from the mapping, the input itself when it is not
    a mapping, or ``None`` when the estimator key is not recognised.
    """
    # ``base_estimator`` defaults to None in ``__init__``; calling ``.items()``
    # on it would raise AttributeError, so non-mapping values pass through.
    if not hasattr(dict, "items"):
        return dict
    key, args = list(dict.items())[0]
    if key == "decision_tree":
        return DecisionTreeClassifier(**args)
    # Unknown keys keep the historical behaviour of yielding None.
    return None
......@@ -65,6 +65,8 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier, BaseFusionClassifier):
y=y[train_indices])
self.monoview_classifier.fit(X, y[train_indices])
self.monoview_classifier_config = self.monoview_classifier.get_params()
if hasattr(self.monoview_classifier, 'feature_importances_'):
self.feature_importances_ = self.monoview_classifier.feature_importances_
return self
def predict(self, X, sample_indices=None, view_indices=None):
......
......@@ -44,19 +44,22 @@ def get_feature_importances(result, feature_ids=None, view_names=None,):
v_feature_id]
feature_importances["mv"] = pd.DataFrame(index=feat_ids)
if hasattr(classifier_result.clf, 'feature_importances_'):
feature_importances["mv"][classifier_result.classifier_name] = classifier_result.clf.feature_importances_
feature_importances["mv"][classifier_result.get_classifier_name()] = classifier_result.clf.feature_importances_
else:
# HACK: Assign default (all-zero) feature importance values to classifiers that have no feature_importances_
# attribute (e.g. Linear Late Fusion)
feature_importances["mv"][classifier_result.get_classifier_name()] = np.zeros(len(feature_importances["mv"].index))
return feature_importances
def publish_feature_importances(feature_importances, directory, database_name,
feature_stds=None, metric_scores=None): # pragma: no cover
# TODO: Handle the case where the feature importances contain NaN values
importance_dfs = []
std_dfs = []
if not os.path.exists(os.path.join(directory, "feature_importances")):
os.mkdir(os.path.join(directory, "feature_importances"))
for view_name, feature_importance in feature_importances.items():
if view_name!="mv":
if feature_stds is not None:
feature_std = feature_stds[view_name]
else:
......@@ -65,33 +68,22 @@ def publish_feature_importances(feature_importances, directory, database_name,
columns=feature_importance.columns)
feature_std = feature_std.loc[feature_importance.index]
if view_name == "mv":
importance_dfs.append(feature_importance)
std_dfs.append(feature_std)
else:
importance_dfs.append(feature_importance.set_index(
pd.Index([view_name + "-" + ind for ind in list(feature_importance.index)])))
importance_dfs.append(feature_importance.set_index(pd.Index([view_name+"-"+ind for ind in list(feature_importance.index)])))
# importance_dfs.append(pd.DataFrame(index=[view_name+"-br"],
# columns=feature_importance.columns,
# data=np.zeros((1, len(
# feature_importance.columns)))))
std_dfs.append(feature_std.set_index(pd.Index([view_name + "-" + ind
for ind
in list(feature_std.index)])))
# std_dfs.append(pd.DataFrame(index=[view_name + "-br"],
# columns=feature_std.columns,
# data=np.zeros((1, len(
# feature_std.columns)))))
if len(importance_dfs) > 0:
feature_importances_df = pd.concat(importance_dfs)
feature_importances_df = feature_importances_df / feature_importances_df.sum(axis=0)
feature_std_df = pd.concat(std_dfs)
if "mv" in feature_importances:
feature_importances_df = pd.concat([feature_importances_df,feature_importances["mv"].loc[(feature_importances["mv"] != 0).any(axis=1), :]], axis=1).fillna(0)
if feature_stds is not None:
feature_std_df = pd.concat([feature_std_df, feature_stds["mv"]], axis=1,).fillna(0)
else:
fake = pd.DataFrame(data=np.zeros((feature_importances_df.shape[0], feature_importances["mv"].shape[1])),
index=feature_importances_df.index,
columns=feature_importances["mv"].columns).fillna(0)
feature_std_df = pd.concat([feature_std_df, fake], axis=1,).fillna(0)
plot_feature_importances(os.path.join(directory, "feature_importances",
database_name), feature_importances_df, feature_std_df)
if metric_scores is not None:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment