Commit a3562f6d authored by Baptiste Bauvin

Added base_file_name to get_interpret

parent 2c11235e
Showing 68 additions and 89 deletions
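At a glance, the commit splits the old output_file_name (directory and file-name prefix fused into one string) into a separate directory and base_file_name, threads the pair through init_constants, ResultAnalyser, and every get_interpretation / get_feature_importance implementation, and switches path building from string concatenation to os.path.join (hence the import os additions at the top of the touched modules). A minimal sketch of the before/after pattern, using hypothetical sample values rather than ones taken from the repository:

    import os

    # hypothetical sample inputs, for illustration only
    directory = "results"
    cl_type, name, view_name = "decision_tree", "plausible", "ViewNumber0"

    # before: directory and file prefix fused into one string,
    # suffixes appended with "+"
    output_file_name = os.path.join(directory, cl_type, view_name,
                                    cl_type + '-' + name + "-" + view_name + "-")
    old_fig = output_file_name + "feature_importances.png"

    # after: the two pieces stay separate and every output path
    # is assembled with os.path.join
    directory = os.path.join(directory, cl_type, view_name)
    base_file_name = cl_type + '-' + name + "-" + view_name + "-"
    new_fig = os.path.join(directory, base_file_name + "feature_importances.png")

    # the resulting paths are identical; the split simply lets callers
    # pass directory and base_file_name independently, as the new
    # get_interpretation(directory, base_file_name, y_test) signature does
    assert old_fig == new_fig

The same pattern repeats in every hunk below.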
@@ -63,7 +63,9 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
     X, \
     learningRate, \
     labelsString, \
-    output_file_name = init_constants(args, X, classification_indices,
+    output_file_name, \
+    directory, \
+    base_file_name = init_constants(args, X, classification_indices,
                                       labels_names,
                                       database_name, directory, view_name, )
     logging.debug("Done:\t Loading data")
@@ -143,7 +145,8 @@ def exec_monoview(directory, X, Y, database_name, labels_names, classification_i
         class_label_names=labels_names,
         train_pred=train_pred,
         test_pred=test_pred,
-        directory=output_file_name,
+        directory=directory,
+        base_file_name=base_file_name,
         labels=Y,
         database_name=database_name,
         nb_cores=nb_cores,
@@ -175,11 +178,11 @@ def init_constants(args, X, classification_indices, labels_names,
                          len(classification_indices[0]) + len(classification_indices[1]))
     labels_string = "-".join(labels_names)
     cl_type_string = cl_type
-    output_file_name = os.path.join(directory, cl_type_string, view_name,
-                                    cl_type_string + '-' + name + "-" +
-                                    view_name + "-")
+    directory = os.path.join(directory, cl_type_string, view_name)
+    base_file_name = cl_type_string + '-' + name + "-" + view_name + "-"
+    output_file_name = os.path.join(directory, base_file_name)
     secure_file_path(output_file_name)
-    return kwargs, t_start, view_name, cl_type, X, learning_rate, labels_string, output_file_name
+    return kwargs, t_start, view_name, cl_type, X, learning_rate, labels_string, output_file_name, directory, base_file_name


 def init_train_test(X, Y, classification_indices):
@@ -1,4 +1,5 @@
 import pickle
+import os
 import matplotlib.pyplot as plt
 import numpy as np
 from matplotlib.ticker import FuncFormatter
@@ -113,7 +114,7 @@ class CustomUniform:
 class BaseMonoviewClassifier(BaseClassifier):

-    def get_feature_importance(self, directory, nb_considered_feats=50):
+    def get_feature_importance(self, directory, base_file_name, nb_considered_feats=50):
         """Used to generate a graph and a pickle dictionary representing
         feature importances"""
         feature_importances = self.feature_importances_
@@ -127,7 +128,8 @@ class BaseMonoviewClassifier(BaseClassifier):
         ax.yaxis.set_major_formatter(formatter)
         plt.bar(x, feature_importances_sorted)
         plt.title("Importance depending on feature")
-        fig.savefig(directory + "feature_importances.png", transparent=True)
+        fig.savefig(os.path.join(directory, base_file_name + "feature_importances.png"),
+                    transparent=True)
         plt.close()
         features_importances_dict = dict((featureIndex, featureImportance)
                                          for featureIndex, featureImportance in
@@ -207,12 +209,12 @@ class MonoviewResultAnalyzer(ResultAnalyser):
     def __init__(self, view_name, classifier_name, shape, classifier,
                  classification_indices, k_folds, hps_method, metrics_list,
                  n_iter, class_label_names, train_pred, test_pred,
-                 directory, labels, database_name, nb_cores, duration):
+                 directory, base_file_name, labels, database_name, nb_cores, duration):
         ResultAnalyser.__init__(self, classifier, classification_indices,
                                 k_folds, hps_method, metrics_list, n_iter,
                                 class_label_names, train_pred, test_pred,
-                                directory, labels, database_name, nb_cores,
-                                duration)
+                                directory, base_file_name, labels,
+                                database_name, nb_cores, duration)
         self.view_name = view_name
         self.classifier_name = classifier_name
         self.shape = shape
@@ -1,4 +1,5 @@
 import time
+import os
 import numpy as np
 from sklearn.ensemble import AdaBoostClassifier
@@ -129,9 +130,9 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
             [step_pred for step_pred in self.staged_predict(X)])
         return pred

-    def get_interpretation(self, directory, y_test, multi_class=False):
+    def get_interpretation(self, directory, base_file_name, y_test, multi_class=False):
         interpretString = ""
-        interpretString += self.get_feature_importance(directory)
+        interpretString += self.get_feature_importance(directory, base_file_name)
         interpretString += "\n\n Estimator error | Estimator weight\n"
         interpretString += "\n".join(
             [str(error) + " | " + str(weight / sum(self.estimator_weights_)) for
@@ -141,11 +142,13 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
             [self.plotted_metric.score(y_test, step_pred) for step_pred in
              self.step_predictions])
         get_accuracy_graph(step_test_metrics, "Adaboost",
-                           directory + "test_metrics.png",
+                           os.path.join(directory, base_file_name + "test_metrics.png"),
                            self.plotted_metric_name, set="test")
-        np.savetxt(directory + "test_metrics.csv", step_test_metrics,
+        np.savetxt(os.path.join(directory, base_file_name + "test_metrics.csv"),
+                   step_test_metrics,
                    delimiter=',')
-        np.savetxt(directory + "train_metrics.csv", self.metrics, delimiter=',')
-        np.savetxt(directory + "times.csv",
+        np.savetxt(os.path.join(directory, base_file_name + "train_metrics.csv"),
+                   self.metrics, delimiter=',')
+        np.savetxt(os.path.join(directory, base_file_name + "times.csv"),
                    np.array([self.train_time, self.pred_time]), delimiter=',')
         return interpretString
@@ -27,9 +27,10 @@ class DecisionTree(DecisionTreeClassifier, BaseMonoviewClassifier):
                          ["best", "random"], [random_state]]
         self.weird_strings = {}

-    def get_interpretation(self, directory, y_test):
+    def get_interpretation(self, directory, base_file_name, y_test,
+                           multiclass=False):
         interpretString = "First feature : \n\t{} <= {}\n".format(
             self.tree_.feature[0],
             self.tree_.threshold[0])
-        interpretString += self.get_feature_importance(directory)
+        interpretString += self.get_feature_importance(directory, base_file_name)
         return interpretString
@@ -1,4 +1,5 @@
 import time
+import os
 import numpy as np
 from sklearn.ensemble import GradientBoostingClassifier
@@ -70,12 +71,12 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier):
             [step_pred for step_pred in self.staged_predict(X)])
         return pred

-    def get_interpretation(self, directory, y_test, multi_class=False):
+    def get_interpretation(self, directory, base_file_name, y_test, multi_class=False):
         interpretString = ""
         if multi_class:
             return interpretString
         else:
-            interpretString += self.get_feature_importance(directory)
+            interpretString += self.get_feature_importance(directory, base_file_name)
             step_test_metrics = np.array(
                 [self.plotted_metric.score(y_test, step_pred) for step_pred in
                  self.step_predictions])
@@ -85,11 +86,11 @@ class GradientBoosting(GradientBoostingClassifier, BaseMonoviewClassifier):
             get_accuracy_graph(self.metrics, "AdaboostClassic",
                                directory + "metrics.png",
                                self.plotted_metric_name)
-            np.savetxt(directory + "test_metrics.csv", step_test_metrics,
+            np.savetxt(os.path.join(directory, base_file_name + "test_metrics.csv"), step_test_metrics,
                        delimiter=',')
-            np.savetxt(directory + "train_metrics.csv", self.metrics,
+            np.savetxt(os.path.join(directory, base_file_name + "train_metrics.csv"), self.metrics,
                        delimiter=',')
-            np.savetxt(directory + "times.csv",
+            np.savetxt(os.path.join(directory, base_file_name + "times.csv"),
                        np.array([self.train_time, self.pred_time]),
                        delimiter=',')
             return interpretString
@@ -40,7 +40,3 @@ class KNN(KNeighborsClassifier, BaseMonoviewClassifier):
                          [random_state]]
         self.weird_strings = {}
         self.random_state = random_state
-
-    def get_interpretation(self, directory, y_test, multiclass=False):
-        interpretString = ""
-        return interpretString
@@ -72,20 +72,3 @@ class Lasso(LassoSK, BaseMonoviewClassifier):
         signed = np.sign(prediction)
         signed[np.where(signed == -1)] = 0
         return signed
\ No newline at end of file
-
-    def get_interpretation(self, directory, y_test, multiclass=False):
-        """
-        return the interpreted string
-
-        Parameters
-        ----------
-        directory :
-
-        y_test :
-
-        Returns
-        -------
-        interpreted string, str interpret_string
-        """
-        interpret_string = ""
-        return interpret_string
@@ -65,7 +65,7 @@ class RandomForest(RandomForestClassifier, BaseMonoviewClassifier):
                          ["gini", "entropy"], [random_state]]
         self.weird_strings = {}

-    def get_interpretation(self, directory, y_test, multiclass=False):
+    def get_interpretation(self, directory, base_file_name, y_test, multiclass=False):
         """

         Parameters
@@ -78,5 +78,5 @@ class RandomForest(RandomForestClassifier, BaseMonoviewClassifier):
             string for interpretation interpret_string
         """
         interpret_string = ""
-        interpret_string += self.get_feature_importance(directory)
+        interpret_string += self.get_feature_importance(directory, base_file_name)
         return interpret_string
@@ -54,20 +54,3 @@ class SGD(SGDClassifier, BaseMonoviewClassifier):
                          ["l1", "l2", "elasticnet"],
                          CustomUniform(loc=0, state=1), [random_state]]
         self.weird_strings = {}
-
-    def get_interpretation(self, directory, y_test, multiclass=False):
-        """
-
-        Parameters
-        ----------
-        directory
-
-        y_test
-
-        Returns
-        -------
-        interpret_string str to interpreted
-        """
-        interpret_string = ""
-        # self.feature_importances_ = (self.coef_/np.sum(self.coef_)).reshape(self.coef_.shape[1])
-        return interpret_string
@@ -34,8 +34,3 @@ class SVMLinear(SVCClassifier, BaseMonoviewClassifier):
         )
         self.param_names = ["C", "random_state"]
         self.distribs = [CustomUniform(loc=0, state=1), [random_state]]
-
-    def get_interpretation(self, directory, y_test):
-        interpret_string = ""
-        # self.feature_importances_ = (self.coef_/np.sum(self.coef_)).reshape((self.coef_.shape[1],))
-        return interpret_string
@@ -63,10 +63,12 @@ def init_constants(kwargs, classification_indices, metrics,
     logging.info("Info:\t Shape of " + str(view_name) + " :" + str(
         dataset_var.get_shape()))
     labels = dataset_var.get_labels()
-    output_file_name = os.path.join(directory, classifier_name,
-                                    classifier_name + "-" + dataset_var.get_name() + "-")
+    directory = os.path.join(directory, classifier_name)
+    base_file_name = classifier_name + "-" + dataset_var.get_name() + "-"
+    output_file_name = os.path.join(directory, base_file_name)
     return classifier_name, t_start, views_indices, \
-           classifier_config, views, learning_rate, labels, output_file_name
+           classifier_config, views, learning_rate, labels, output_file_name, \
+           directory, base_file_name


 def save_results(string_analysis, images_analysis, output_file_name):
@@ -239,7 +241,9 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
     views, \
     learning_rate, \
     labels, \
-    output_file_name = init_constants(kwargs, classification_indices, metrics, name,
+    output_file_name, \
+    directory, \
+    base_file_name = init_constants(kwargs, classification_indices, metrics, name,
                                       nb_cores, k_folds, dataset_var, directory)
     logging.debug("Done:\t Initialize constants")
@@ -330,7 +334,8 @@ def exec_multiview(directory, dataset_var, name, classification_indices,
         class_label_names=list(labels_dictionary.values()),
         train_pred=train_pred,
         test_pred=test_pred,
-        output_file_name=output_file_name,
+        directory=directory,
+        base_file_name=base_file_name,
         labels=labels,
         database_name=dataset_var.get_name(),
         nb_cores=nb_cores,
@@ -177,13 +177,14 @@ class MultiviewResultAnalyzer(ResultAnalyser):
     def __init__(self, view_names, classifier, classification_indices, k_folds,
                  hps_method, metrics_list, n_iter, class_label_names,
-                 train_pred, test_pred, output_file_name, labels, database_name,
-                 nb_cores, duration):
+                 train_pred, test_pred, directory, base_file_name, labels,
+                 database_name, nb_cores, duration):
         if hps_method.endswith("equiv"):
             n_iter = n_iter*len(view_names)
         ResultAnalyser.__init__(self, classifier, classification_indices, k_folds,
                                 hps_method, metrics_list, n_iter, class_label_names,
-                                train_pred, test_pred, output_file_name, labels, database_name,
+                                train_pred, test_pred, directory,
+                                base_file_name, labels, database_name,
                                 nb_cores, duration)
         self.classifier_name = classifier.short_name
         self.view_names = view_names
@@ -96,7 +96,8 @@ class BaseClassifier(BaseEstimator, ):
         else:
             return str(self.get_params()[param_name])

-    def get_interpretation(self, directory, y_test, multi_class=False):
+    def get_interpretation(self, directory, base_file_name, y_test,
+                           multi_class=False):
         """
         Base method that returns an empty string if there is no interpretation
         method in the classifier's module
@@ -160,8 +161,8 @@ class ResultAnalyser():
     def __init__(self, classifier, classification_indices, k_folds,
                  hps_method, metrics_list, n_iter, class_label_names,
-                 train_pred, test_pred, directory, labels, database_name,
-                 nb_cores, duration):
+                 train_pred, test_pred, directory, base_file_name, labels,
+                 database_name, nb_cores, duration):
         """

         Parameters
@@ -204,6 +205,7 @@ class ResultAnalyser():
         self.train_pred = train_pred
         self.test_pred = test_pred
         self.directory = directory
+        self.base_file_name = base_file_name
         self.labels = labels
         self.string_analysis = ""
         self.database_name = database_name
@@ -336,7 +338,7 @@ class ResultAnalyser():
         string_analysis += "\n\n Classification took {}".format(hms(seconds=int(self.duration)))
         string_analysis += "\n\n Classifier Interpretation : \n"
         string_analysis += self.classifier.get_interpretation(
-            self.directory,
+            self.directory, self.base_file_name,
             self.labels[self.test_indices])
         image_analysis = {}
         return string_analysis, image_analysis, self.metric_scores
@@ -166,7 +166,7 @@ class MultiClassWrapper:
     def get_config(self):
         return self.estimator.get_config()

-    def get_interpretation(self, output_file_name=None, y_test=None):
+    def get_interpretation(self, directory, base_file_name, y_test=None):
         # return self.estimator.get_interpretation(output_file_name, y_test,
         #                                          multi_class=True)
         # TODO : Multiclass interpretation
@@ -41,7 +41,9 @@ class Test_initConstants(unittest.TestCase):
         X, \
         learningRate, \
         labelsString, \
-        outputFileName = exec_classif_mono_view.init_constants(cls.args,
+        output_file_name, \
+        directory, \
+        base_file_name = exec_classif_mono_view.init_constants(cls.args,
                                                                cls.X,
                                                                cls.classification_indices,
                                                                cls.labels_names,
@@ -54,7 +56,7 @@ class Test_initConstants(unittest.TestCase):
         np.testing.assert_array_equal(X, cls.X_value)
         cls.assertEqual(learningRate, 0.5)
         cls.assertEqual(labelsString, "test_true-test_false")
-        # cls.assertEqual(outputFileName, "Code/tests/temp_tests/test_dir/test_clf/test_dataset/results-test_clf-test_true-test_false-learnRate0.5-test-test_dataset-")
+        # cls.assertEqual(output_file_name, "Code/tests/temp_tests/test_dir/test_clf/test_dataset/results-test_clf-test_true-test_false-learnRate0.5-test-test_dataset-")

     @classmethod
     def tearDownClass(cls):
@@ -41,6 +41,7 @@ class Test_ResultAnalyzer(unittest.TestCase):
         cls.test_pred = np.random.randint(0, cls.n_classes,
                                           size=cls.test_length)
         cls.directory = "fake_directory"
+        cls.base_file_name = "fake_file"
         cls.labels = np.random.randint(0, cls.n_classes,
                                        size=cls.n_examples)
         cls.database_name = "test_database"
@@ -60,8 +61,9 @@ class Test_ResultAnalyzer(unittest.TestCase):
                            self.k_folds, self.hps_method, self.metrics_list,
                            self.n_iter, self.class_label_names,
                            self.train_pred, self.test_pred, self.directory,
-                           self.labels, self.database_name,
-                           self.nb_cores, self.duration)
+                           self.base_file_name, self.labels,
+                           self.database_name, self.nb_cores,
+                           self.duration)

     def test_get_metric_scores(self):
         RA = base.ResultAnalyser(self.classifier, self.classification_indices,
@@ -69,7 +71,7 @@ class Test_ResultAnalyzer(unittest.TestCase):
                                  self.metrics_list,
                                  self.n_iter, self.class_label_names,
                                  self.train_pred, self.test_pred,
-                                 self.directory,
+                                 self.directory, self.base_file_name,
                                  self.labels, self.database_name,
                                  self.nb_cores, self.duration)
         train_score, test_score = RA.get_metric_scores("accuracy_score", {})
@@ -82,7 +84,7 @@ class Test_ResultAnalyzer(unittest.TestCase):
                                  self.metrics_list,
                                  self.n_iter, self.class_label_names,
                                  self.train_pred, self.test_pred,
-                                 self.directory,
+                                 self.directory, self.base_file_name,
                                  self.labels, self.database_name,
                                  self.nb_cores, self.duration)
         RA.get_all_metrics_scores()