diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py index e5d2049776658a4adef48bd2395bcd0855cf0b20..e07ce04b0e3aa93de1afaa89d741d3f860d4e6a3 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/MonoviewUtils.py @@ -1,7 +1,9 @@ from sklearn.model_selection import RandomizedSearchCV from scipy.stats import uniform, randint -from sklearn.pipeline import Pipeline import numpy as np +import matplotlib.pyplot as plt +from matplotlib.ticker import FuncFormatter +import pickle from .. import Metrics from ..utils import HyperParameterSearch @@ -113,11 +115,39 @@ class BaseMonoviewClassifier(object): else: return str(self.get_params()[param_name]) + def getFeatureImportance(self, directory, nb_considered_feats=50): + """Used to generate a graph and a pickle dictionary representing feature importances""" + featureImportances = self.feature_importances_ + sortedArgs = np.argsort(-featureImportances) + featureImportancesSorted = featureImportances[sortedArgs][:nb_considered_feats] + featureIndicesSorted = sortedArgs[:nb_considered_feats] + fig, ax = plt.subplots() + x = np.arange(len(featureIndicesSorted)) + formatter = FuncFormatter(percent) + ax.yaxis.set_major_formatter(formatter) + plt.bar(x, featureImportancesSorted) + plt.title("Importance depending on feature") + fig.savefig(directory + "feature_importances.png") + plt.close() + featuresImportancesDict = dict((featureIndex, featureImportance) + for featureIndex, featureImportance in enumerate(featureImportances) + if featureImportance != 0) + with open(directory + 'feature_importances.pickle', 'wb') as handle: + pickle.dump(featuresImportancesDict, handle) + interpretString = "Feature importances : \n" + for featureIndex, featureImportance in zip(featureIndicesSorted, featureImportancesSorted): + if featureImportance > 0: + interpretString += "- Feature index : " + str(featureIndex) + \ + ", feature importance : " + str(featureImportance) + "\n" + return interpretString + def get_names(classed_list): return np.array([object_.__class__.__name__ for object_ in classed_list]) - +def percent(x, pos): + """Used to print percentage of importance on the y axis""" + return '%1.1f %%' % (x * 100) # def isUseful(labelSupports, index, CLASS_LABELS, labelDict): diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/analyzeResult.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/analyzeResult.py index d367589549d7b1571ebc46be4668d444c172a262..b81e84af5593ec70708596ddf4e712098d9274ec 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/analyzeResult.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/analyzeResult.py @@ -18,12 +18,12 @@ def getDBConfigString(name, feat, classificationIndices, shape, classLabelsNames def getClassifierConfigString(gridSearch, nbCores, nIter, clKWARGS, classifier, directory): classifierConfigString = "Classifier configuration : \n" - classifierConfigString += "\t- " + classifier.getConfig(clKWARGS)[5:] + "\n" + classifierConfigString += "\t- " + classifier.getConfig()[5:] + "\n" classifierConfigString += "\t- Executed on " + str(nbCores) + " core(s) \n" if gridSearch: classifierConfigString += "\t- Got configuration using randomized search with " + str(nIter) + " iterations \n" classifierConfigString += "\n\n" - classifierInterpretString = classifier.getInterpret(classifier, directory) + classifierInterpretString = classifier.getInterpret(directory) return classifierConfigString, classifierInterpretString diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py index 1808176c466095342ced5d92ec054c2e6fd4c265..9980f721d6226110f532525e6fe78a258f4ac107 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -1,7 +1,6 @@ from sklearn.ensemble import AdaBoostClassifier from sklearn.tree import DecisionTreeClassifier -from ..utils.Interpret import getFeatureImportance from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier # Author-Info @@ -35,28 +34,9 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): "base_estimator": DecisionTreeClassifier()}) return paramsSet - # def genPipeline(self): - # return Pipeline([('classifier', AdaBoostClassifier())]) - - # def genDistribs(self,): - # return {"classifier__n_estimators": CustomRandint(low=1, high=500), - # "classifier__base_estimator": [DecisionTreeClassifier()]} - - # def genParamsFromDetector(self, detector): - # nIter = len(detector.cv_results_['param_classifier__n_estimators']) - # return [("baseEstimators", np.array(["DecisionTree" for _ in range(nIter)])), - # ("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))] - - def getConfig(self, config): - if type(config) is not dict: # Used in late fusion when config is a classifier - return "\n\t\t- Adaboost with num_esimators : " + str(config.n_estimators) + ", base_estimators : " + str( - config.base_estimator) - else: - return "\n\t\t- Adaboost with n_estimators : " + str(config["n_estimators"]) + ", base_estimator : " + str( - config["base_estimator"]) - - def getInterpret(self, classifier, directory): - interpretString = getFeatureImportance(classifier, directory) + def getInterpret(self, directory): + interpretString = "" + interpretString += self.getFeatureImportance(directory) return interpretString diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py index 165123726da54b9bd733c8fc1bde32a2c9483ef4..ac4f9bd5750b04b77dd50c6050fce8868ee92ed3 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py @@ -8,7 +8,6 @@ import numpy as np from .. import Metrics from ..utils.HyperParameterSearch import genHeatMaps -from ..utils.Interpret import getFeatureImportance # Author-Info __author__ = "Baptiste Bauvin" diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py index 2130acfb779fb29fda488eb0082b8d3a0e210945..a49b6113591b866e5df97f1670f383727deed2a6 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py @@ -3,11 +3,6 @@ from sklearn.pipeline import Pipeline from sklearn.model_selection import RandomizedSearchCV from scipy.stats import randint import numpy as np -# import cPickle - -from .. import Metrics -from ..utils.HyperParameterSearch import genHeatMaps -from ..utils.Interpret import getFeatureImportance # Author-Info __author__ = "Baptiste Bauvin" diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/Interpret.py b/multiview_platform/MonoMultiViewClassifiers/utils/Interpret.py deleted file mode 100644 index dc816ac35d50a330fdaf5835106f3da814470a4d..0000000000000000000000000000000000000000 --- a/multiview_platform/MonoMultiViewClassifiers/utils/Interpret.py +++ /dev/null @@ -1,36 +0,0 @@ -import numpy as np -import matplotlib.pyplot as plt -from matplotlib.ticker import FuncFormatter -import pickle - - -def percent(x, pos): - """Used to print percentage of importance on the y axis""" - return '%1.1f %%' % (x * 100) - - -def getFeatureImportance(classifier, directory, interpretString=""): - """Used to generate a graph and a pickle dictionary representing feature importances""" - featureImportances = classifier.feature_importances_ - sortedArgs = np.argsort(-featureImportances) - featureImportancesSorted = featureImportances[sortedArgs][:50] - featureIndicesSorted = sortedArgs[:50] - fig, ax = plt.subplots() - x = np.arange(len(featureIndicesSorted)) - formatter = FuncFormatter(percent) - ax.yaxis.set_major_formatter(formatter) - plt.bar(x, featureImportancesSorted) - plt.title("Importance depending on feature") - fig.savefig(directory + "feature_importances.png") - plt.close() - featuresImportancesDict = dict((featureIndex, featureImportance) - for featureIndex, featureImportance in enumerate(featureImportances) - if featureImportance != 0) - with open(directory+'feature_importances.pickle', 'wb') as handle: - pickle.dump(featuresImportancesDict, handle) - interpretString += "Feature importances : \n" - for featureIndex, featureImportance in zip(featureIndicesSorted, featureImportancesSorted): - if featureImportance>0: - interpretString+="- Feature index : "+str(featureIndex)+\ - ", feature importance : "+str(featureImportance)+"\n" - return interpretString \ No newline at end of file diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/__init__.py b/multiview_platform/MonoMultiViewClassifiers/utils/__init__.py index 5baa4d9cca647d122ac24808c64040829eb58200..842d824c6b28acf620ca3b31f973f2a924bd9415 100644 --- a/multiview_platform/MonoMultiViewClassifiers/utils/__init__.py +++ b/multiview_platform/MonoMultiViewClassifiers/utils/__init__.py @@ -1 +1 @@ -from . import Dataset, execution, HyperParameterSearch, Transformations, Interpret +from . import Dataset, execution, HyperParameterSearch, Transformations