From b279acb5bfc1623fa33a315a9bf027881e7dd765 Mon Sep 17 00:00:00 2001 From: Baptiste Bauvin <baptiste.bauvin.1@ulaval.ca> Date: Sat, 21 Oct 2017 19:05:35 -0400 Subject: [PATCH] Added graphs to visualize feature importances --- .../MonoviewClassifiers/Adaboost.py | 20 +++-------- .../MonoviewClassifiers/DecisionTree.py | 18 ++-------- .../MonoviewClassifiers/RandomForest.py | 18 ++-------- .../MonoviewClassifiers/SCM.py | 10 +++--- .../MonoviewClassifiers/SGD.py | 2 ++ .../MonoviewClassifiers/SVMLinear.py | 1 + .../utils/Interpret.py | 35 +++++++++++++++++++ 7 files changed, 54 insertions(+), 50 deletions(-) create mode 100644 Code/MonoMultiViewClassifiers/utils/Interpret.py diff --git a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py index 66a39edd..b5f75f49 100644 --- a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -4,10 +4,13 @@ from sklearn.model_selection import RandomizedSearchCV from sklearn.tree import DecisionTreeClassifier from scipy.stats import randint import numpy as np -import cPickle +# import cPickle +# import matplotlib.pyplot as plt +# from matplotlib.ticker import FuncFormatter from .. 
import Metrics from ..utils.HyperParameterSearch import genHeatMaps +from ..utils.Interpret import getFeatureImportance # Author-Info __author__ = "Baptiste Bauvin" @@ -82,18 +85,5 @@ def getConfig(config): config["1"]) def getInterpret(classifier, directory): - featureImportances = classifier.feature_importances_ - sortedArgs = np.argsort(-featureImportances) - featureImportancesSorted = featureImportances[sortedArgs][:50] - featureIndicesSorted = sortedArgs[:50] - featuresImportancesDict = dict((featureIndex, featureImportance) - for featureIndex, featureImportance in enumerate(featureImportances) - if featureImportance != 0) - with open(directory+'-feature_importances.pickle', 'wb') as handle: - cPickle.dump(featuresImportancesDict, handle) - interpretString = "Feature importances : \n" - for featureIndex, featureImportance in zip(featureIndicesSorted, featureImportancesSorted): - if featureImportance>0: - interpretString+="- Feature index : "+str(featureIndex)+\ - ", feature importance : "+str(featureImportance)+"\n" + interpretString = getFeatureImportance(classifier, directory) return interpretString \ No newline at end of file diff --git a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py index 152e9e7d..1dbd83ad 100644 --- a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py +++ b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py @@ -4,10 +4,11 @@ from sklearn.model_selection import RandomizedSearchCV from scipy.stats import randint import numpy as np import graphviz -import cPickle +# import cPickle from .. 
import Metrics from ..utils.HyperParameterSearch import genHeatMaps +from ..utils.Interpret import getFeatureImportance # Author-Info __author__ = "Baptiste Bauvin" @@ -92,18 +93,5 @@ def getInterpret(classifier, directory): dot_data = tree.export_graphviz(classifier, out_file=None) graph = graphviz.Source(dot_data) graph.render(directory+"-tree.pdf") - featureImportances = classifier.feature_importances_ - sortedArgs = np.argsort(-featureImportances) - featureImportancesSorted = featureImportances[sortedArgs][:50] - featureIndicesSorted = sortedArgs[:50] - featuresImportancesDict = dict((featureIndex, featureImportance) - for featureIndex, featureImportance in enumerate(featureImportances) - if featureImportance != 0) - with open(directory + '-feature_importances.pickle', 'wb') as handle: - cPickle.dump(featuresImportancesDict, handle) - interpretString = "Feature importances : \n" - for featureIndex, featureImportance in zip(featureIndicesSorted, featureImportancesSorted): - if featureImportance > 0: - interpretString += "- Feature index : " + str(featureIndex) + \ - ", feature importance : " + str(featureImportance) + "\n" + interpretString = getFeatureImportance(classifier, directory) return interpretString \ No newline at end of file diff --git a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py index 66caa2d4..79ea71e4 100644 --- a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py +++ b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py @@ -3,10 +3,11 @@ from sklearn.pipeline import Pipeline from sklearn.model_selection import RandomizedSearchCV from scipy.stats import randint import numpy as np -import cPickle +# import cPickle from .. 
import Metrics from ..utils.HyperParameterSearch import genHeatMaps +from ..utils.Interpret import getFeatureImportance # Author-Info __author__ = "Baptiste Bauvin" @@ -90,18 +91,5 @@ def getConfig(config): def getInterpret(classifier, directory): - featureImportances = classifier.feature_importances_ - sortedArgs = np.argsort(-featureImportances) - featureImportancesSorted = featureImportances[sortedArgs][:50] - featureIndicesSorted = sortedArgs[:50] - featuresImportancesDict = dict((featureIndex, featureImportance) - for featureIndex, featureImportance in enumerate(featureImportances) - if featureImportance != 0) - with open(directory+'-feature_importances.pickle', 'wb') as handle: - cPickle.dump(featuresImportancesDict, handle) - interpretString = "Feature importances : \n" - for featureIndex, featureImportance in zip(featureIndicesSorted, featureImportancesSorted): - if featureImportance>0: - interpretString+="- Feature index : "+str(featureIndex)+ \ - ", feature importance : "+str(featureImportance)+"\n" + interpretString = getFeatureImportance(classifier, directory) return interpretString diff --git a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py index ce998ed4..56084400 100644 --- a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py +++ b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py @@ -78,10 +78,6 @@ def paramsToSet(nIter, randomState): return paramsSet -def getInterpret(classifier, directory): - return "" - - def getKWARGS(kwargsList): kwargsDict = {} for (kwargName, kwargValue) in kwargsList: @@ -133,4 +129,8 @@ def getConfig(config): str(config[2]) except: return "\n\t\t- SCM with model_type: " + config["0"] + ", max_rules : " + str(config["1"]) + ", p : " + \ - str(config["2"]) \ No newline at end of file + str(config["2"]) + + +def getInterpret(classifier, directory): + return "" diff --git a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SGD.py 
b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SGD.py index e3182787..0bbd424d 100644 --- a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SGD.py +++ b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SGD.py @@ -91,4 +91,6 @@ def getConfig(config): "1"] + ", alpha : " + str(config["2"]) def getInterpret(classifier, directory): + # TODO : coeffs return "" +# \ No newline at end of file diff --git a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SVMLinear.py b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SVMLinear.py index 63872d5f..87ad608a 100644 --- a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SVMLinear.py +++ b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SVMLinear.py @@ -75,4 +75,5 @@ def getConfig(config): return "\n\t\t- SVM Linear with C : " + str(config["0"]) def getInterpret(classifier, directory): + # TODO : coeffs return "" diff --git a/Code/MonoMultiViewClassifiers/utils/Interpret.py b/Code/MonoMultiViewClassifiers/utils/Interpret.py new file mode 100644 index 00000000..6f1882a5 --- /dev/null +++ b/Code/MonoMultiViewClassifiers/utils/Interpret.py @@ -0,0 +1,35 @@ +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.ticker import FuncFormatter +import cPickle + + +def percent(x, pos): + 'The two args are the value and tick position' + return '%1.1f %%' % (x * 100) + + +def getFeatureImportance(classifier, directory, interpretString=""): + featureImportances = classifier.feature_importances_ + sortedArgs = np.argsort(-featureImportances) + featureImportancesSorted = featureImportances[sortedArgs][:50] + featureIndicesSorted = sortedArgs[:50] + fig, ax = plt.subplots() + x = np.arange(50) + formatter = FuncFormatter(percent) + ax.yaxis.set_major_formatter(formatter) + plt.bar(x, featureImportancesSorted) + plt.title("Importance depending on feature") + fig.savefig(directory + "-feature_importances.png") + plt.close() + featuresImportancesDict = dict((featureIndex, featureImportance) + for featureIndex, 
featureImportance in enumerate(featureImportances) + if featureImportance != 0) + with open(directory+'-feature_importances.pickle', 'wb') as handle: + cPickle.dump(featuresImportancesDict, handle) + interpretString += "Feature importances : \n" + for featureIndex, featureImportance in zip(featureIndicesSorted, featureImportancesSorted): + if featureImportance>0: + interpretString+="- Feature index : "+str(featureIndex)+\ + ", feature importance : "+str(featureImportance)+"\n" + return interpretString \ No newline at end of file -- GitLab