diff --git a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py index 66a39eddbd40620b590b7a9473c2fb143d57cf19..b5f75f49cb1194e52b364edfa889643bc633b7e4 100644 --- a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -4,10 +4,13 @@ from sklearn.model_selection import RandomizedSearchCV from sklearn.tree import DecisionTreeClassifier from scipy.stats import randint import numpy as np -import cPickle +# import cPickle +# import matplotlib.pyplot as plt +# from matplotlib.ticker import FuncFormatter from .. import Metrics from ..utils.HyperParameterSearch import genHeatMaps +from ..utils.Interpret import getFeatureImportance # Author-Info __author__ = "Baptiste Bauvin" @@ -82,18 +85,5 @@ def getConfig(config): config["1"]) def getInterpret(classifier, directory): - featureImportances = classifier.feature_importances_ - sortedArgs = np.argsort(-featureImportances) - featureImportancesSorted = featureImportances[sortedArgs][:50] - featureIndicesSorted = sortedArgs[:50] - featuresImportancesDict = dict((featureIndex, featureImportance) - for featureIndex, featureImportance in enumerate(featureImportances) - if featureImportance != 0) - with open(directory+'-feature_importances.pickle', 'wb') as handle: - cPickle.dump(featuresImportancesDict, handle) - interpretString = "Feature importances : \n" - for featureIndex, featureImportance in zip(featureIndicesSorted, featureImportancesSorted): - if featureImportance>0: - interpretString+="- Feature index : "+str(featureIndex)+\ - ", feature importance : "+str(featureImportance)+"\n" + interpretString = getFeatureImportance(classifier, directory) return interpretString \ No newline at end of file diff --git a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py index 152e9e7d498803353a49a6a2725c5ddbd233b9ef..1dbd83adeaef9bbd9afd130a6914e1c5de4e337c 100644 --- a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py +++ b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/DecisionTree.py @@ -4,10 +4,11 @@ from sklearn.model_selection import RandomizedSearchCV from scipy.stats import randint import numpy as np import graphviz -import cPickle +# import cPickle from .. import Metrics from ..utils.HyperParameterSearch import genHeatMaps +from ..utils.Interpret import getFeatureImportance # Author-Info __author__ = "Baptiste Bauvin" @@ -92,18 +93,5 @@ def getInterpret(classifier, directory): dot_data = tree.export_graphviz(classifier, out_file=None) graph = graphviz.Source(dot_data) graph.render(directory+"-tree.pdf") - featureImportances = classifier.feature_importances_ - sortedArgs = np.argsort(-featureImportances) - featureImportancesSorted = featureImportances[sortedArgs][:50] - featureIndicesSorted = sortedArgs[:50] - featuresImportancesDict = dict((featureIndex, featureImportance) - for featureIndex, featureImportance in enumerate(featureImportances) - if featureImportance != 0) - with open(directory + '-feature_importances.pickle', 'wb') as handle: - cPickle.dump(featuresImportancesDict, handle) - interpretString = "Feature importances : \n" - for featureIndex, featureImportance in zip(featureIndicesSorted, featureImportancesSorted): - if featureImportance > 0: - interpretString += "- Feature index : " + str(featureIndex) + \ - ", feature importance : " + str(featureImportance) + "\n" + interpretString = getFeatureImportance(classifier, directory) return interpretString \ No newline at end of file diff --git a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py index 66caa2d4c30baeaff8da66a9ae07651168e1ef0a..79ea71e43ef03f6919a8c08d3dedf186b10e0b9f 100644 --- a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py +++ b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/RandomForest.py @@ -3,10 +3,11 @@ from sklearn.pipeline import Pipeline from sklearn.model_selection import RandomizedSearchCV from scipy.stats import randint import numpy as np -import cPickle +# import cPickle from .. import Metrics from ..utils.HyperParameterSearch import genHeatMaps +from ..utils.Interpret import getFeatureImportance # Author-Info __author__ = "Baptiste Bauvin" @@ -90,18 +91,5 @@ def getConfig(config): def getInterpret(classifier, directory): - featureImportances = classifier.feature_importances_ - sortedArgs = np.argsort(-featureImportances) - featureImportancesSorted = featureImportances[sortedArgs][:50] - featureIndicesSorted = sortedArgs[:50] - featuresImportancesDict = dict((featureIndex, featureImportance) - for featureIndex, featureImportance in enumerate(featureImportances) - if featureImportance != 0) - with open(directory+'-feature_importances.pickle', 'wb') as handle: - cPickle.dump(featuresImportancesDict, handle) - interpretString = "Feature importances : \n" - for featureIndex, featureImportance in zip(featureIndicesSorted, featureImportancesSorted): - if featureImportance>0: - interpretString+="- Feature index : "+str(featureIndex)+ \ - ", feature importance : "+str(featureImportance)+"\n" + interpretString = getFeatureImportance(classifier, directory) return interpretString diff --git a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py index ce998ed4e47b098165e169d2afdcc1a1348f137f..56084400203c806baeea81d0d235b0004e726ddd 100644 --- a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py +++ b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py @@ -78,10 +78,6 @@ def paramsToSet(nIter, randomState): return paramsSet -def getInterpret(classifier, directory): - return "" - - def getKWARGS(kwargsList): kwargsDict = {} for (kwargName, kwargValue) in kwargsList: @@ -133,4 +129,8 @@ def getConfig(config): str(config[2]) except: return "\n\t\t- SCM with model_type: " + config["0"] + ", max_rules : " + str(config["1"]) + ", p : " + \ - str(config["2"]) \ No newline at end of file + str(config["2"]) + + +def getInterpret(classifier, directory): + return "" diff --git a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SGD.py b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SGD.py index e3182787b78125057518afe2626ec08e50a5f179..0bbd424d0fad8104c25421db79ffe1188965512c 100644 --- a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SGD.py +++ b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SGD.py @@ -91,4 +91,6 @@ def getConfig(config): "1"] + ", alpha : " + str(config["2"]) def getInterpret(classifier, directory): + # TODO : coeffs return "" +# \ No newline at end of file diff --git a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SVMLinear.py b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SVMLinear.py index 63872d5f423a62aa6f381c0b4274e0e7f00ff96f..87ad608a2b86fd411401dfc6111910295ea65ef5 100644 --- a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SVMLinear.py +++ b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SVMLinear.py @@ -75,4 +75,5 @@ def getConfig(config): return "\n\t\t- SVM Linear with C : " + str(config["0"]) def getInterpret(classifier, directory): + # TODO : coeffs return "" diff --git a/Code/MonoMultiViewClassifiers/utils/Interpret.py b/Code/MonoMultiViewClassifiers/utils/Interpret.py new file mode 100644 index 0000000000000000000000000000000000000000..6f1882a5fa962e4bb3088570cbc9c5c2489e08e9 --- /dev/null +++ b/Code/MonoMultiViewClassifiers/utils/Interpret.py @@ -0,0 +1,35 @@ +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.ticker import FuncFormatter +import cPickle + + +def percent(x, pos): + 'The two args are the value and tick position' + return '%1.1f %%' % (x * 100) + + +def getFeatureImportance(classifier, directory, interpretString=""): + featureImportances = classifier.feature_importances_ + sortedArgs = np.argsort(-featureImportances) + featureImportancesSorted = featureImportances[sortedArgs][:50] + featureIndicesSorted = sortedArgs[:50] + fig, ax = plt.subplots() + x = np.arange(50) + formatter = FuncFormatter(percent) + ax.yaxis.set_major_formatter(formatter) + plt.bar(x, featureImportancesSorted) + plt.title("Importance depending on feature") + fig.savefig(directory + "-feature_importances.png") + plt.close() + featuresImportancesDict = dict((featureIndex, featureImportance) + for featureIndex, featureImportance in enumerate(featureImportances) + if featureImportance != 0) + with open(directory+'-feature_importances.pickle', 'wb') as handle: + cPickle.dump(featuresImportancesDict, handle) + interpretString += "Feature importances : \n" + for featureIndex, featureImportance in zip(featureIndicesSorted, featureImportancesSorted): + if featureImportance>0: + interpretString+="- Feature index : "+str(featureIndex)+\ + ", feature importance : "+str(featureImportance)+"\n" + return interpretString \ No newline at end of file