Skip to content
Snippets Groups Projects
Commit cbea8ff6 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Updated config functions

parent a77543da
No related branches found
No related tags found
No related merge requests found
from sklearn.model_selection import RandomizedSearchCV from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform, randint from scipy.stats import uniform, randint
from sklearn.pipeline import Pipeline
import numpy as np import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import pickle
from .. import Metrics from .. import Metrics
from ..utils import HyperParameterSearch from ..utils import HyperParameterSearch
...@@ -113,11 +115,39 @@ class BaseMonoviewClassifier(object): ...@@ -113,11 +115,39 @@ class BaseMonoviewClassifier(object):
else: else:
return str(self.get_params()[param_name]) return str(self.get_params()[param_name])
def getFeatureImportance(self, directory, nb_considered_feats=50):
    """Plot and pickle this classifier's feature importances.

    Saves ``feature_importances.png`` (bar plot of the top features) and
    ``feature_importances.pickle`` (dict of every non-zero importance) under
    *directory* (expected to end with a path separator, per the file's
    convention), and returns a human-readable summary.

    Parameters
    ----------
    directory : str
        Output path prefix for the generated files.
    nb_considered_feats : int, optional
        Number of top features (by decreasing importance) to plot and report.

    Returns
    -------
    str
        One line per plotted feature with strictly positive importance.
    """
    featureImportances = self.feature_importances_
    # Indices of the features sorted by decreasing importance.
    sortedArgs = np.argsort(-featureImportances)
    featureIndicesSorted = sortedArgs[:nb_considered_feats]
    featureImportancesSorted = featureImportances[featureIndicesSorted]
    # Use the object-oriented matplotlib API on the figure we create,
    # rather than pyplot's implicit "current figure" global state.
    fig, ax = plt.subplots()
    ax.yaxis.set_major_formatter(FuncFormatter(percent))
    ax.bar(np.arange(len(featureIndicesSorted)), featureImportancesSorted)
    ax.set_title("Importance depending on feature")
    fig.savefig(directory + "feature_importances.png")
    plt.close(fig)  # close this figure explicitly, not whichever is current
    # Persist every non-zero importance, not only the plotted top ones.
    featuresImportancesDict = {featureIndex: featureImportance
                               for featureIndex, featureImportance in enumerate(featureImportances)
                               if featureImportance != 0}
    with open(directory + 'feature_importances.pickle', 'wb') as handle:
        pickle.dump(featuresImportancesDict, handle)
    interpretString = "Feature importances : \n"
    for featureIndex, featureImportance in zip(featureIndicesSorted, featureImportancesSorted):
        if featureImportance > 0:
            interpretString += "- Feature index : " + str(featureIndex) + \
                               ", feature importance : " + str(featureImportance) + "\n"
    return interpretString
def get_names(classed_list):
    """Return the class name of every object in *classed_list* as a numpy array."""
    names = [type(obj).__name__ for obj in classed_list]
    return np.array(names)
def percent(x, pos):
    """Format the fraction *x* as a percentage tick label for the y axis.

    *pos* is required by matplotlib's FuncFormatter protocol and is unused.
    """
    return "{:1.1f} %".format(x * 100)
# def isUseful(labelSupports, index, CLASS_LABELS, labelDict): # def isUseful(labelSupports, index, CLASS_LABELS, labelDict):
......
...@@ -18,12 +18,12 @@ def getDBConfigString(name, feat, classificationIndices, shape, classLabelsNames ...@@ -18,12 +18,12 @@ def getDBConfigString(name, feat, classificationIndices, shape, classLabelsNames
def getClassifierConfigString(gridSearch, nbCores, nIter, clKWARGS, classifier, directory): def getClassifierConfigString(gridSearch, nbCores, nIter, clKWARGS, classifier, directory):
classifierConfigString = "Classifier configuration : \n" classifierConfigString = "Classifier configuration : \n"
classifierConfigString += "\t- " + classifier.getConfig(clKWARGS)[5:] + "\n" classifierConfigString += "\t- " + classifier.getConfig()[5:] + "\n"
classifierConfigString += "\t- Executed on " + str(nbCores) + " core(s) \n" classifierConfigString += "\t- Executed on " + str(nbCores) + " core(s) \n"
if gridSearch: if gridSearch:
classifierConfigString += "\t- Got configuration using randomized search with " + str(nIter) + " iterations \n" classifierConfigString += "\t- Got configuration using randomized search with " + str(nIter) + " iterations \n"
classifierConfigString += "\n\n" classifierConfigString += "\n\n"
classifierInterpretString = classifier.getInterpret(classifier, directory) classifierInterpretString = classifier.getInterpret(directory)
return classifierConfigString, classifierInterpretString return classifierConfigString, classifierInterpretString
......
from sklearn.ensemble import AdaBoostClassifier from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier from sklearn.tree import DecisionTreeClassifier
from ..utils.Interpret import getFeatureImportance
from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier from ..Monoview.MonoviewUtils import CustomRandint, BaseMonoviewClassifier
# Author-Info # Author-Info
...@@ -35,28 +34,9 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier): ...@@ -35,28 +34,9 @@ class Adaboost(AdaBoostClassifier, BaseMonoviewClassifier):
"base_estimator": DecisionTreeClassifier()}) "base_estimator": DecisionTreeClassifier()})
return paramsSet return paramsSet
# def genPipeline(self): def getInterpret(self, directory):
# return Pipeline([('classifier', AdaBoostClassifier())]) interpretString = ""
interpretString += self.getFeatureImportance(directory)
# def genDistribs(self,):
# return {"classifier__n_estimators": CustomRandint(low=1, high=500),
# "classifier__base_estimator": [DecisionTreeClassifier()]}
# def genParamsFromDetector(self, detector):
# nIter = len(detector.cv_results_['param_classifier__n_estimators'])
# return [("baseEstimators", np.array(["DecisionTree" for _ in range(nIter)])),
# ("nEstimators", np.array(detector.cv_results_['param_classifier__n_estimators']))]
def getConfig(self, config):
if type(config) is not dict: # Used in late fusion when config is a classifier
return "\n\t\t- Adaboost with num_esimators : " + str(config.n_estimators) + ", base_estimators : " + str(
config.base_estimator)
else:
return "\n\t\t- Adaboost with n_estimators : " + str(config["n_estimators"]) + ", base_estimator : " + str(
config["base_estimator"])
def getInterpret(self, classifier, directory):
interpretString = getFeatureImportance(classifier, directory)
return interpretString return interpretString
......
...@@ -8,7 +8,6 @@ import numpy as np ...@@ -8,7 +8,6 @@ import numpy as np
from .. import Metrics from .. import Metrics
from ..utils.HyperParameterSearch import genHeatMaps from ..utils.HyperParameterSearch import genHeatMaps
from ..utils.Interpret import getFeatureImportance
# Author-Info # Author-Info
__author__ = "Baptiste Bauvin" __author__ = "Baptiste Bauvin"
......
...@@ -3,11 +3,6 @@ from sklearn.pipeline import Pipeline ...@@ -3,11 +3,6 @@ from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint from scipy.stats import randint
import numpy as np import numpy as np
# import cPickle
from .. import Metrics
from ..utils.HyperParameterSearch import genHeatMaps
from ..utils.Interpret import getFeatureImportance
# Author-Info # Author-Info
__author__ = "Baptiste Bauvin" __author__ = "Baptiste Bauvin"
......
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import pickle
def percent(x, pos):
    """Render the fraction *x* as a one-decimal percentage label.

    The unused *pos* argument is part of matplotlib's FuncFormatter signature.
    """
    percentage = x * 100
    return "{0:1.1f} %".format(percentage)
def getFeatureImportance(classifier, directory, interpretString="", nb_considered_feats=50):
    """Plot and pickle the feature importances of *classifier*.

    Saves ``feature_importances.png`` (bar plot of the top features) and
    ``feature_importances.pickle`` (dict mapping feature index to every
    non-zero importance) under *directory* (expected to end with a path
    separator), then appends a human-readable summary to *interpretString*.

    Parameters
    ----------
    classifier : object
        Fitted estimator exposing a ``feature_importances_`` array.
    directory : str
        Output path prefix for the generated files.
    interpretString : str, optional
        Text the summary is appended to.
    nb_considered_feats : int, optional
        Number of top features (by decreasing importance) to plot and
        report; previously hard-coded to 50.

    Returns
    -------
    str
        *interpretString* followed by one line per reported feature with
        strictly positive importance.
    """
    featureImportances = classifier.feature_importances_
    # Indices of the features sorted by decreasing importance.
    sortedArgs = np.argsort(-featureImportances)
    featureImportancesSorted = featureImportances[sortedArgs][:nb_considered_feats]
    featureIndicesSorted = sortedArgs[:nb_considered_feats]
    fig, ax = plt.subplots()
    x = np.arange(len(featureIndicesSorted))
    formatter = FuncFormatter(percent)
    ax.yaxis.set_major_formatter(formatter)
    plt.bar(x, featureImportancesSorted)
    plt.title("Importance depending on feature")
    fig.savefig(directory + "feature_importances.png")
    plt.close()
    # Persist every non-zero importance, not only the plotted top ones.
    featuresImportancesDict = dict((featureIndex, featureImportance)
                                   for featureIndex, featureImportance in enumerate(featureImportances)
                                   if featureImportance != 0)
    with open(directory + 'feature_importances.pickle', 'wb') as handle:
        pickle.dump(featuresImportancesDict, handle)
    interpretString += "Feature importances : \n"
    for featureIndex, featureImportance in zip(featureIndicesSorted, featureImportancesSorted):
        if featureImportance > 0:
            interpretString += "- Feature index : " + str(featureIndex) + \
                               ", feature importance : " + str(featureImportance) + "\n"
    return interpretString
\ No newline at end of file
from . import Dataset, execution, HyperParameterSearch, Transformations, Interpret from . import Dataset, execution, HyperParameterSearch, Transformations
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment