Skip to content
Snippets Groups Projects
Commit 7fb05794 authored by Baptiste Bauvin's avatar Baptiste Bauvin
Browse files

Added interpret for adaboost and DT

parent 73051abb
No related branches found
No related tags found
No related merge requests found
......@@ -113,7 +113,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol
hyperParamSearch, metrics, nIter, feat, CL_type,
clKWARGS, labelsNames, X.shape,
y_train, y_train_pred, y_test, y_test_pred, t_end,
randomState)
randomState, cl_res, outputFileName)
cl_desc = [value for key, value in sorted(clKWARGS.iteritems())]
logging.debug("Done:\t Getting Results")
logging.info(stringAnalysis)
......
......@@ -16,7 +16,7 @@ def getDBConfigString(name, feat, classificationIndices, shape, classLabelsNames
return dbConfigString
def getClassifierConfigString(CL_type, gridSearch, nbCores, nIter, clKWARGS):
def getClassifierConfigString(CL_type, gridSearch, nbCores, nIter, clKWARGS, classififer, directory):
classifierModule = getattr(MonoviewClassifiers, CL_type)
classifierConfigString = "Classifier configuration : \n"
classifierConfigString += "\t- " + classifierModule.getConfig(clKWARGS)[5:] + "\n"
......@@ -24,7 +24,8 @@ def getClassifierConfigString(CL_type, gridSearch, nbCores, nIter, clKWARGS):
if gridSearch:
classifierConfigString += "\t- Got configuration using randomized search with " + str(nIter) + " iterations \n"
classifierConfigString += "\n\n"
return classifierConfigString
classifierInterpretString = classifierModule.getInterpret(classififer, directory)
return classifierConfigString, classifierInterpretString
def getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred):
......@@ -43,7 +44,7 @@ def getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred):
def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics, nIter, feat, CL_type, clKWARGS, classLabelsNames,
shape, y_train, y_train_pred, y_test, y_test_pred, time, randomState):
shape, y_train, y_train_pred, y_test, y_test_pred, time, randomState, classifier, directory):
metricsScores = {}
metricModule = getattr(Metrics, metrics[0][0])
trainScore = metricModule.score(y_train, y_train_pred)
......@@ -53,7 +54,8 @@ def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics, nIter, fea
stringAnalysis += metrics[0][0] + " on train : " + str(trainScore) + "\n" + metrics[0][0] + " on test : " + str(
testScore) + "\n\n"
stringAnalysis += getDBConfigString(name, feat, learningRate, shape, classLabelsNames, KFolds)
stringAnalysis += getClassifierConfigString(CL_type, gridSearch, nbCores, nIter, clKWARGS)
classifierConfigString, classifierIntepretString = getClassifierConfigString(CL_type, gridSearch, nbCores, nIter, clKWARGS, classifier, directory)
stringAnalysis += classifierConfigString
for metric in metrics:
stringAnalysis += getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred)
if metric[1] is not None:
......@@ -63,6 +65,8 @@ def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics, nIter, fea
metricsScores[metric[0]] = [getattr(Metrics, metric[0]).score(y_train, y_train_pred),
getattr(Metrics, metric[0]).score(y_test, y_test_pred)]
stringAnalysis += "\n\n Classification took " + str(hms(seconds=int(time)))
stringAnalysis += "\n\n Classifier Interpretation : \n"
stringAnalysis+= classifierIntepretString
imageAnalysis = {}
return stringAnalysis, imageAnalysis, metricsScores
......@@ -4,6 +4,7 @@ from sklearn.model_selection import RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
from scipy.stats import randint
import numpy as np
import cPickle
from .. import Metrics
from ..utils.HyperParameterSearch import genHeatMaps
......@@ -79,3 +80,20 @@ def getConfig(config):
except:
return "\n\t\t- Adaboost with num_esimators : " + str(config["0"]) + ", base_estimators : " + str(
config["1"])
def getInterpret(classifier, directory):
    """Summarise a fitted Adaboost classifier's feature importances.

    Dumps every non-zero importance to ``<directory>-feature_importances.pickle``
    and returns a human-readable string listing the (up to) 50 most
    important features.
    """
    importances = classifier.feature_importances_
    # Indices ordered from most to least important; keep the top 50 only.
    orderedIndices = np.argsort(-importances)
    topIndices = orderedIndices[:50]
    topImportances = importances[topIndices]
    # Persist every feature whose importance is non-zero for later analysis.
    nonZeroImportances = {index: importance
                          for index, importance in enumerate(importances)
                          if importance != 0}
    with open(directory + '-feature_importances.pickle', 'wb') as handle:
        cPickle.dump(nonZeroImportances, handle)
    parts = ["Feature importances : \n"]
    for index, importance in zip(topIndices, topImportances):
        if importance > 0:
            parts.append("- Feature index : " + str(index) +
                         ", feature importance : " + str(importance) + "\n")
    return "".join(parts)
\ No newline at end of file
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.pipeline import Pipeline # Pipelining in classification
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint
import numpy as np
import graphviz
import cPickle
from .. import Metrics
from ..utils.HyperParameterSearch import genHeatMaps
......@@ -20,7 +22,7 @@ def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs):
maxDepth = int(kwargs['0'])
criterion = kwargs['1']
splitter = kwargs['2']
classifier = DecisionTreeClassifier(max_depth=maxDepth, criterion=criterion, splitter=splitter,
classifier = tree.DecisionTreeClassifier(max_depth=maxDepth, criterion=criterion, splitter=splitter,
random_state=randomState)
classifier.fit(DATASET, CLASS_LABELS)
return classifier
......@@ -48,7 +50,7 @@ def getKWARGS(kwargsList):
def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, nbCores=1,
metric=["accuracy_score", None], nIter=30):
pipeline_DT = Pipeline([('classifier', DecisionTreeClassifier())])
pipeline_DT = Pipeline([('classifier', tree.DecisionTreeClassifier())])
param_DT = {"classifier__max_depth": randint(1, 300),
"classifier__criterion": ["gini", "entropy"],
"classifier__splitter": ["best", "random"]}
......@@ -85,3 +87,23 @@ def getConfig(config):
except:
return "\n\t\t- Decision Tree with max_depth : " + str(config["0"]) + ", criterion : " + config[
"1"] + ", splitter : " + config["2"]
def getInterpret(classifier, directory):
    """Summarise a fitted decision tree classifier.

    Renders the tree structure to ``<directory>-tree.pdf`` via graphviz,
    dumps every non-zero feature importance to
    ``<directory>-feature_importances.pickle`` and returns a human-readable
    string listing the (up to) 50 most important features.
    """
    dot_data = tree.export_graphviz(classifier, out_file=None)
    graph = graphviz.Source(dot_data)
    # render() appends the format extension itself, so pass the stem only;
    # the previous "-tree.pdf" argument produced "-tree.pdf.pdf" and left a
    # graphviz source file misnamed "-tree.pdf".
    graph.render(directory + "-tree")
    featureImportances = classifier.feature_importances_
    # Indices sorted from most to least important; keep the top 50 only.
    sortedArgs = np.argsort(-featureImportances)
    featureImportancesSorted = featureImportances[sortedArgs][:50]
    featureIndicesSorted = sortedArgs[:50]
    # Persist every feature whose importance is non-zero for later analysis.
    featuresImportancesDict = dict((featureIndex, featureImportance)
                                   for featureIndex, featureImportance in enumerate(featureImportances)
                                   if featureImportance != 0)
    with open(directory + '-feature_importances.pickle', 'wb') as handle:
        cPickle.dump(featuresImportancesDict, handle)
    interpretString = "Feature importances : \n"
    for featureIndex, featureImportance in zip(featureIndicesSorted, featureImportancesSorted):
        if featureImportance > 0:
            interpretString += "- Feature index : " + str(featureIndex) + \
                               ", feature importance : " + str(featureImportance) + "\n"
    return interpretString
\ No newline at end of file
import numpy as np
import pyscm
# from pyscm.utils import _pack_binary_bytes_to_ints
import os
import h5py
# from pyscm.binary_attributes.classifications.popcount import inplace_popcount_32, inplace_popcount_64
# from pyscm.utils import _unpack_binary_bytes_from_ints
from pyscm.scm import SetCoveringMachineClassifier as scm
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV
from sklearn.externals.six import iteritems, iterkeys, itervalues
from math import ceil
import random
from sklearn.metrics import accuracy_score
import itertools
import pkgutil
from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig
from ..... import MonoviewClassifiers
......
......@@ -96,8 +96,15 @@ def testVersions():
isUpToDate = False
toInstall.append("h5py")
try:
import graphviz #
except:
isUpToDate = False
toInstall.append("graphviz")
if not isUpToDate:
print "You can't run at the moment, please install the following modules : \n"+ "\n".join(toInstall)
quit()
if __name__== "__main__":
testVersions()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment