Skip to content
Snippets Groups Projects
Commit 8b68f844 authored by bbauvin's avatar bbauvin
Browse files

Debugging

parent 4c75286c
No related branches found
No related tags found
No related merge requests found
...@@ -71,7 +71,7 @@ def gridSearch(X_train, y_train, nbFolds=4, metric=["accuracy_score", None], nIt ...@@ -71,7 +71,7 @@ def gridSearch(X_train, y_train, nbFolds=4, metric=["accuracy_score", None], nIt
if fold != range(len(y_train)): if fold != range(len(y_train)):
fold.sort() fold.sort()
trainIndices = [index for index in range(len(y_train)) if (index not in fold)] trainIndices = [index for index in range(len(y_train)) if (index not in fold)]
attributeClassification, binaryAttributes, dsetFile = transformData(X_train[trainIndices]) attributeClassification, binaryAttributes, dsetFile, name = transformData(X_train[trainIndices])
try: try:
classifier.fit(binaryAttributes, y_train[trainIndices], X=None, attribute_classifications=attributeClassification, iteration_callback=None) classifier.fit(binaryAttributes, y_train[trainIndices], X=None, attribute_classifications=attributeClassification, iteration_callback=None)
...@@ -81,6 +81,7 @@ def gridSearch(X_train, y_train, nbFolds=4, metric=["accuracy_score", None], nIt ...@@ -81,6 +81,7 @@ def gridSearch(X_train, y_train, nbFolds=4, metric=["accuracy_score", None], nIt
except: except:
pass pass
dsetFile.close() dsetFile.close()
os.remove(name)
if scores==[]: if scores==[]:
score = baseScore score = baseScore
else: else:
......
from sklearn.metrics import precision_recall_fscore_support # from sklearn.metrics import precision_recall_fscore_support
from sklearn.cross_validation import StratifiedShuffleSplit as split # from sklearn.cross_validation import StratifiedShuffleSplit as split
import numpy as np # import numpy as np
# from sklearn.multiclass import OneVsRestClassifier # # from sklearn.multiclass import OneVsRestClassifier
from ModifiedMulticlass import OneVsRestClassifier # from ModifiedMulticlass import OneVsRestClassifier
#
# Add weights # from sklearn import tree
# from sklearn.metrics import accuracy_score
pass # import numpy as np
# from ModifiedMulticlass import OneVsRestClassifier
# from SubSampling import subSample
# import logging
# # Add weights
#
# def DecisionTree(data, labels, arg, weights):
# depth = int(arg[0])
# subSampling = float(arg[1])
# if subSampling != 1.0:
# subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, weights=weights)
# else:
# subSampledData, subSampledLabels, subSampledWeights = data, labels, weights
# isBad = False
# classifier = tree.DecisionTreeClassifier(max_depth=depth)
# #classifier = OneVsRestClassifier(tree.DecisionTreeClassifier(max_depth=depth))
# classifier.fit(subSampledData, subSampledLabels, subSampledWeights)
# prediction = classifier.predict(data)
# accuracy = accuracy_score(labels, prediction)
# if accuracy < 0.5:
# isBad = True
#
# return classifier, prediction, isBad, accuracy
#
#
# def getConfig(classifierConfig):
# depth = classifierConfig[0]
# subSampling = classifierConfig[1]
# return 'with depth ' + str(depth) + ', ' + ' sub-sampled at ' + str(subSampling) + ' '
#
#
# def gridSearch(data, labels, metric="accuracy_score"):
# minSubSampling = 1.0/(len(labels)/2)
# bestSettings = []
# bestResults = []
# classifier = tree.DecisionTreeClassifier(max_depth=1)
# preliminary_accuracies = np.zeros(50)
# for i in range(50):
# subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, 0.05)
# classifier.fit(subSampledData, subSampledLabels)
# prediction = classifier.predict(data)
# preliminary_accuracies[i] = accuracy_score(labels, prediction)
# preliminary_accuracy = np.mean(preliminary_accuracies)
# if preliminary_accuracy < 0.50:
# for max_depth in np.arange(10)+1:
# for subSampling in sorted(np.arange(20, dtype=float)+1/20, reverse=True):
# if subSampling > minSubSampling:
# accuracies = np.zeros(50)
# for i in range(50):
# if subSampling != 1.0:
# subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling)
# else:
# subSampledData, subSampledLabels, = data, labels
# classifier = tree.DecisionTreeClassifier(max_depth=max_depth)
# classifier.fit(subSampledData, subSampledLabels)
# prediction = classifier.predict(data)
# accuracies[i] = accuracy_score(labels, prediction)
# accuracy = np.mean(accuracies)
# if 0.5 < accuracy < 0.60:
# bestSettings.append([max_depth, subSampling])
# bestResults.append(accuracy)
# else:
# preliminary_accuracies = np.zeros(50)
# if minSubSampling < 0.01:
# for i in range(50):
# subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, 0.01)
# classifier.fit(subSampledData, subSampledLabels)
# prediction = classifier.predict(data)
# preliminary_accuracies[i] = accuracy_score(labels, prediction)
# preliminary_accuracy = np.mean(preliminary_accuracies)
# if preliminary_accuracy < 0.50:
# for subSampling in sorted((np.arange(19, dtype=float)+1)/200, reverse=True):
# if minSubSampling < subSampling:
# accuracies = np.zeros(50)
# for i in range(50):
# subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling)
# classifier = tree.DecisionTreeClassifier(max_depth=1)
# classifier.fit(subSampledData, subSampledLabels)
# prediction = classifier.predict(data)
# accuracies[i] = accuracy_score(labels, prediction)
# accuracy = np.mean(accuracies)
# if 0.5 < accuracy < 0.60:
# bestSettings.append([1, subSampling])
# bestResults.append(accuracy)
# else:
# for subSampling in sorted((np.arange(19, dtype=float)+1)/2000, reverse=True):
# accuracies = np.zeros(50)
# for i in range(50):
# subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling)
# if minSubSampling < subSampling:
# classifier1 = tree.DecisionTreeClassifier(max_depth=1)
# classifier1.fit(subSampledData, subSampledLabels)
# prediction = classifier1.predict(data)
# accuracies[i] = accuracy_score(labels, prediction)
# accuracy = np.mean(accuracies)
# if 0.5 < accuracy < 0.60:
# bestSettings.append([1, subSampling])
# bestResults.append(accuracy)
#
# assert bestResults!=[], "No good settings found for Decision Tree!"
#
# return getBestSetting(bestSettings, bestResults)
#
#
# def getBestSetting(bestSettings, bestResults):
# diffTo52 = 100.0
# bestSettingsIndex = 0
# for resultIndex, result in enumerate(bestResults):
# if abs(0.55-result) < diffTo52:
# diffTo52 = abs(0.55-result)
# bestResult = result
# bestSettingsIndex = resultIndex
# logging.debug("\t\tInfo:\t Best Reslut : "+str(result))
#
# return map(lambda p: round(p, 4), bestSettings[bestSettingsIndex])
# # return map(round(,4), bestSettings[bestSettingsIndex])
\ No newline at end of file
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment