Skip to content
Snippets Groups Projects
Commit 8b68f844 authored by bbauvin's avatar bbauvin
Browse files

Debugging

parent 4c75286c
No related branches found
No related tags found
No related merge requests found
......@@ -71,7 +71,7 @@ def gridSearch(X_train, y_train, nbFolds=4, metric=["accuracy_score", None], nIt
if fold != range(len(y_train)):
fold.sort()
trainIndices = [index for index in range(len(y_train)) if (index not in fold)]
attributeClassification, binaryAttributes, dsetFile = transformData(X_train[trainIndices])
attributeClassification, binaryAttributes, dsetFile, name = transformData(X_train[trainIndices])
try:
classifier.fit(binaryAttributes, y_train[trainIndices], X=None, attribute_classifications=attributeClassification, iteration_callback=None)
......@@ -81,6 +81,7 @@ def gridSearch(X_train, y_train, nbFolds=4, metric=["accuracy_score", None], nIt
except:
pass
dsetFile.close()
os.remove(name)
if scores==[]:
score = baseScore
else:
......
from sklearn.metrics import precision_recall_fscore_support
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# migrate to sklearn.model_selection.StratifiedShuffleSplit when upgrading.
from sklearn.cross_validation import StratifiedShuffleSplit as split
import numpy as np
# Project-local variant used instead of the stock sklearn implementation:
# from sklearn.multiclass import OneVsRestClassifier
from ModifiedMulticlass import OneVsRestClassifier
# TODO: Add weights (sample-weight support not yet implemented here)
pass  # placeholder; the remainder of this module is currently commented out
# from sklearn.metrics import precision_recall_fscore_support
# from sklearn.cross_validation import StratifiedShuffleSplit as split
# import numpy as np
# # from sklearn.multiclass import OneVsRestClassifier
# from ModifiedMulticlass import OneVsRestClassifier
#
# from sklearn import tree
# from sklearn.metrics import accuracy_score
# import numpy as np
# from ModifiedMulticlass import OneVsRestClassifier
# from SubSampling import subSample
# import logging
# # Add weights
#
# def DecisionTree(data, labels, arg, weights):
# depth = int(arg[0])
# subSampling = float(arg[1])
# if subSampling != 1.0:
# subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, weights=weights)
# else:
# subSampledData, subSampledLabels, subSampledWeights = data, labels, weights
# isBad = False
# classifier = tree.DecisionTreeClassifier(max_depth=depth)
# #classifier = OneVsRestClassifier(tree.DecisionTreeClassifier(max_depth=depth))
# classifier.fit(subSampledData, subSampledLabels, subSampledWeights)
# prediction = classifier.predict(data)
# accuracy = accuracy_score(labels, prediction)
# if accuracy < 0.5:
# isBad = True
#
# return classifier, prediction, isBad, accuracy
#
#
# def getConfig(classifierConfig):
# depth = classifierConfig[0]
# subSampling = classifierConfig[1]
# return 'with depth ' + str(depth) + ', ' + ' sub-sampled at ' + str(subSampling) + ' '
#
#
# def gridSearch(data, labels, metric="accuracy_score"):
# minSubSampling = 1.0/(len(labels)/2)
# bestSettings = []
# bestResults = []
# classifier = tree.DecisionTreeClassifier(max_depth=1)
# preliminary_accuracies = np.zeros(50)
# for i in range(50):
# subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, 0.05)
# classifier.fit(subSampledData, subSampledLabels)
# prediction = classifier.predict(data)
# preliminary_accuracies[i] = accuracy_score(labels, prediction)
# preliminary_accuracy = np.mean(preliminary_accuracies)
# if preliminary_accuracy < 0.50:
# for max_depth in np.arange(10)+1:
# for subSampling in sorted(np.arange(20, dtype=float)+1/20, reverse=True):
# if subSampling > minSubSampling:
# accuracies = np.zeros(50)
# for i in range(50):
# if subSampling != 1.0:
# subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling)
# else:
# subSampledData, subSampledLabels, = data, labels
# classifier = tree.DecisionTreeClassifier(max_depth=max_depth)
# classifier.fit(subSampledData, subSampledLabels)
# prediction = classifier.predict(data)
# accuracies[i] = accuracy_score(labels, prediction)
# accuracy = np.mean(accuracies)
# if 0.5 < accuracy < 0.60:
# bestSettings.append([max_depth, subSampling])
# bestResults.append(accuracy)
# else:
# preliminary_accuracies = np.zeros(50)
# if minSubSampling < 0.01:
# for i in range(50):
# subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, 0.01)
# classifier.fit(subSampledData, subSampledLabels)
# prediction = classifier.predict(data)
# preliminary_accuracies[i] = accuracy_score(labels, prediction)
# preliminary_accuracy = np.mean(preliminary_accuracies)
# if preliminary_accuracy < 0.50:
# for subSampling in sorted((np.arange(19, dtype=float)+1)/200, reverse=True):
# if minSubSampling < subSampling:
# accuracies = np.zeros(50)
# for i in range(50):
# subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling)
# classifier = tree.DecisionTreeClassifier(max_depth=1)
# classifier.fit(subSampledData, subSampledLabels)
# prediction = classifier.predict(data)
# accuracies[i] = accuracy_score(labels, prediction)
# accuracy = np.mean(accuracies)
# if 0.5 < accuracy < 0.60:
# bestSettings.append([1, subSampling])
# bestResults.append(accuracy)
# else:
# for subSampling in sorted((np.arange(19, dtype=float)+1)/2000, reverse=True):
# accuracies = np.zeros(50)
# for i in range(50):
# subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling)
# if minSubSampling < subSampling:
# classifier1 = tree.DecisionTreeClassifier(max_depth=1)
# classifier1.fit(subSampledData, subSampledLabels)
# prediction = classifier1.predict(data)
# accuracies[i] = accuracy_score(labels, prediction)
# accuracy = np.mean(accuracies)
# if 0.5 < accuracy < 0.60:
# bestSettings.append([1, subSampling])
# bestResults.append(accuracy)
#
# assert bestResults!=[], "No good settings found for Decision Tree!"
#
# return getBestSetting(bestSettings, bestResults)
#
#
# def getBestSetting(bestSettings, bestResults):
# diffTo52 = 100.0
# bestSettingsIndex = 0
# for resultIndex, result in enumerate(bestResults):
# if abs(0.55-result) < diffTo52:
# diffTo52 = abs(0.55-result)
# bestResult = result
# bestSettingsIndex = resultIndex
# logging.debug("\t\tInfo:\t Best Result : "+str(result))
#
# return map(lambda p: round(p, 4), bestSettings[bestSettingsIndex])
# # return map(round(,4), bestSettings[bestSettingsIndex])
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment