diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py
index be2a0b2a40b48b70d383e1d2e048f8c9ec9deb7f..67f0ce4ba5a9404535e46b8f1b934483c8df62c9 100644
--- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py
+++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py
@@ -71,7 +71,7 @@ def gridSearch(X_train, y_train, nbFolds=4, metric=["accuracy_score", None], nIt
         if fold != range(len(y_train)):
             fold.sort()
             trainIndices = [index for index in range(len(y_train)) if (index not in fold)]
-            attributeClassification, binaryAttributes, dsetFile = transformData(X_train[trainIndices])
+            attributeClassification, binaryAttributes, dsetFile, name = transformData(X_train[trainIndices])
             try:
                 classifier.fit(binaryAttributes, y_train[trainIndices], X=None,
                                attribute_classifications=attributeClassification, iteration_callback=None)
@@ -81,6 +81,7 @@ def gridSearch(X_train, y_train, nbFolds=4, metric=["accuracy_score", None], nIt
             except:
                 pass
             dsetFile.close()
+            os.remove(name)
     if scores==[]:
         score = baseScore
     else:
diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/Kover.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/Kover.py
index ae1123e8ecc8b2dd9b8bf3af36dccc95a430ac59..eede20a3dd6c71ed0bbf8a46283298ddb1c85e3d 100644
--- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/Kover.py
+++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/Kover.py
@@ -1,9 +1,124 @@
-from sklearn.metrics import precision_recall_fscore_support
-from sklearn.cross_validation import StratifiedShuffleSplit as split
-import numpy as np
-# from sklearn.multiclass import OneVsRestClassifier
-from ModifiedMulticlass import OneVsRestClassifier
-
-# Add weights
-
-pass
+# from sklearn.metrics import precision_recall_fscore_support
+# from sklearn.cross_validation import StratifiedShuffleSplit as split
+# import numpy as np
+# # from sklearn.multiclass import OneVsRestClassifier
+# from ModifiedMulticlass import OneVsRestClassifier
+#
+# from sklearn import tree
+# from sklearn.metrics import accuracy_score
+# import numpy as np
+# from ModifiedMulticlass import OneVsRestClassifier
+# from SubSampling import subSample
+# import logging
+# # Add weights
+#
+# def DecisionTree(data, labels, arg, weights):
+#     depth = int(arg[0])
+#     subSampling = float(arg[1])
+#     if subSampling != 1.0:
+#         subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, weights=weights)
+#     else:
+#         subSampledData, subSampledLabels, subSampledWeights = data, labels, weights
+#     isBad = False
+#     classifier = tree.DecisionTreeClassifier(max_depth=depth)
+#     #classifier = OneVsRestClassifier(tree.DecisionTreeClassifier(max_depth=depth))
+#     classifier.fit(subSampledData, subSampledLabels, subSampledWeights)
+#     prediction = classifier.predict(data)
+#     accuracy = accuracy_score(labels, prediction)
+#     if accuracy < 0.5:
+#         isBad = True
+#
+#     return classifier, prediction, isBad, accuracy
+#
+#
+# def getConfig(classifierConfig):
+#     depth = classifierConfig[0]
+#     subSampling = classifierConfig[1]
+#     return 'with depth ' + str(depth) + ', ' + ' sub-sampled at ' + str(subSampling) + ' '
+#
+#
+# def gridSearch(data, labels, metric="accuracy_score"):
+#     minSubSampling = 1.0/(len(labels)/2)
+#     bestSettings = []
+#     bestResults = []
+#     classifier = tree.DecisionTreeClassifier(max_depth=1)
+#     preliminary_accuracies = np.zeros(50)
+#     for i in range(50):
+#         subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, 0.05)
+#         classifier.fit(subSampledData, subSampledLabels)
+#         prediction = classifier.predict(data)
+#         preliminary_accuracies[i] = accuracy_score(labels, prediction)
+#     preliminary_accuracy = np.mean(preliminary_accuracies)
+#     if preliminary_accuracy < 0.50:
+#         for max_depth in np.arange(10)+1:
+#             for subSampling in sorted(np.arange(20, dtype=float)+1/20, reverse=True):
+#                 if subSampling > minSubSampling:
+#                     accuracies = np.zeros(50)
+#                     for i in range(50):
+#                         if subSampling != 1.0:
+#                             subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling)
+#                         else:
+#                             subSampledData, subSampledLabels, = data, labels
+#                         classifier = tree.DecisionTreeClassifier(max_depth=max_depth)
+#                         classifier.fit(subSampledData, subSampledLabels)
+#                         prediction = classifier.predict(data)
+#                         accuracies[i] = accuracy_score(labels, prediction)
+#                     accuracy = np.mean(accuracies)
+#                     if 0.5 < accuracy < 0.60:
+#                         bestSettings.append([max_depth, subSampling])
+#                         bestResults.append(accuracy)
+#     else:
+#         preliminary_accuracies = np.zeros(50)
+#         if minSubSampling < 0.01:
+#             for i in range(50):
+#                 subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, 0.01)
+#                 classifier.fit(subSampledData, subSampledLabels)
+#                 prediction = classifier.predict(data)
+#                 preliminary_accuracies[i] = accuracy_score(labels, prediction)
+#         preliminary_accuracy = np.mean(preliminary_accuracies)
+#         if preliminary_accuracy < 0.50:
+#             for subSampling in sorted((np.arange(19, dtype=float)+1)/200, reverse=True):
+#                 if minSubSampling < subSampling:
+#                     accuracies = np.zeros(50)
+#                     for i in range(50):
+#                         subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling)
+#                         classifier = tree.DecisionTreeClassifier(max_depth=1)
+#                         classifier.fit(subSampledData, subSampledLabels)
+#                         prediction = classifier.predict(data)
+#                         accuracies[i] = accuracy_score(labels, prediction)
+#                     accuracy = np.mean(accuracies)
+#                     if 0.5 < accuracy < 0.60:
+#                         bestSettings.append([1, subSampling])
+#                         bestResults.append(accuracy)
+#         else:
+#             for subSampling in sorted((np.arange(19, dtype=float)+1)/2000, reverse=True):
+#                 accuracies = np.zeros(50)
+#                 for i in range(50):
+#                     subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling)
+#                     if minSubSampling < subSampling:
+#                         classifier1 = tree.DecisionTreeClassifier(max_depth=1)
+#                         classifier1.fit(subSampledData, subSampledLabels)
+#                         prediction = classifier1.predict(data)
+#                         accuracies[i] = accuracy_score(labels, prediction)
+#                 accuracy = np.mean(accuracies)
+#                 if 0.5 < accuracy < 0.60:
+#                     bestSettings.append([1, subSampling])
+#                     bestResults.append(accuracy)
+#
+#     assert bestResults!=[], "No good settings found for Decision Tree!"
+#
+#     return getBestSetting(bestSettings, bestResults)
+#
+#
+# def getBestSetting(bestSettings, bestResults):
+#     diffTo52 = 100.0
+#     bestSettingsIndex = 0
+#     for resultIndex, result in enumerate(bestResults):
+#         if abs(0.55-result) < diffTo52:
+#             diffTo52 = abs(0.55-result)
+#             bestResult = result
+#             bestSettingsIndex = resultIndex
+#     logging.debug("\t\tInfo:\t Best Reslut : "+str(result))
+#
+#     return map(lambda p: round(p, 4), bestSettings[bestSettingsIndex])
+#     # return map(round(,4), bestSettings[bestSettingsIndex])
\ No newline at end of file