Commit 99432b2a authored by bbauvin

Refactored and added subsampling file in classifiers module

parent 8ea80c0f
@@ -14,7 +14,7 @@ import logging # To create Log-Files
# Import own modules
import DBCrawl # Functions to read Images from Database
import Code.MonoView.ExportResults # Functions to render results
-import FeatExtraction # Functions to extract the features from Database
+import FeatExtraction # Functions to extract the views from Database
# Author-Info
__author__ = "Nikolas Huelsmann"
@@ -24,7 +24,7 @@ __date__ = 2016-03-25
### Argument Parser
parser = argparse.ArgumentParser(
-description='This method permits to export one or more features at the same time for a database of images (path, name). To extract one feature activate it by using the specific argument (e.g. -RGB). For each feature you can define the parameters by using the optional arguments (e.g. --RGB_Hist 32). The results will be exported to a CSV-File.',
+description='This method permits to export one or more views at the same time for a database of images (path, name). To extract one feature activate it by using the specific argument (e.g. -RGB). For each feature you can define the parameters by using the optional arguments (e.g. --RGB_Hist 32). The results will be exported to a CSV-File.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
groupStandard = parser.add_argument_group('Standard arguments')
@@ -120,7 +120,7 @@ logging.debug("### Main Programm for Feature Extraction ###")
logging.debug("### Extraction - NameDB=" + nameDB + ", Path=" + path + ", Features=" + features)
################################ Read Images from Database
-# Determine the Database to extract features
+# Determine the Database to extract views
logging.debug("Start:\t Exportation of images from DB")
......
@@ -33,7 +33,7 @@ groupStandard = parser.add_argument_group('Standard arguments')
groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console')
groupStandard.add_argument('--name', metavar='STRING', action='store', help='Name of Database (default: %(default)s)', default='DB')
groupStandard.add_argument('--feat', metavar='STRING', action='store', help='Name of Feature for Classification (default: %(default)s)', default='RGB')
-groupStandard.add_argument('--pathF', metavar='STRING', action='store', help='Path to the features (default: %(default)s)', default='Results-FeatExtr/')
+groupStandard.add_argument('--pathF', metavar='STRING', action='store', help='Path to the views (default: %(default)s)', default='Results-FeatExtr/')
groupStandard.add_argument('--fileCL', metavar='STRING', action='store', help='Name of classLabels CSV-file (default: %(default)s)', default='classLabels.csv')
groupStandard.add_argument('--fileCLD', metavar='STRING', action='store', help='Name of classLabels-Description CSV-file (default: %(default)s)', default='classLabels-Description.csv')
groupStandard.add_argument('--fileFeat', metavar='STRING', action='store', help='Name of feature CSV-file (default: %(default)s)', default='feature.csv')
......
@@ -25,10 +25,10 @@ groupStandard.add_argument('--name', metavar='STRING', action='store', help='Nam
default='Caltech')
groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of database : .hdf5 or .csv',
default='.csv')
-groupStandard.add_argument('--features', metavar='STRING', action='store',
-help='Name of the features selected for learning', default='RGB:HOG:SIFT')
+groupStandard.add_argument('--views', metavar='STRING', action='store',
+help='Name of the views selected for learning', default='RGB:HOG:SIFT')
groupStandard.add_argument('--pathF', metavar='STRING', action='store',
-help='Path to the features (default: %(default)s)',
+help='Path to the views (default: %(default)s)',
default='../FeatExtraction/Results-FeatExtr/')
groupClass = parser.add_argument_group('Classification arguments')
@@ -69,9 +69,9 @@ groupFusion.add_argument('--FU_cl_config', metavar='STRING', action='store',
help='Configuration for the monoview classifier', default='100:10:5')
args = parser.parse_args()
-features = args.features.split(":")
+views = args.views.split(":")
dataBaseType = args.type
-NB_VIEW = len(features)
+NB_VIEW = len(views)
mumboClassifierConfig = [argument.split(':') for argument in args.MU_config]
LEARNING_RATE = args.CL_split
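As a hypothetical illustration of the config parsing above (flag values invented, not taken from the commit), passing --MU_config 1:0.1 1:0.9 on the command line gives:

# Hypothetical example: args.MU_config arrives as ['1:0.1', '1:0.9'] and is split
# into one [tree depth, sub-sampling rate] pair per view, as consumed by the
# DecisionTree weak learner shown later in this commit (arg[0], arg[1]).
mumboClassifierConfig = [argument.split(':') for argument in ['1:0.1', '1:0.9']]
# -> [['1', '0.1'], ['1', '0.9']]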
@@ -88,7 +88,7 @@ MumboArguments = (mumboClassifierConfig, NB_ITER, classifierNames)
dir = os.path.dirname(os.path.abspath(__file__)) + "/Results/"
logFileName = datetime.datetime.now().strftime(
"%Y_%m_%d") + "-CMultiV-" + args.CL_type + "-" + "_".join(features) + "-" + args.name + "-LOG"
"%Y_%m_%d") + "-CMultiV-" + args.CL_type + "-" + "_".join(views) + "-" + args.name + "-LOG"
logFile = dir + logFileName
if os.path.isfile(logFile + ".log"):
for i in range(1, 20):
@@ -106,14 +106,14 @@ if (args.log):
t_start = time.time()
logging.info("### Main Programm for Multiview Classification")
logging.info("### Classification - Database : " + str(args.name) + " ; Views : " + ", ".join(features) +
logging.info("### Classification - Database : " + str(args.name) + " ; Views : " + ", ".join(views) +
" ; Algorithm : " + args.CL_type + " ; Cores : " + str(NB_CORES))
logging.info("Start:\t Read CSV Database Files for " + args.name)
logging.info("Start:\t Read "+str.upper(type[1:])+" Database Files for " + args.name)
getDatabase = getattr(DB, "get" + args.name + "DB" + dataBaseType[1:])
-DATASET, LABELS_DICTIONARY = getDatabase(features, args.pathF, args.name, NB_CLASS, LABELS_NAMES)
+DATASET, LABELS_DICTIONARY = getDatabase(views, args.pathF, args.name, NB_CLASS, LABELS_NAMES)
datasetLength = DATASET["/datasetLength"][...]
dataBaseType = "hdf5"
@@ -121,7 +121,7 @@ logging.info("Info:\t Labels used: " + ", ".join(LABELS_DICTIONARY.values()))
logging.info("Info:\t Length of dataset:" + str(datasetLength))
for viewIndex in range(NB_VIEW):
logging.info("Info:\t Shape of " + features[viewIndex] + " :" + str(
logging.info("Info:\t Shape of " + views[viewIndex] + " :" + str(
DATASET["View" + str(viewIndex) + "/shape"][...]))
logging.info("Done:\t Read Database Files")
@@ -192,11 +192,11 @@ times = (extractionTime, kFoldLearningTime, kFoldPredictionTime, classificationT
stringAnalysis, imagesAnalysis = analysisModule.execute(kFoldClassifier, kFoldPredictedTrainLabels,
kFoldPredictedTestLabels, kFoldPredictedValidationLabels, DATASET,
NB_CLASS, trainArguments, LEARNING_RATE, LABELS_DICTIONARY,
-features, NB_CORES, times, NB_VIEW, kFolds, args.name, nbFolds,
+views, NB_CORES, times, NB_VIEW, kFolds, args.name, nbFolds,
validationIndices, datasetLength)
labelsSet = set(LABELS_DICTIONARY.values())
logging.info(stringAnalysis)
featureString = "-".join(features)
featureString = "-".join(views)
labelsString = "-".join(labelsSet)
timestr = time.strftime("%Y%m%d-%H%M%S")
outputFileName = "Results/" + timestr + "Results-" + args.CL_type + "-" + ":".join(
......
@@ -25,7 +25,7 @@ def execute(classifier, predictedTrainLabels, predictedTestLabels, trainLabels,
fusionClassifierConfig)
#monoviewClassifierConfig+'\n\n '+ \
stringAnalysis = "\n"+fusionType+" classification using "+monoviewClassifier+ 'as monoview classifier '+ \
"Learning on \n\t- "+", ".join(features)+" as features\n\t- "+", ".join(LABELS_DICTIONARY.values())+ \
"Learning on \n\t- "+", ".join(features)+" as views\n\t- "+", ".join(LABELS_DICTIONARY.values())+ \
" as labels\n\t- "+str(trainingSetLength)+" training examples, "+str(testingSetLength)+ \
" testing examples ("+str(LEARNING_RATE)+" rate)\n\n With "+str(NB_CORES)+' cores used for computing.\n\n'
@@ -48,7 +48,7 @@ def execute(classifier, predictedTrainLabels, predictedTestLabels, trainLabels,
# stringAnalysis+= "\t- Iteration "+str(iterIndex+1)+"\n\t\t Accuracy on train : "+ \
# str(accuracy_score(trainLabels, predictedTrainLabelsByIter[iterIndex]))+'\n\t\t Accuracy on test : '+ \
# str(accuracy_score(testLabels, predictedTestLabelsByIter[iterIndex]))+'\n\t\t Selected View : '+ \
-# features[int(bestViews[iterIndex])]+"\n"
+# views[int(bestViews[iterIndex])]+"\n"
#
# name, image = plotAccuracyByIter(predictedTrainLabelsByIter, predictedTestLabelsByIter, trainLabels, testLabels, NB_ITER)
imagesAnalysis = {}
......
from sklearn import tree
from sklearn.metrics import precision_recall_fscore_support
import numpy as np
# from sklearn.multiclass import OneVsRestClassifier
from ModifiedMulticlass import OneVsRestClassifier
import random
+from SubSampling import subSample
# Add weights
-def getLabelSupports(CLASS_LABELS):
-labels = set(CLASS_LABELS)
-supports = [CLASS_LABELS.tolist().count(label) for label in labels]
-return supports, dict((label, index) for label, index in zip(labels, range(len(labels))))
-def isUseful(nbTrainingExamples, index, CLASS_LABELS, labelDict):
-if nbTrainingExamples[labelDict[CLASS_LABELS[index]]] != 0:
-nbTrainingExamples[labelDict[CLASS_LABELS[index]]] -= 1
-return True, nbTrainingExamples
-else:
-return False, nbTrainingExamples
-def subSample(data, labels, weights, subSampling):
-nbExamples = len(labels)
-labelSupports, labelDict = getLabelSupports(labels)
-nbTrainingExamples = [int(support * subSampling) for support in labelSupports]
-trainingExamplesIndices = []
-while nbTrainingExamples != [0 for i in range(len(labelSupports))]:
-index = int(random.randint(0, nbExamples - 1))
-isUseFull, nbTrainingExamples = isUseful(nbTrainingExamples, index, labels, labelDict)
-if isUseFull:
-trainingExamplesIndices.append(index)
-subSampledData = []
-subSampledLabels = []
-subSampledWeights = []
-for index in trainingExamplesIndices:
-subSampledData.append(data[index])
-subSampledLabels.append(labels[index])
-subSampledWeights.append(weights[index])
-return np.array(subSampledData), np.array(subSampledLabels), np.array(subSampledWeights)
def DecisionTree(data, labels, arg, weights):
depth = int(arg[0])
subSampling = float(arg[1])
@@ -49,11 +14,9 @@ def DecisionTree(data, labels, arg, weights):
subSampledData, subSampledLabels, subSampledWeights = data, labels, weights
isBad = False
classifier = tree.DecisionTreeClassifier(max_depth=depth)
#classifier = OneVsRestClassifier(tree.DecisionTreeClassifier(max_depth=depth))
classifier.fit(subSampledData, subSampledLabels, subSampledWeights)
prediction = classifier.predict(data)
labelsSet = set(prediction)
pTr, r, f1, s = precision_recall_fscore_support(labels, prediction, sample_weight=weights)
if np.mean(pTr) < 0.5:
isBad = True
......
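For context, a minimal driver sketch for the refactored weak learner above. The arrays, the module name and the arg values are illustrative assumptions, not taken from the commit, and the function's return value is elided in this diff:

# Hypothetical driver for the DecisionTree weak learner shown in the hunk above.
import numpy as np
from DecisionTree import DecisionTree   # assumed module name for the file shown above

data = np.random.rand(200, 10)          # one view: 200 examples, 10 features
labels = np.random.randint(0, 2, 200)   # binary class labels
weights = np.ones(200) / 200            # uniform boosting weights

# arg = [max tree depth, sub-sampling rate], matching one "--MU_config 1:0.1"
# entry after ExecMultiview.py splits it on ':'.
result = DecisionTree(data, labels, ['1', '0.1'], weights)   # return value not shown in this diff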
import numpy as np
import random
def getLabelSupports(CLASS_LABELS):
labels = set(CLASS_LABELS)
supports = [CLASS_LABELS.tolist().count(label) for label in labels]
return supports, dict((label, index) for label, index in zip(labels, range(len(labels))))
def isUseful(nbTrainingExamples, index, CLASS_LABELS, labelDict):
if nbTrainingExamples[labelDict[CLASS_LABELS[index]]] != 0:
nbTrainingExamples[labelDict[CLASS_LABELS[index]]] -= 1
return True, nbTrainingExamples
else:
return False, nbTrainingExamples
def subSample(data, labels, weights, subSampling):
nbExamples = len(labels)
labelSupports, labelDict = getLabelSupports(labels)
nbTrainingExamples = [int(support * subSampling) for support in labelSupports]
trainingExamplesIndices = []
while nbTrainingExamples != [0 for i in range(len(labelSupports))]:
index = int(random.randint(0, nbExamples - 1))
isUseFull, nbTrainingExamples = isUseful(nbTrainingExamples, index, labels, labelDict)
if isUseFull:
trainingExamplesIndices.append(index)
subSampledData = []
subSampledLabels = []
subSampledWeights = []
for index in trainingExamplesIndices:
subSampledData.append(data[index])
subSampledLabels.append(labels[index])
subSampledWeights.append(weights[index])
return np.array(subSampledData), np.array(subSampledLabels), np.array(subSampledWeights)
\ No newline at end of file
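A quick usage sketch for the new SubSampling helper (the arrays below are invented; only the import mirrors the one added to the classifier module above). subSample keeps int(support * subSampling) examples per class, drawn at random (repeats are possible), and returns index-aligned data, label and weight arrays:

# Hypothetical example data for SubSampling.subSample
import numpy as np
from SubSampling import subSample

data = np.random.rand(100, 5)            # 100 examples, 5 features each
labels = np.array([0] * 60 + [1] * 40)   # imbalanced binary labels
weights = np.ones(100) / 100             # uniform boosting weights

# Keep 50% of each class: 30 examples of class 0 and 20 of class 1.
subData, subLabels, subWeights = subSample(data, labels, weights, 0.5)
print(subData.shape, np.bincount(subLabels))   # (50, 5) [30 20]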
@@ -10,7 +10,6 @@ import logging
def findMainView(bestViews):
views = list(set(bestViews))
mainView = ()
viewCount = np.array([list(bestViews).count(view) for view in views])
mainView = views[np.argmax(viewCount)]
return mainView
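A tiny hypothetical check of findMainView (values invented): the view index selected most often across iterations is returned as the main view.

# assumes the findMainView defined above is in scope
bestViews = [2, 0, 2, 1, 2, 2, 0]
print(findMainView(bestViews))   # -> 2, since view 2 was selected four times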
@@ -32,7 +31,7 @@ def plotAccuracyByIter(trainAccuracy, testAccuracy, validationAccuracy, NB_ITER,
# for label, x, y in zip(bestViews, x, trainAccuracy):
# if label != mainView:
# plt.annotate(
-# features[int(label)],
+# views[int(label)],
# xy=(x, y), xytext=(-20, 20),
# textcoords='offset points', ha='right', va='bottom',
# bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
......
import os
-os.system('python ExecMultiview.py -log --name MultiOmic --features Methyl:MiRNA:RNASEQ:Clinical --pathF /home/bbauvin/Documents/Data/Data_multi_omics/ --CL_split 5 --CL_nb_class 2 --CL_classes Positive:Negative --CL_type Mumbo --CL_cores 4 --MU_type DecisionTree:DecisionTree:DecisionTree:DecisionTree --MU_config 1:0.1 1:0.1 1:0.9 1:0.9 --MU_iter 100')
\ No newline at end of file
+os.system('python ExecMultiview.py -log --name MultiOmic --type .hdf5 --views Methyl:MiRNA:RNASEQ:Clinical --pathF /home/bbauvin/Documents/Data/Data_multi_omics/ --CL_split 0.3 --CL_nbFolds 5 --CL_nb_class 2 --CL_classes Positive:Negative --CL_type Mumbo --CL_cores 4 --MU_type DecisionTree:DecisionTree:DecisionTree:DecisionTree --MU_config 1:0.09 1:0.09 1:0.9 2:1.0 --MU_iter 100')