diff --git a/Code/FeatExtraction/ClassifMonoView.py b/Code/FeatExtraction/ClassifMonoView.py index c3797c43ff4151f0025490ca5977049928a1d2ac..fc7fa2d171319ae941cd15a82df1be0e9caf64aa 100644 --- a/Code/FeatExtraction/ClassifMonoView.py +++ b/Code/FeatExtraction/ClassifMonoView.py @@ -135,7 +135,7 @@ def calcTrainTest(X,y,split): # X_test: Test Data # y_test: Test Labels # num_estimators: number of trees -def calcClassifRandomForestCV(X_train, y_train, num_estimators): +def calcClassifRandomForestCV(X_train, y_train, num_estimators, cv_folds, clas_cores): # PipeLine with RandomForest classifier pipeline_rf = Pipeline([('classifier', RandomForestClassifier())]) @@ -143,7 +143,6 @@ def calcClassifRandomForestCV(X_train, y_train, num_estimators): # can be extended with: oob_score, min_samples_leaf, max_features param_rf = { 'classifier__n_estimators': num_estimators} - kfolds = 5 # pipeline: Gridsearch avec le pipeline comme estimator # param: pour obtenir le meilleur model il va essayer tous les possiblites # refit: pour utiliser le meilleur model apres girdsearch @@ -154,9 +153,9 @@ def calcClassifRandomForestCV(X_train, y_train, num_estimators): pipeline_rf, param_grid=param_rf, refit=True, - n_jobs=1, + n_jobs=clas_cores, scoring='accuracy', - cv=kfolds, + cv=cv_folds, ) rf_detector = grid_rf.fit(X_train, y_train) @@ -167,26 +166,26 @@ def calcClassifRandomForestCV(X_train, y_train, num_estimators): return (description, rf_detector) -def calcClassifRandomForest(X_train, X_test, y_test, y_train, num_estimators): - from sklearn.grid_search import ParameterGrid - param_rf = { 'classifier__n_estimators': num_estimators} - forest = RandomForestClassifier() - - bestgrid=0; - for g in ParameterGrid(grid): - forest.set_params(**g) - forest.fit(X_train,y_train) - score = forest.score(X_test, y_test) - - if score > best_score: - best_score = score - best_grid = g - - rf_detector = RandomForestClassifier() - rf_detector.set_params(**best_grid) - rf_detector.fit(X_train,y_train) 
+#def calcClassifRandomForest(X_train, X_test, y_test, y_train, num_estimators): +# from sklearn.grid_search import ParameterGrid +# param_rf = { 'classifier__n_estimators': num_estimators} +# forest = RandomForestClassifier() +# +# bestgrid=0; +# for g in ParameterGrid(grid): +# forest.set_params(**g) +# forest.fit(X_train,y_train) +# score = forest.score(X_test, y_test) +# +# if score > best_score: +# best_score = score +# best_grid = g +# +# rf_detector = RandomForestClassifier() +# rf_detector.set_params(**best_grid) +# rf_detector.fit(X_train,y_train) - #desc_estimators = best_grid - description = "Classif_" + "RF" + "-" + "CV_" + "NO" + "-" + "Trees_" + str(best_grid) +# #desc_estimators = best_grid +# description = "Classif_" + "RF" + "-" + "CV_" + "NO" + "-" + "Trees_" + str(best_grid) - return (description, rf_detector) \ No newline at end of file +# return (description, rf_detector) \ No newline at end of file diff --git a/Code/FeatExtraction/DBCrawl.py b/Code/FeatExtraction/DBCrawl.py index fb6078eac4ef98e597d7fddab72ed25ba60a86fc..1312d77a771e7fcd9660ae399d1af114be077416 100644 --- a/Code/FeatExtraction/DBCrawl.py +++ b/Code/FeatExtraction/DBCrawl.py @@ -4,9 +4,9 @@ # Import built-in modules import os # for iteration throug directories -import pandas as pd # for Series and DataFrames # Import 3rd party modules +import pandas as pd # for Series and DataFrames # Import own modules diff --git a/Code/FeatExtraction/ExecClassifMV.py b/Code/FeatExtraction/ExecClassifMonoView.py similarity index 55% rename from Code/FeatExtraction/ExecClassifMV.py rename to Code/FeatExtraction/ExecClassifMonoView.py index 51663e130a492715e97f75c5581a849883380c4e..692caad1131c964c91fb1837fb9e6b71a2c49a86 100644 --- a/Code/FeatExtraction/ExecClassifMV.py +++ b/Code/FeatExtraction/ExecClassifMonoView.py @@ -3,20 +3,12 @@ """ Script to perform feature parameter optimisation """ # Import built-in modules -#import cv2 # for OpenCV -#import cv # for OpenCV -#import datetime # for 
TimeStamp in CSVFile -#from scipy.cluster.vq import * # for Clustering http://docs.scipy.org/doc/scipy/reference/cluster.vq.html -#import numpy as np # for arrays -#import time # for time calculations from argparse import ArgumentParser # for acommand line arguments # Import 3rd party modules # Import own modules -#import DBCrawl # Functions to read Images from Database -#import ExportResults # Functions to render results -#import FeatExtraction # Functions to extract the features from Database + # Author-Info __author__ = "Nikolas Huelsmann" diff --git a/Code/FeatExtraction/ExecFeatExtraction.py b/Code/FeatExtraction/ExecFeatExtraction.py index 5ac151b3681f29c2f5f6b8e2b5482f963fbb05df..7412ce13043b580c353cb38b9f02e3cd679641e5 100644 --- a/Code/FeatExtraction/ExecFeatExtraction.py +++ b/Code/FeatExtraction/ExecFeatExtraction.py @@ -3,13 +3,10 @@ """ Script to perform feature parameter optimisation """ # Import built-in modules -import cv2 # for OpenCV -import cv # for OpenCV -import datetime # for TimeStamp in CSVFile -from scipy.cluster.vq import * # for Clustering http://docs.scipy.org/doc/scipy/reference/cluster.vq.html -import numpy as np # for arrays -import time # for time calculations -from argparse import ArgumentParser # for acommand line arguments +import datetime # for TimeStamp in CSVFile +import time # for time calculations +import argparse # for acommand line arguments +import textwrap # Import 3rd party modules @@ -25,52 +22,53 @@ __date__ = 2016-03-10 ### Argument Parser -parser = ArgumentParser(description='Export Features') +parser = argparse.ArgumentParser( +description='This methods permits to export one or more features at the same time for a database of images (path, name). To extract one feature activate it by using the specific argument (e.g. -RGB). For each feature you can define the parameters by using the optional arguments (e.g. --RGB_Hist 32). 
The results will be exported to a CSV-File.', +formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument('--name', action='store', help='Name of DB, default DB', default='DB') -parser.add_argument('--path', action='store', help='Path to the database e.g. D:\\Caltech', default='D:\\CaltechMini') -parser.add_argument('--cores', action='store', help='Number of cores used for parallelization of HOG, default 1', type=int, default=1) +groupStandard = parser.add_argument_group('necessary arguments:') +groupStandard.add_argument('--name', metavar='STRING', action='store', help='Select a name of DB, e.g. Caltech (default: %(default)s)', default='DB') +groupStandard.add_argument('--path', metavar='STRING', action='store', help='Path to the database (default: %(default)s)', default='D:\\CaltechMini') -parser.add_argument('--RGB', action='store_true', help='Use option to activate RGB') -parser.add_argument('--RGB_Hist', action='store', help='RGB: Number of bins for histogram, default 16', type=int, default=16) -parser.add_argument('--RGB_CI', action='store', help='RGB: Max Color Intensity [0 to VALUE], default 256', type=int, default=256) -parser.add_argument('--RGB_NMinMax', action='store_true', help='RGB: Use option to actvate MinMax Norm, default distribtion') -parser.add_argument('--HSV', action='store_true', help='Use option to activate HSV') -parser.add_argument('--HSV_H', action='store', help='HSV: Number of bins for Hue, default 8', type=int, default=8) -parser.add_argument('--HSV_S', action='store', help='HSV: Number of bins for Saturation, default 3', type=int, default=3) -parser.add_argument('--HSV_V', action='store', help='HSV: Number of bins for Value, default 3', type=int, default=3) -parser.add_argument('--HSV_NMinMax', action='store_true', help='HSV: Use option to actvate MinMax Norm, default distribtion') - -parser.add_argument('--SIFT', action='store_true', help='Use option to activate SIFT') -parser.add_argument('--SIFT_Cluster', 
action='store', help='SIFT: Number of k-means cluster, default 50', type=int, default=50) -parser.add_argument('--SIFT_NMinMax', action='store_true', help='SIFT: Use option to actvate MinMax Norm, default distribtion') - -parser.add_argument('--SURF', action='store_true', help='Use option to activate SURF') -parser.add_argument('--SURF_Cluster', action='store', help='SURF: Number of k-means cluster, default 50', type=int, default=50) -parser.add_argument('--SURF_NMinMax', action='store_true', help='SURF: Use option to actvate MinMax Norm, default distribtion') - -parser.add_argument('--HOG', action='store_true', help='Use option to activate HOG') -parser.add_argument('--HOG_CellD', action='store', help='HOG: CellDimension for local histograms, default 5', type=int, default=5) -parser.add_argument('--HOG_Orient', action='store', help='HOG: Number of bins of local histograms , default 8', type=int, default=8) -parser.add_argument('--HOG_Cluster', action='store', help='HOG: Number of k-means cluster, default 12', type=int, default=12) -parser.add_argument('--HOG_Iter', action='store', help='HOG: Max. 
number of iterations for clustering, default 100', type=int, default=100) +groupRGB = parser.add_argument_group('RGB arguments:') +groupRGB.add_argument('-RGB', action='store_true', help='Use option to activate RGB') +groupRGB.add_argument('--RGB_Bins', metavar='INT', action='store', help='Number of bins for histogram', type=int, default=16) +groupRGB.add_argument('--RGB_CI', metavar='INT', action='store', help='Max Color Intensity [0 to VALUE]', type=int, default=256) +groupRGB.add_argument('-RGB_NMinMax', action='store_true', help='Use option to actvate MinMax Norm instead of Distribution') +groupHSV = parser.add_argument_group('HSV arguments:') +groupHSV.add_argument('-HSV', action='store_true', help='Use option to activate HSV') +groupHSV.add_argument('--HSV_H_Bins', metavar='INT', action='store', help='Number of bins for Hue', type=int, default=16) +groupHSV.add_argument('--HSV_S_Bins', metavar='INT', action='store', help='Number of bins for Saturation', type=int, default=4) +groupHSV.add_argument('--HSV_V_Bins', metavar='INT', action='store', help='Number of bins for Value', type=int, default=4) +groupHSV.add_argument('-HSV_NMinMax', action='store_true', help='Use option to actvate MinMax Norm instead of Distribution') -# CELL_DIMENSION is the dimension of the cells on which we will compute local histograms -# NB_ORIENTATIONS is the number of bins of this local histograms -# intuitively, if CELL_DIMENSION is small it's better to have a small NB_ORIENTATIONS in order to have meaningful local histograms -# NB_CLUSTERS is the number of bins of the global histograms (the number of clusters in the KMEANS algorithm used for the bag of word) -# MAXITER is the maximum number of iteration for the clustering algorithm - +groupSIFT = parser.add_argument_group('SIFT arguments:') +groupSIFT.add_argument('-SIFT', action='store_true', help='Use option to activate SIFT') +groupSIFT.add_argument('--SIFT_Cluster', metavar='INT', action='store', help='Number of k-means 
cluster', type=int, default=50) +groupSIFT.add_argument('-SIFT_NMinMax', action='store_true', help='Use option to actvate MinMax Norm instead of Distribution') + +groupSURF = parser.add_argument_group('SURF arguments:') +groupSURF.add_argument('-SURF', action='store_true', help='Use option to activate SURF') +groupSURF.add_argument('--SURF_Cluster', metavar='INT', action='store', help='Number of k-means cluster', type=int, default=50) +groupSURF.add_argument('-SURF_NMinMax', action='store_true', help='Use option to actvate MinMax Norm instead of Distribution') + +groupHOG = parser.add_argument_group('HOG arguments:') +groupHOG.add_argument('-HOG', action='store_true', help='Use option to activate HOG') +groupHOG.add_argument('--HOG_CellD', metavar='INT', action='store', help='CellDimension for local histograms', type=int, default=5) +groupHOG.add_argument('--HOG_Orient', metavar='INT', action='store', help='Number of bins of local histograms', type=int, default=8) +groupHOG.add_argument('--HOG_Cluster', metavar='INT', action='store', help='Number of k-means cluster', type=int, default=12) +groupHOG.add_argument('--HOG_Iter', metavar='INT', action='store', help='Max. 
number of iterations for clustering', type=int, default=100) +groupHOG.add_argument('--HOG_cores', metavar='INT', action='store', help='Number of cores for HOG', type=int, default=1) + +### Read args args = parser.parse_args() -path = args.path -NB_CORES = args.cores -nameDB = args.name -### Helper +nameDB = args.name +path = args.path -# Function to transform the boolean deciscion of norm into a string +### Helper-Function to transform the boolean deciscion of norm into a string def boolNormToStr(norm): if(norm): return "MinMax" @@ -92,7 +90,7 @@ if(args.SURF): if(args.HOG): features = features + "HOG" -print "Infos:\t NameDB=" + nameDB + ", Path=" + path + ", Cores=" + str(NB_CORES) + ", Features=" + features +print "Infos:\t NameDB=" + nameDB + ", Path=" + path + ", Features=" + features ################################ Read Images from Database # Determine the Database to extract features @@ -119,16 +117,12 @@ if(args.RGB): print "RGB:\t Start" t_rgb_start = time.time() - - numberOfBins = args.RGB_Hist - maxColorIntensity = args.RGB_CI - boolNormMinMax = args.RGB_NMinMax - + # Infos - print "RGB:\t NumberOfBins=" + str(numberOfBins) + ", MaxColorIntensity=" + str(maxColorIntensity) + ", Norm=" + boolNormToStr(boolNormMinMax) + print "RGB:\t NumberOfBins=" + str(args.RGB_Bins) + ", MaxColorIntensity=" + str(args.RGB_CI) + ", Norm=" + boolNormToStr(args.RGB_NMinMax) # Extract Feature from DB - rgb_feat_desc,rgb_f_extr_res = FeatExtraction.calcRGBColorHisto(nameDB, dfImages, numberOfBins, maxColorIntensity, boolNormMinMax) + rgb_feat_desc,rgb_f_extr_res = FeatExtraction.calcRGBColorHisto(nameDB, dfImages, args.RGB_Bins, args.RGB_CI, args.RGB_NMinMax) t_rgb = time.time() - t_rgb_start print "RGB:\t Done in: " + str(t_rgb) + "[s]" @@ -139,17 +133,16 @@ if(args.HSV): print "HSV:\t Start" t_hsv_start = time.time() - h_bins = args.HSV_H - s_bins = args.HSV_S - v_bins = args.HSV_V + h_bins = args.HSV_H_Bins + s_bins = args.HSV_S_Bins + v_bins = args.HSV_V_Bins histSize 
= [h_bins, s_bins, v_bins] - boolNormMinMax = args.HSV_NMinMax # Infos - print "HSV:\t HSVBins=[" + str(h_bins) + "," + str(s_bins) + "," + str(v_bins) + "], Norm=" + boolNormToStr(boolNormMinMax) + print "HSV:\t HSVBins=[" + str(h_bins) + "," + str(s_bins) + "," + str(v_bins) + "], Norm=" + boolNormToStr(args.HSV_NMinMax) # Extract Feature from DB - hsv_feat_desc,hsv_f_extr_res = FeatExtraction.calcHSVColorHisto(nameDB, dfImages, histSize, boolNormMinMax) + hsv_feat_desc,hsv_f_extr_res = FeatExtraction.calcHSVColorHisto(nameDB, dfImages, histSize, args.HSV_NMinMax) t_hsv = time.time() - t_hsv_start print "HSV:\t Done in: " + str(t_hsv) + "[s]" @@ -161,13 +154,11 @@ if(args.SIFT): t_sift_start = time.time() boolSIFT = True - cluster = args.SIFT_Cluster - boolNormMinMax = args.SIFT_NMinMax - print "SIFT:\t Cluster=" + str(cluster) + ", Norm=" + boolNormToStr(boolNormMinMax) + print "SIFT:\t Cluster=" + str(args.SIFT_Cluster) + ", Norm=" + boolNormToStr(args.SIFT_NMinMax) sift_descriptors,sift_des_list = FeatExtraction.calcSURFSIFTDescriptors(dfImages, boolSIFT) - sift_feat_desc,sift_f_extr_res = FeatExtraction.calcSURFSIFTHisto(nameDB, dfImages, cluster, boolNormMinMax, sift_descriptors, sift_des_list, boolSIFT) + sift_feat_desc,sift_f_extr_res = FeatExtraction.calcSURFSIFTHisto(nameDB, dfImages, args.SIFT_Cluster, args.SIFT_NMinMax, sift_descriptors, sift_des_list, boolSIFT) t_sift = time.time() - t_sift_start print "SIFT:\t Done in: " + str(t_sift) + "[s]" @@ -178,14 +169,12 @@ if(args.SURF): t_surf_start = time.time() boolSIFT = False - cluster = args.SURF_Cluster - boolNormMinMax = args.SURF_NMinMax - print "SURF:\t Cluster=" + str(cluster) + ", Norm=" + boolNormToStr(boolNormMinMax) + print "SURF:\t Cluster=" + str(args.SURF_Cluster) + ", Norm=" + boolNormToStr(args.SURF_NMinMax) # Extract Feature from DB surf_descriptors,surf_des_list = FeatExtraction.calcSURFSIFTDescriptors(dfImages, boolSIFT) - surf_feat_desc,surf_f_extr_res = 
FeatExtraction.calcSURFSIFTHisto(nameDB, dfImages, cluster, boolNormMinMax, surf_descriptors, surf_des_list, boolSIFT) + surf_feat_desc,surf_f_extr_res = FeatExtraction.calcSURFSIFTHisto(nameDB, dfImages, args.SURF_Cluster, args.SURF_NMinMax, surf_descriptors, surf_des_list, boolSIFT) t_surf = time.time() - t_surf_start print "SURF:\t Done in: " + str(t_surf) + "[s]" @@ -198,6 +187,7 @@ if(args.HOG): NB_ORIENTATIONS = args.HOG_Orient NB_CLUSTERS = args.HOG_Cluster MAXITER = args.HOG_Iter + NB_CORES = args.HOG_cores print "HOG:\t CellDim=" + str(CELL_DIMENSION) + ", NbOrientations=" + str(NB_ORIENTATIONS) +", Cluster=" + str(NB_CLUSTERS) + ", MaxIter=" + str(MAXITER) diff --git a/Code/FeatExtraction/ExecFeatParaOpt.py b/Code/FeatExtraction/ExecFeatParaOpt.py index 2211c07ae8b9e458c3b41fe48eaa36375ee3a0ee..5d5ed749629a53841c6c2a50c9d0178e6aa8ba3a 100644 --- a/Code/FeatExtraction/ExecFeatParaOpt.py +++ b/Code/FeatExtraction/ExecFeatParaOpt.py @@ -3,34 +3,94 @@ """ Script to perform feature parameter optimisation """ # Import built-in modules -import cv2 # for OpenCV -import cv # for OpenCV import datetime # for TimeStamp in CSVFile -from scipy.cluster.vq import * # for Clustering http://docs.scipy.org/doc/scipy/reference/cluster.vq.html -import numpy as np # for arrays -from argparse import ArgumentParser # for acommand line arguments +import argparse # for acommand line arguments -# Import sci-kit learn -from sklearn.ensemble import RandomForestClassifier +# Import 3rd party modules +import numpy as np # for arrays # Import own modules -import DBCrawl # Functions to read Images from Database -import FeatParaOpt # Functions to perform parameter optimisation -import ExportResults # Functions to render results +import DBCrawl # Functions to read Images from Database +import FeatParaOpt # Functions to perform parameter optimisation +import ExportResults # Functions to render results # Author-Info __author__ = "Nikolas Huelsmann" -__status__ = "Development" #Production, 
Development, Prototype +__status__ = "Development" #Production, Development, Prototype __date__ = 2016-01-23 +### Argument Parser + +parser = argparse.ArgumentParser( +description='This methods permits to perform an optimisation of the parameter of one feature. Therefore you have so specify which feature to use (e.g. --feature RGB) and which of his parameters (the parameters depend on the feature chosen, e.g. for RGB: --parameter Bins). The method will calculate the results in your given range and export the results to a CSV-File.', +formatter_class=argparse.ArgumentDefaultsHelpFormatter) + +groupStandard = parser.add_argument_group('necessary arguments:') +groupStandard.add_argument('--name', metavar='STRING', action='store', help='Select a name of DB, e.g. Caltech (default: %(default)s)', default='DB') +groupStandard.add_argument('--path', metavar='STRING', action='store', help='Path to the database (default: %(default)s)', default='D:\\CaltechMini') + +groupOpt = parser.add_argument_group('Optimisation arguments:') +groupOpt.add_argument('--feature', choices=['RGB', 'HSV', 'SURF', 'SIFT', 'HOG'], help='Set feature from list (RGB, HSV, ..)', default='RGB') +groupOpt.add_argument('--param', choices=['RGB_Bins', 'RGB_MaxCI', 'HSV_H_Bins', 'HSV_S_Bins', 'HSV_V_Bins', 'SIFT_Cluster', 'SURF_Cluster', 'HOG_Cluster'], help='Parameter to optimise (remember depends on feature)', default='RGB_Bins') +groupOpt.add_argument('--valueStart', metavar='INT', action='store', help='Start-Value for optimisation range', type=int) +groupOpt.add_argument('--valueEnd', metavar='INT', action='store', help='End-Value for optimisation range', type=int) +groupOpt.add_argument('--nCalcs', metavar='INT', action='store', help='Number of calculations between Start and End-Value', type=int) + +groupRGB = parser.add_argument_group('RGB arguments:') +groupRGB.add_argument('--RGB_Bins', metavar='INT', action='store', help='Number of bins for histogram', type=int, default=16) 
+groupRGB.add_argument('--RGB_CI', metavar='INT', action='store', help='Max Color Intensity [0 to VALUE]', type=int, default=256) +groupRGB.add_argument('-RGB_NMinMax', action='store_true', help='Use option to actvate MinMax Norm instead of Distribution') + +groupHSV = parser.add_argument_group('HSV arguments:') +groupHSV.add_argument('--HSV_H_Bins', metavar='INT', action='store', help='Number of bins for Hue', type=int, default=16) +groupHSV.add_argument('--HSV_S_Bins', metavar='INT', action='store', help='Number of bins for Saturation', type=int, default=4) +groupHSV.add_argument('--HSV_V_Bins', metavar='INT', action='store', help='Number of bins for Value', type=int, default=4) +groupHSV.add_argument('-HSV_NMinMax', action='store_true', help='Use option to actvate MinMax Norm instead of Distribution') + +groupSIFT = parser.add_argument_group('SIFT arguments:') +groupSIFT.add_argument('--SIFT_Cluster', metavar='INT', action='store', help='Number of k-means cluster', type=int, default=50) +groupSIFT.add_argument('-SIFT_NMinMax', action='store_true', help='Use option to actvate MinMax Norm instead of Distribution') + +groupSURF = parser.add_argument_group('SURF arguments:') +groupSURF.add_argument('--SURF_Cluster', metavar='INT', action='store', help='Number of k-means cluster', type=int, default=50) +groupSURF.add_argument('-SURF_NMinMax', action='store_true', help='Use option to actvate MinMax Norm instead of Distribution') + +groupHOG = parser.add_argument_group('HOG arguments:') +groupHOG.add_argument('--HOG_CellD', metavar='INT', action='store', help='CellDimension for local histograms', type=int, default=5) +groupHOG.add_argument('--HOG_Orient', metavar='INT', action='store', help='Number of bins of local histograms', type=int, default=8) +groupHOG.add_argument('--HOG_Cluster', metavar='INT', action='store', help='Number of k-means cluster', type=int, default=12) +groupHOG.add_argument('--HOG_Iter', metavar='INT', action='store', help='Max. 
number of iterations for clustering', type=int, default=100) +groupHOG.add_argument('--HOG_cores', metavar='INT', action='store', help='Number of cores for HOG', type=int, default=1) + +groupClass = parser.add_argument_group('Classification arguments:') +groupClass.add_argument('--CL_split', metavar='DOUBLE', action='store', help='Determine the the train/test split', type=float, default=0.7) +groupClass.add_argument('--CL_RF_trees', metavar='INT', action='store', nargs='+', help='GridSearch: Determine the trees', type=int, default=[50, 100, 150, 200]) +groupClass.add_argument('--CL_RF_CV', metavar='INT', action='store', help='Number of k-folds for CV', type=int, default=3) +groupClass.add_argument('--CL_RF_Cores', metavar='INT', action='store', help='Number of cores', type=int, default=1) + + + +### Read args - transform in Arrays for function calls +args = parser.parse_args() +path = args.path +nameDB = args.name + +para_opt = [args.feature, args.param, args.valueStart, args.valueEnd, args.nCalcs] +para_RGB = [args.RGB_Bins, args.RGB_CI, args.RGB_NMinMax] +para_HSV = [args.HSV_H_Bins, args.HSV_S_Bins, args.HSV_V_Bins, args.HSV_NMinMax] +para_SIFT = [args.SIFT_Cluster, args.SIFT_NMinMax] +para_SURF = [args.SURF_Cluster, args.SURF_NMinMax] +para_HOG = [args.HOG_CellD, args.HOG_Orient, args.HOG_Cluster, args.HOG_Iter, args.HOG_cores] +para_Cl = [args.CL_split, args.CL_RF_trees, args.CL_RF_CV, args.CL_RF_Cores] + + ### Main Programm ################################ Read Images from Database # Determine the Database to extract features print "### Start of Main Programm for Feature Parameter Optimisation ###" -path ="D:\\Caltech" -nameDB = "CT" + print "### Start:\t Exportation of images from DB ###" @@ -45,32 +105,12 @@ print "### Done:\t Exportation of Images from DB ###" ################################ Parameter Optimisation # Setup -#feature = "RGB" -#parameter = "Bins" -#valueStart = int(8) -#valueEnd = int(64) -#nCalculations = int(8) -#boolCV = True - -#print '### Optimisation 
- Feature:' + str(feature) + " Parameter:" + str(parameter) + " from:" + str(valueStart) + " to:" + str(valueEnd) + " in #calc:" + str(nCalculations) + " withCV:" + str(boolCV) + " ###" - -#print "### Start: Feautre Optimisation ###" -#df_feat_res = FeatParaOpt.perfFeatMonoV(nameDB, dfImages,feature, parameter, valueStart, valueEnd, nCalculations, boolCV) -#print "### Done: Feautre Optimisation ###" - -# Setup SURF -feature = "SURF" -parameter = "Cluster" -valueStart = 50 -valueEnd = 200 -nCalculations = 4 -boolCV = True - -print '### Optimisation - Feature:' + str(feature) + " Parameter:" + str(parameter) + " from:" + str(valueStart) + " to:" + str(valueEnd) + " in #calc:" + str(nCalculations) + " withCV:" + str(boolCV) + " ###" - -print "### Start:\t Feautre Optimisation ###" -df_feat_res = FeatParaOpt.perfFeatMonoV(nameDB, dfImages,feature, parameter, valueStart, valueEnd, nCalculations, boolCV) -print "### Done:\t Feautre Optimisation ###" +print '### Optimisation - Feature:' + str(args.feature) + " Parameter:" + str(args.param) + " from:" + str(args.valueStart) + " to:" + str(args.valueEnd) + " in #calc:" + str(args.nCalcs) + " ###" + +print "### Start: Feautre Optimisation ###" +df_feat_res = FeatParaOpt.perfFeatMonoV(nameDB, dfImages, para_opt, para_RGB, para_HSV, para_SIFT, para_SURF, para_HOG, para_Cl) +print "### Done: Feautre Optimisation ###" + ################################ Render results print "### Start:\t Exporting to CSV ###" diff --git a/Code/FeatExtraction/ExportResults.py b/Code/FeatExtraction/ExportResults.py index f1a1209680469b4af8a432099d543bf1982b1dd8..99f797be1044b5b226d88eace67dfdb9da13cb1a 100644 --- a/Code/FeatExtraction/ExportResults.py +++ b/Code/FeatExtraction/ExportResults.py @@ -4,14 +4,14 @@ # Import built-in modules import os # for iteration throug directories +import string # to generate a range of letters + +# Import 3rd party modules import pandas as pd # for Series and DataFrames import numpy as np # for Numpy Arrays import 
matplotlib.pyplot as plt # for Plots from scipy.interpolate import interp1d # to Interpolate Data -import string # to generate a range of letters -from matplotlib.offsetbox import AnchoredOffsetbox, TextArea, DrawingArea, HPacker # to generate the Annotations in plot - -# Import 3rd party modules +from matplotlib.offsetbox import AnchoredOffsetbox, TextArea, HPacker # to generate the Annotations in plot # Import own modules diff --git a/Code/FeatExtraction/FeatExtraction.py b/Code/FeatExtraction/FeatExtraction.py index a716112715fbc1cdf2e4e6cfaac9208ded14d877..5ab462e6b2748151377eadda31ac31076d4addce 100644 --- a/Code/FeatExtraction/FeatExtraction.py +++ b/Code/FeatExtraction/FeatExtraction.py @@ -3,13 +3,11 @@ """ Code to Extract all Features from Database """ # Import built-in modules -import numpy as np # for numpy arrays -import pandas as pd # for panda DataFrame -import datetime # for time calculations - -# Import OpenCV + + +# Import 3rd party modules +import numpy as np # for numpy arrays import cv2 # for OpenCV -import cv # for SIFT/SURF from scipy.cluster.vq import * # for Clustering http://docs.scipy.org/doc/scipy/reference/cluster.vq.html\n", # Import own modules @@ -49,7 +47,6 @@ def calcRGBColorHisto(nameDB, dfImages, numberOfBins, maxColorIntensity, boolNor param = "Bins_" + str(int(numberOfBins)) + "-" + "MaxCI_" + str(maxColorIntensity) + "-" + "Norm_" + norm description = nameDB + "-RGB-" + param - ## algo for images in npImages: diff --git a/Code/FeatExtraction/FeatParaOpt.py b/Code/FeatExtraction/FeatParaOpt.py index a708b77969661ca9cb1f1c48279e101c7b8ae953..e846cb95fa74931d5ef71cca6670d542889b232f 100644 --- a/Code/FeatExtraction/FeatParaOpt.py +++ b/Code/FeatExtraction/FeatParaOpt.py @@ -3,11 +3,11 @@ """ Function to optimise feature parameters """ # Import built-in modules -import time # for time calculations -import numpy as np# for numpy arrays -import pandas as pd # for Series and DataFrames +import time # for time calculations # Import 3rd 
party modules +import numpy as np # for numpy arrays +import pandas as pd # for Series and DataFrames # Import own modules import FeatExtraction # Functions for Feature Extractions# @@ -16,22 +16,28 @@ import ClassifMonoView # Functions for classification # Author-Info __author__ = "Nikolas Huelsmann" __status__ = "Development" #Production, Development, Prototype -__date__ = 2016-01-23 +__date__ = 2016-03-14 - -# dfImages: Database with all images -# feature: which feature? e.g. ColorHistogram -# paramter: which parameter should be tested? e.g. bins of histogram -# valueStart: Value for paramter to start with -# valueEnd: Value for paramter to end with -# nCalculations: How many calculations between valueStart and valueEnd? e.g. vS=0,VE=9,nCalc=10 -> test:0,1,2,3,4,5,6,7,8,9 -# boolCV: Boolian if CrossValidation should be used -def perfFeatMonoV(nameDB, dfImages,feature, parameter, valueStart, valueEnd, nCalculations, boolCV): +# dfImages: Database with all images +# feature: which feature? e.g. ColorHistogram +# para_opt: optimisation parameters +# para_RGB: RGB parameters +# para_HSV: HSV paramters +# para_SIFT: SIFT parameters +# para_SURF: SURF parameters +# para_HOG: HOG paramters +# para_Cl: Classification parameters +def perfFeatMonoV(nameDB, dfImages, para_opt, para_RGB, para_HSV, para_SIFT, para_SURF, para_HOG, para_Cl): # TIME for total calculation t_tot_start = time.time() - # Value check - are the given values possible: e.g. 
bins valueStart = -1 -> error + # Values from Array into variables - easier to read the code + feature = para_opt[0] + parameter = para_opt[1] + valueStart = para_opt[2] + valueEnd = para_opt[3] + nCalculations = para_opt[4] # Calculate Stepwidth if(nCalculations>1): @@ -39,6 +45,7 @@ def perfFeatMonoV(nameDB, dfImages,feature, parameter, valueStart, valueEnd, nCa valueArray = np.around(np.array(range(0,nCalculations))*step) + valueStart else: valueArray = [valueStart] + # FeatExtraction Results DataFrame df_feat_res = pd.DataFrame() @@ -58,49 +65,53 @@ def perfFeatMonoV(nameDB, dfImages,feature, parameter, valueStart, valueEnd, nCa # Call extraction function with parameters -> returns feature if(feature=="RGB"): # Basic Setup - numberOfBins = 16 - maxColorIntensity = 256 - boolNormMinMax = False + numberOfBins = para_RGB[0] + maxColorIntensity = para_RGB[1] + boolNormMinMax = para_RGB[2] + # ParamaterTest - if(parameter=="Bins"): + if(parameter=="RGB_Bins"): numberOfBins = valuePara - elif(parameter=="MaxCI"): + elif(parameter=="RGB_MaxCI"): maxColorIntensity = valuePara - elif(parameter=="Norm"): - boolNormMinMax = valuePara # Extract Feature from DB feat_desc,f_extr_res = FeatExtraction.calcRGBColorHisto(nameDB, dfImages, numberOfBins, maxColorIntensity, boolNormMinMax) elif(feature=="HSV"): # Basic Setup - h_bins = 8 - s_bins = 3 - v_bins = 3 - histSize = [h_bins, s_bins, v_bins] - boolNormMinMax = False + h_bins = para_HSV[0] + s_bins = para_HSV[1] + v_bins = para_HSV[2] + + boolNormMinMax = para_HSV[3] + + # The bin count selected by 'parameter' is overridden with valuePara below # ParamaterTest - if(parameter=="Bins"): - histSize = valuePara - elif(parameter=="Norm"): - boolNormMinMax = valuePara - + if(parameter=="HSV_H_Bins"): + h_bins = valuePara + elif(parameter=="HSV_S_Bins"): + s_bins = valuePara + elif(parameter=="HSV_V_Bins"): + v_bins = valuePara + + histSize = [h_bins, s_bins, v_bins] + # Extract Feature from DB feat_desc,f_extr_res = FeatExtraction.calcHSVColorHisto(nameDB, dfImages, histSize, boolNormMinMax) 
elif(feature=="SURF"): # Basic Setup - cluster = 50 - boolNormMinMax = False + cluster = para_SURF[0] + boolNormMinMax = para_SURF[1] boolSIFT = False + # ParamaterTest - if(parameter=="Cluster"): + if(parameter=="SURF_Cluster"): cluster = valuePara - elif(parameter=="Norm"): - boolNormMinMax = valuePara if descriptors is None: descriptors,des_list = FeatExtraction.calcSURFSIFTDescriptors(dfImages, boolSIFT) @@ -110,21 +121,34 @@ def perfFeatMonoV(nameDB, dfImages,feature, parameter, valueStart, valueEnd, nCa elif(feature=="SIFT"): # Basic Setup - cluster = 50 - boolNormMinMax = False + cluster = para_SIFT[0] + boolNormMinMax = para_SIFT[1] boolSIFT = True + # ParamaterTest - if(parameter=="Cluster"): + if(parameter=="SIFT_Cluster"): cluster = valuePara - elif(parameter=="Norm"): - boolNormMinMax = valuePara if descriptors is None: descriptors,des_list = FeatExtraction.calcSURFSIFTDescriptors(dfImages, boolSIFT) # Extract Feature from DB feat_desc,f_extr_res = FeatExtraction.calcSURFSIFTHisto(nameDB, dfImages, cluster, boolNormMinMax, descriptors, des_list, boolSIFT) + elif(feature=="HOG"): + CELL_DIMENSION = para_HOG[0] + NB_ORIENTATIONS = para_HOG[1] + NB_CLUSTERS = para_HOG[2] + MAXITER = para_HOG[3] + NB_CORES = para_HOG[4] + + # ParamaterTest + if(parameter=="HOG_Cluster"): + NB_CLUSTERS = valuePara + + # Extract Feature from DB + feat_desc,f_extr_res = FeatExtraction.calcHOGParallel(nameDB, npImages, CELL_DIMENSION, NB_ORIENTATIONS, NB_CLUSTERS, MAXITER, NB_CORES) # NOTE(review): npImages is not defined in the visible hunks - confirm it exists in this scope + else: print "ERROR: Selected Feature does not exist" print "Feature: " + str(feature) @@ -137,26 +161,23 @@ def perfFeatMonoV(nameDB, dfImages,feature, parameter, valueStart, valueEnd, nCa # TIME for CLASSIFICATION t_classif_start = time.time() + # Values from Array into variables - easier to read the code + split = para_Cl[0] + num_estimators = para_Cl[1] + cv_folds = para_Cl[2] + clas_cores = para_Cl[3] + # Calculate Train/Test data - #Basic Setup - split = 0.7 X_train, X_test, y_train, y_test 
= ClassifMonoView.calcTrainTest(f_extr_res, dfImages.classLabel, split) - # Own Function for split: ClassifMonoView.calcTrainTestOwn # Begin Classification RandomForest # call function: return fitted model print "### Start:\t Classification Nr:" + str(i) + " from:" + str(max(arr_Calc)) + " ###" - - # Basic Setup - num_estimators = [50, 101, 150, 200] - - if(boolCV==True): - cl_desc, cl_res = ClassifMonoView.calcClassifRandomForestCV(X_train, y_train, num_estimators) - else: - cl_desc, cl_res = ClassifMonoView.calcClassifRandomForest(X_train, X_test, y_test, y_train, num_estimators) - + + cl_desc, cl_res = ClassifMonoView.calcClassifRandomForestCV(X_train, y_train, num_estimators, cv_folds, clas_cores) + print "### Done:\t Classification Nr:" + str(i) + " from:" + str(max(arr_Calc)) + " ###" # TIME for CLASSIFICATION END diff --git a/Code/FeatExtraction/Versions.py b/Code/FeatExtraction/Versions.py new file mode 100644 index 0000000000000000000000000000000000000000..90392028c4f557fd124f686406b17de3a252aedf --- /dev/null +++ b/Code/FeatExtraction/Versions.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python + +""" Script to render versions of modules used """ + +# Import built-in modules + +# Import 3rd party modules + +# Import own modules + + +# Author-Info +__author__ = "Nikolas Huelsmann" +__status__ = "Development" #Production, Development, Prototype +__date__ = 2016-03-14 + +import sys +print("Python-V.: " + sys.version) + +import cv2 +print("OpenCV2-V.: " + cv2.__version__) + +import pandas as pd +print("Pandas-V.: " + pd.__version__) + +import numpy +print("Numpy-V.: " + numpy.version.version) + +import scipy +print("Scipy-V.: " + scipy.__version__) + +import matplotlib +print("Matplotlib-V.: " + matplotlib.__version__) + +import sklearn +print("Sklearn-V.: " + sklearn.__version__) + +