Commit 300ba3d2 authored by bbauvin

try that

%% Cell type:markdown id: tags:
# Code to Extract ColorHistograms for Database
%% Cell type:markdown id: tags:
#### Author: Nikolas Hülsmann
#### Date: 2015-11-22
%% Cell type:markdown id: tags:
## Functions for Data Extraction
### Function to iterate through a given directory and return image paths and classLabels
%% Cell type:code id: tags:
``` python
def imgCrawl(path): # path to 'highest' folder
    rootdir = path
    df = pd.DataFrame()
    for subdir, dirs, files in os.walk(rootdir): # loop through subdirectories
        for file in files:
            pathOfFile = os.path.join(subdir, file) # path of file
            head, classLabel = os.path.split(os.path.split(pathOfFile)[0]) # use the directory name of the file as classLabel
            df = df.append({'classLabel': classLabel, 'pathOfFile': pathOfFile}, ignore_index=True)
    return df
```
%% Cell type:markdown id: tags:
### Function to determine Class-Labels with Integer representation
%% Cell type:code id: tags:
``` python
# function to determine class labels and return them as a Series
def getClassLabels(path):
    data = os.listdir(path) # listdir returns all subdirectories
    index = range(0, len(data))
    return pd.Series(data, index)
```
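%% Cell type:markdown id: tags:
For a hypothetical Caltech-style folder containing the class subdirectories `airplanes`, `ant` and `brain`, the returned Series maps integer indices to class names (`0 -> airplanes`, `1 -> ant`, `2 -> brain`); these integers are the class label representation used later on.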
%% Cell type:markdown id: tags:
### Function to calculate the ColorHistogram for given Images
%% Cell type:code id: tags:
``` python
#### Calculate ColorHistograms for all images
# path: path to the highest folder
# dfImages: DataFrame with paths to all images - use function imgCrawl
# sClassLabels: Series with class labels - use function getClassLabels
def calcColorHisto(path_, dfImages_, sClassLabels_):
    # Initialize function
    df = pd.DataFrame()
    path = path_
    npImages = dfImages_.values
    sClassLabels = sClassLabels_
    ## algo
    for images in npImages:
        image = cv2.imread(images[1])
        # image size for normalization
        height, width, channels = image.shape
        img_size = height * width
        # split into the color channels b, g, r
        chans = cv2.split(image)
        colors = ("b", "g", "r")
        features = []
        i = 1
        # loop over the image channels
        for chan in chans:
            # calculate the color histogram - 16 bins cf. paper (a test with 64 bins gave a similar score)
            hist = cv2.calcHist([chan], [0], None, [16], [0, 256])
            print i
            i = i + 1
            # get the raw values
            hist = hist[:, 0]
            # normalize to a distribution from 0 to 1 by calculating, for each color channel (red/blue/green):
            # (number of pixels in bin) / (pixel size of image)
            hist[:] = [x / img_size for x in hist]
            # normalize with MinMax from 0 to 1 -> feature scaling
            #cv2.normalize(hist, hist, 0, 1, cv2.NORM_MINMAX)
            features.extend(hist)
        # assign the integer label for the DataFrame
        classLabel = sClassLabels[sClassLabels == images[0]].index[0]
        # append features to df
        df = df.append({'classLabel': classLabel, 'ColHisto': features}, ignore_index=True)
    return df
```
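%% Cell type:markdown id: tags:
A quick sanity check of the (bin count) / (image size) normalization above, using a hypothetical toy image (not part of the pipeline): each colour channel's 16-bin histogram should sum to 1.
%% Cell type:code id: tags:
``` python
import numpy as np
import cv2

# hypothetical 4x4 BGR toy image with values spread over [0, 235]
toy = np.arange(48, dtype=np.uint8).reshape(4, 4, 3) * 5
chan = cv2.split(toy)[0]                                   # one colour channel
hist = cv2.calcHist([chan], [0], None, [16], [0, 256])[:, 0]
hist = hist / float(toy.shape[0] * toy.shape[1])           # divide by pixel count
print(hist.sum())                                          # expected: 1.0
```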
%% Cell type:markdown id: tags:
### Function to export calculated Data to csv
%% Cell type:code id: tags:
``` python
#### Export ColorHistogram to csv
def exportToCSV(pandasSorDF, filename):
    #filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-ColorHistogram"
    path = os.getcwdu() + "\\" + filename
    if os.path.isfile(path + ".csv"):
        for i in range(1, 20):
            testFileName = filename + "-" + str(i) + ".csv"
            if not os.path.isfile(os.getcwdu() + "\\" + testFileName):
                pandasSorDF.to_csv(testFileName)
                break
    else:
        pandasSorDF.to_csv(filename + ".csv")
```
%% Cell type:markdown id: tags:
## Main Program
%% Cell type:code id: tags:
``` python
# Imports
import os # for iterating through directories
import pandas as pd # for Series and DataFrames
import cv2 # for OpenCV
import datetime # for TimeStamp in CSVFile
```
%% Cell type:code id: tags:
``` python
#### Calculate Color Histogram
path = 'D:\CaltechMini'
dfImages = imgCrawl(path)
sClassLabels = getClassLabels(path)
dfColorHistogram = calcColorHisto(path, dfImages, sClassLabels)
fileNameColorHis = datetime.datetime.now().strftime("%Y_%m_%d") + "-Features" + "-ColorHistogram"
exportToCSV(dfColorHistogram, fileNameColorHis)
fileNameClassLabels = datetime.datetime.now().strftime("%Y_%m_%d") + "-ClassLabels" + "-Caltech"
exportToCSV(sClassLabels, fileNameClassLabels)
```
%% Output
1
2
3
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-10-543bf34003a8> in <module>()
3 dfImages = imgCrawl(path)
4 sClassLabels = getClassLabels(path)
----> 5 dfColorHistogram = calcColorHisto(path, dfImages, sClassLabels)
6
7 fileNameColorHis = datetime.datetime.now().strftime("%Y_%m_%d") + "-Features" +"-ColorHistogram"
<ipython-input-9-3a59b70a518a> in calcColorHisto(path_, dfImages_, sClassLabels_)
47
48 # assign integer label for dataframe
---> 49 classLabel = sClassLabels[sClassLabels == images[0]].index[0]
50
51 # append features to df
D:\Programme\Anaconda\lib\site-packages\pandas\core\index.pyc in __getitem__(self, key)
1074
1075 if np.isscalar(key):
-> 1076 return getitem(key)
1077
1078 if isinstance(key, slice):
IndexError: index 0 is out of bounds for axis 0 with size 0
%% Cell type:markdown id: tags:
# Code to Extract ColorHistograms for Database
%% Cell type:markdown id: tags:
#### Author: Nikolas Hülsmann
#### Date: 2015-11-22
%% Cell type:markdown id: tags:
## Functions for Data Extraction
### Function to iterate through a given directory and return image paths and classLabels
%% Cell type:code id: tags:
``` python
def imgCrawl(path): # path to 'highest' folder
    rootdir = path
    df = pd.DataFrame()
    for subdir, dirs, files in os.walk(rootdir): # loop through subdirectories
        for file in files:
            pathOfFile = os.path.join(subdir, file) # path of file
            head, classLabel = os.path.split(os.path.split(pathOfFile)[0]) # use the directory name of the file as classLabel
            df = df.append({'classLabel': classLabel, 'pathOfFile': pathOfFile}, ignore_index=True)
    return df
```
%% Cell type:markdown id: tags:
### Function to determine Class-Labels with Integer representation
%% Cell type:code id: tags:
``` python
# function to determine class labels and return them as a Series
def getClassLabels(path):
    data = os.listdir(path) # listdir returns all subdirectories
    index = range(0, len(data))
    return pd.Series(data, index)
```
%% Cell type:markdown id: tags:
### Function to calculate the ColorHistogram for given Images
%% Cell type:code id: tags:
``` python
#### Calculate ColorHistograms for all images
# path: path to the highest folder
# dfImages: DataFrame with paths to all images - use function imgCrawl
# sClassLabels: Series with class labels - use function getClassLabels
def calcColorHisto(path_, dfImages_, sClassLabels_):
    # Initialize function
    df = pd.DataFrame()
    path = path_
    npImages = dfImages_.values
    sClassLabels = sClassLabels_
    ## algo
    for images in npImages:
        image = cv2.imread(images[1])
        chans = cv2.split(image) # split into the color channels b, g, r
        colors = ("b", "g", "r")
        features = []
        # loop over the image channels
        for (chan, color) in zip(chans, colors):
            # calculate the color histogram - 16 bins cf. paper
            hist = cv2.calcHist([chan], [0], None, [16], [0, 256])
            # get the raw values
            hist = hist[:, 0]
            # normalize with MinMax from 0 to 1 -> feature scaling
            cv2.normalize(hist, hist, 0, 1, cv2.NORM_MINMAX)
            features.extend(hist)
        # assign the integer label for the DataFrame
        classLabel = sClassLabels[sClassLabels == images[0]].index[0]
        # append features to df
        df = df.append({'classLabel': classLabel, 'ColHisto': features}, ignore_index=True)
    return df
```
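%% Cell type:markdown id: tags:
For reference, a toy illustration (hypothetical values, not taken from the dataset) of the MinMax feature scaling used above: the smallest bin maps to 0 and the largest to 1.
%% Cell type:code id: tags:
``` python
import numpy as np
import cv2

hist = np.array([4., 10., 2., 8.], dtype=np.float32)   # hypothetical raw bin counts
cv2.normalize(hist, hist, 0, 1, cv2.NORM_MINMAX)        # in-place MinMax scaling
print(hist)                                             # expected: [0.25 1.  0.  0.75]
```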
%% Cell type:markdown id: tags:
### Function to export calculated Data to csv
%% Cell type:code id: tags:
``` python
#### Export ColorHistogram to csv
def exportToCSV(pandasSorDF, filename):
    #filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-ColorHistogram"
    path = os.getcwdu() + "\\" + filename
    if os.path.isfile(path + ".csv"):
        for i in range(1, 20):
            testFileName = filename + "-" + str(i) + ".csv"
            if not os.path.isfile(os.getcwdu() + "\\" + testFileName):
                pandasSorDF.to_csv(testFileName)
                break
    else:
        pandasSorDF.to_csv(filename + ".csv")
```
%% Cell type:markdown id: tags:
## Main Program
%% Cell type:code id: tags:
``` python
# Imports
import os # for iterating through directories
import pandas as pd # for Series and DataFrames
import cv2 # for OpenCV
import datetime # for TimeStamp in CSVFile
```
%% Cell type:code id: tags:
``` python
#### Calculate Color Histogram
path = 'D:\Caltech'
dfImages = imgCrawl(path)
sClassLabels = getClassLabels(path)
dfColorHistogram = calcColorHisto(path, dfImages, sClassLabels)
fileNameColorHis = datetime.datetime.now().strftime("%Y_%m_%d") + "-Features" + "-ColorHistogram"
exportToCSV(dfColorHistogram, fileNameColorHis)
fileNameClassLabels = datetime.datetime.now().strftime("%Y_%m_%d") + "-ClassLabels" + "-Caltech"
exportToCSV(sClassLabels, fileNameClassLabels)
```
# coding: utf-8
import os                       # for iterating through directories
import pandas as pd             # for Series and DataFrames
import cv2                      # for OpenCV
import datetime                 # for the timestamp in the CSV file
from scipy.cluster.vq import *  # for clustering, cf. http://docs.scipy.org/doc/scipy/reference/cluster.vq.html
import numpy as np              # for arrays
import time                     # for time measurements
# # Code to Extract ColorHistograms for Database
# #### Author: Nikolas Hülsmann
# #### Date: 2015-11-22
# ## Functions for Data Extraction
#
# ### Function to iterate through a given directory and return image paths and classLabels
# In[31]:
def imgCrawl(path, sClassLabels): # path to 'highest' folder
    rootdir = path
    df = pd.DataFrame()
    for subdir, dirs, files in os.walk(rootdir): # loop through subdirectories
        for file in files:
            pathOfFile = os.path.join(subdir, file) # path of file
            head, classLabel = os.path.split(os.path.split(pathOfFile)[0]) # use the directory name of the file as classLabel
            # assign the integer label for the DataFrame
            classLabel = sClassLabels[sClassLabels == classLabel].index[0]
            df = df.append({'classLabel': classLabel, 'pathOfFile': pathOfFile}, ignore_index=True)
    return df
# ### Function to determine Class-Labels with Integer representation
# In[32]:
# function to determine class labels and return them as a Series
def getClassLabels(path):
    data = os.listdir(path) # listdir returns all subdirectories
    index = range(0, len(data))
    return pd.Series(data, index)
# ### Function to calculate the ColorHistogram for given Images
# In[33]:
#### Calculate ColorHistograms for all images
### Points to improve:
# - use the HSV color space
# - change function: parameter for the number of ColorHistogram bins (feature length)
# dfImages: DataFrame with paths to all images - use function imgCrawl
# numberOfBins_: number of histogram bins
def calcColorHisto(dfImages_, numberOfBins_):
    # Initialize function
    df = pd.DataFrame()
    npImages = dfImages_.values
    numberOfBins = numberOfBins_
    npColorHist = np.zeros((len(npImages), numberOfBins * 3), "float32")
    i = 0
    ## algo
    for images in npImages:
        image = cv2.imread(images[1])
        # image size for normalization
        height, width, channels = image.shape
        img_size = height * width
        # split into the color channels b, g, r
        chans = cv2.split(image)
        colors = ("b", "g", "r")
        histogram = []
        ########### Feature Color Histogram (cf. http://docs.opencv.org/2.4/doc/tutorials/imgproc/histograms/histogram_calculation/histogram_calculation.html)
        # loop over the image channels
        for (chan, color) in zip(chans, colors):
            # calculate the color histogram - 16 bins cf. paper (a test with 64 bins gave a similar score)
            # separates the intensity of each color into numberOfBins bins of equal size, e.g. for 16 bins: 0-15, 16-31, ..., 240-255
            hist = cv2.calcHist([chan], [0], None, [numberOfBins], [0, 256])
            # get the raw values
            hist = hist[:, 0]
            # normalize to a distribution from 0 to 1 by calculating, for each color channel (red/blue/green):
            # (number of pixels in bin) / (pixel size of image)
            #hist[:] = [x / img_size for x in hist]
            hist[:] = [x / sum(hist) for x in hist]
            # normalize with MinMax from 0 to 1 -> feature scaling
            #cv2.normalize(hist, hist, 0, 1, cv2.NORM_MINMAX)
            histogram.extend(hist)
        # store the feature vector for this image
        npColorHist[i] = histogram
        i = i + 1
        #df = df.append({'ColHisto': features_colHist}, ignore_index=True)
    return npColorHist
# ### Function to calculate Surf Histogram
# In[34]:
################# FEATURE SURF (cf. http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_feature2d/py_surf_intro/py_surf_intro.html#surf)
# API cf. http://docs.opencv.org/2.4/modules/nonfree/doc/feature_detection.html
#### Calculate a histogram of SURF descriptors with a Bag-of-Words approach for all images
### Points to improve:
# - use a spatial histogram: http://www.di.ens.fr/willow/events/cvml2011/materials/practical-classification/
# - change function: parameter for the number of K clusters / feature length (with regard to overfitting)
# dfImages: DataFrame with paths to all images - use function imgCrawl
# k: number of K-means clusters -> length of the feature vector
def calcSurfHisto(dfImages_, k_):
    # Initialize function
    df = pd.DataFrame()
    npImages = dfImages_.values
    k = k_
    # list where all the descriptors are stored
    des_list = []
    # SURF detector/descriptor from the OpenCV 2.4 nonfree module
    sift = cv2.SURF()
    #### Feature detection and description (SURF):
    # Detect (localize) the keypoints (points of interest) in each image - like SIFT, SURF uses e.g. corners
    # Pro: SIFT/SURF are scale and rotation invariant!
    for images in npImages:
        # read image
        image = cv2.imread(images[1])
        # detect keypoints (kp) and calculate the descriptors (des) with one function call
        # each image has a different number of kp, but each kp has a descriptor of fixed length (128)
        kp, des = sift.detectAndCompute(image, None)
        des_list.append(des)
    # stack all the descriptors vertically in a numpy array
    descriptors = des_list[0]
    for descriptor in des_list[1:]:
        descriptors = np.vstack((descriptors, descriptor))
    #### Bag-of-Words approach
    ### 1. Step: use K-means clustering to create the dictionary/vocabulary/codebook:
    # Encoding is the quantization of the image kp/des that constitute the image to be classified.
    # Basic encoding schemes work by first running K-means on the set of all des collected
    # across multiple images.
    # This builds what is known as a dictionary/vocabulary/codebook, represented by the centroids obtained from the clustering.
    # Perform K-means clustering -> creates the words from all descriptors -> this is the (dic) dictionary/vocabulary/codebook
    # k: number of different clusters to build! Results in a feature of length k
    dic, variance = kmeans(descriptors, k, 1)
    ### 2. Step: encoding/coding/vector quantization (vq) to assign each descriptor the closest "visual word" from the dictionary:
    # At the end of this process you end up with k representative "visual words" (the centroid of each cluster after
    # K-means ends) for your image descriptors. These "visual words" represent what is usually understood as your
    # visual dictionary. Once you have these visual words, encoding is the process of assigning
    # each descriptor within your image the "visual word" (nearest neighbor) in the dictionary.
    npSurfHist = np.zeros((len(npImages), k), "float32")
    for i in xrange(len(npImages)):
        # vq: (encoding) assign words from the dictionary to each descriptor
        words, distance = vq(des_list[i], dic)
        ### 3. Step: pooling - calculate a histogram for each image
        # Pooling refers to the process of representing an image as a "bag of words".
        # The word "bag" here is meant to convey that once you have encoded each descriptor with a word (a number between 1 and k),
        # you build a new representation (a bag) that discards the spatial relationship between the words that
        # constitute your image.
        # This representation is often a histogram, or a collection of spatially adjacent histograms of the descriptors
        # (i.e. histograms of values 1 to k), that together form your image. "Pooling" is thus the process of
        # building a histogram of words (i.e. pooling ~ "sampling" words from the image to build a probability
        # mass function of words).
        # To clarify, the purpose of pooling is twofold:
        # By building a feature vector that is a histogram of words (as opposed to putting the full "sentence of words"
        # in the feature vector), your descriptor will be invariant to changes in "the ordering of words".
        # In computer vision this translates into invariance with respect to rotations and distortions of the image
        # and object, which is a desirable thing to have.
        # If the dictionary is small compared to the length of the sentence, a histogram of words has fewer dimensions
        # than the original vector. Fewer dimensions make learning (training) much easier.
        # count the occurrences of each word (w) in image (i) to build the histogram
        for w in words:
            npSurfHist[i][w] += 1
        #### 4. Step: normalization of the feature vector (can be changed to a distribution like ColorHisto)
        # frequency divided by the total number of words
        summe = sum(npSurfHist[i])
        for x in range(0, k):
            #npSurfHist[i][x] = npSurfHist[i][x]/k
            npSurfHist[i][x] = npSurfHist[i][x] / summe
    #stdSlr = StandardScaler().fit(npSurfHist)
    #npSurfHist = stdSlr.transform(npSurfHist)
    return npSurfHist
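# A minimal standalone sketch of the dictionary / encoding / pooling steps above, using random
# descriptors instead of real SURF output (purely illustrative; names and values are made up):
#
#   import numpy as np
#   from scipy.cluster.vq import kmeans, vq
#   fake_des = np.random.rand(500, 128).astype("float32")   # stand-in for stacked SURF descriptors
#   dic, variance = kmeans(fake_des, 5, 1)                   # 1. build a codebook with k=5 visual words
#   words, distance = vq(fake_des[:40], dic)                 # 2. assign each descriptor its nearest word
#   hist, _ = np.histogram(words, bins=range(6))             # 3. pool the words of one "image" into a histogram
#   hist = hist / float(hist.sum())                          # 4. normalize to frequencies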
# ### SIFT Experimental - use SURF
# In[35]:
# ########### Feature SIFT (Scale-invariant feature transform cf. http://docs.opencv.org/master/da/df5/tutorial_py_sift_intro.html#gsc.tab=0)
# # Api cf. http://docs.opencv.org/2.4/modules/nonfree/doc/feature_detection.html
# import cv2
# import numpy as np
# img = cv2.imread('../../03-jeux-de-donnees/101_ObjectCategories/airplanes/image_0306.jpg')
# gray= cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# sift = cv2.SIFT(nfeatures=100)
# #sift = cv2.xfeatures2d.SIFT_create()
# # Detector which detects the Keypoints in the Image
# #kp = sift.detect(gray,None)
# # Just a visualization of the Keypoints in the Image
# #img=cv2.drawKeypoints(gray,kp)
# #cv2.imwrite('D:\Sift-test\sift_keypoints.jpg',img)
# # Another visualization with FLAG: draw a circle with size of keypoint and it will even show its orientation
# #img=cv2.drawKeypoints(gray,kp,flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
# #cv2.imwrite('D:\Sift-test\sift_keypoints.jpg',img)
# # Method to compute the descriptors after the keypoints have already been detected
# #kp,des = sift.compute(gray,kp)
# #sift = cv2.xfeatures2d.SIFT_create()
# #sift = cv2.SIFT()
# # Method to detect keypoints (kp) and calculate the descriptors (des) with one function call
# kp, des = sift.detectAndCompute(gray,None)
# print (des.shape)
# ### Functions to export calculated Data to csv
# In[36]:
#### Export Features to csv
def exportToCSV(pandasSorDF, filename):
    #filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-Feature"
    path = os.getcwdu() + "\\" + filename
    if os.path.isfile(path + ".csv"):
        for i in range(1, 20):
            testFileName = filename + "-" + str(i) + ".csv"
            if not os.path.isfile(os.getcwdu() + "\\" + testFileName):
                pandasSorDF.to_csv(testFileName)
                break
    else:
        pandasSorDF.to_csv(filename + ".csv")
# In[37]:
def exportNumpyToCSV(numpyArray, filename):
    #filename = datetime.datetime.now().strftime("%Y_%m_%d") + "-Feature"
    path = os.getcwdu() + "\\" + filename
    if os.path.isfile(path + ".csv"):
        for i in range(1, 20):
            testFileName = filename + "-" + str(i) + ".csv"
            if not os.path.isfile(os.getcwdu() + "\\" + testFileName):
                np.savetxt(testFileName, numpyArray, delimiter=",")
                break
    else:
        np.savetxt(filename + ".csv", numpyArray, delimiter=",")
# ## Main Program
#
# In[38]:
# # Imports
# import os # for iterating through directories
# import pandas as pd # for Series and DataFrames
# import cv2 # for OpenCV
# import datetime # for TimeStamp in CSVFile
# from scipy.cluster.vq import * # for Clustering http://docs.scipy.org/doc/scipy/reference/cluster.vq.html
# import numpy as np # for arrays
# import time # for time calculations
# # In[39]:
# start = time.time()
# # Determine the Database to extract features
# path ='../../03-jeux-de-donnees/101_ObjectCategories'
# # get dictionary to link classLabels Text to Integers
# sClassLabels = getClassLabels(path)
# # Get all path from all images inclusive classLabel as Integer
# dfImages = imgCrawl(path, sClassLabels)
# print dfImages.classLabel.shape
# fileNameClassLabels = datetime.datetime.now().strftime("%Y_%m_%d") + "-Caltech-ClassLabels"
# exportNumpyToCSV(dfImages.classLabel, fileNameClassLabels)
# fileNameClassLabels = datetime.datetime.now().strftime("%Y_%m_%d") + "-Caltech-ClassLabels-Description"
# #exportToCSV(sClassLabels, fileNameClassLabels)
# end = time.time()
# print "Time to extract all images: " + str(end - start)
# # In[ ]:
# start = time.time()
# # Calculate the Color Histogram with 16 bins for each color -> feature length = 3 x 16 = 48
# npColorHistogram = calcColorHisto(dfImages, 16)
# print npColorHistogram.shape
# fileNameColorHis = datetime.datetime.now().strftime("%Y_%m_%d") + "-Caltech-Feature-ColorHistogram"
# #exportNumpyToCSV(npColorHistogram, fileNameColorHis)
# end = time.time()
# print "Time to calculate ColorHistogram: " + str(end - start)
# # In[ ]:
# start = time.time()
# # Calculate the SURF Histogram with K clusters (here K=5)
# npSurfHistogram = calcSurfHisto(dfImages, 5)
# print npSurfHistogram.shape
# fileNameSurfHis = datetime.datetime.now().strftime("%Y_%m_%d") + "-Caltech-Feature-SurfHistogram"
# #exportNumpyToCSV(npSurfHistogram, fileNameSurfHis)
# end = time.time()
# print "Time to calculate SurfHistogram: " + str(end - start)
# Imports
import os # for iterating through directories
import pandas as pd # for Series and DataFrames
import cv2 # for OpenCV
import datetime # for TimeStamp in CSVFile
from scipy.cluster.vq import * # for Clustering http://docs.scipy.org/doc/scipy/reference/cluster.vq.html
import numpy as np # for arrays
import time # for time calculations
from feature_extraction_try import imgCrawl, getClassLabels
# in: npImages, CELL_DIMENSION
# In order to calculate HOG, we will use a bag-of-words approach: cf. the SURF function, which is well documented.
def imageSequencing(npImages, CELL_DIMENSION):
    blocksList = []
    for i in range(1):
        print npImages[i][1]
        image = cv2.imread(npImages[i][1])
        cv2.imshow("image", image)
        resizedImage = reSize(image, CELL_DIMENSION)
        height, width, channels = resizedImage.shape
        # cut the resized image into CELL_DIMENSION x CELL_DIMENSION blocks
        blocksList.append(np.array(
            [resizedImage[row * CELL_DIMENSION:(row + 1) * CELL_DIMENSION - 1,
                          col * CELL_DIMENSION:(col + 1) * CELL_DIMENSION - 1, :]
             for col in range(width / CELL_DIMENSION)
             for row in range(height / CELL_DIMENSION)]))
    return np.array(blocksList)
def reSize(image, CELL_DIMENSION):
    height, width, channels = image.shape
    if height % CELL_DIMENSION == 0 and width % CELL_DIMENSION == 0:
        resizedImage = image
    elif width % CELL_DIMENSION == 0:
        missingPixels = CELL_DIMENSION - height % CELL_DIMENSION
        resizedImage = cv2.copyMakeBorder(image, 0, missingPixels, 0, 0, cv2.BORDER_REPLICATE)
    elif height % CELL_DIMENSION == 0:
        missingPixels = CELL_DIMENSION - width % CELL_DIMENSION
        resizedImage = cv2.copyMakeBorder(image, 0, 0, 0, missingPixels, cv2.BORDER_REPLICATE)
    else:
        missingWidthPixels = CELL_DIMENSION - width % CELL_DIMENSION
        missingHeightPixels = CELL_DIMENSION - height % CELL_DIMENSION
        resizedImage = cv2.copyMakeBorder(image, 0, missingHeightPixels, 0, missingWidthPixels, cv2.BORDER_REPLICATE)
    # height, width, channels = resizedImage.shape
    # if height % CELL_DIMENSION == 0 and width % CELL_DIMENSION == 0:
    #     print ("My job has been done")
    return resizedImage
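# Quick illustrative check of reSize (hypothetical sizes, not executed by this script):
# a 203x101 image padded with CELL_DIMENSION = 5 should come back as 205x105, so it splits
# evenly into 5x5 cells.
#
#   dummy = np.zeros((203, 101, 3), np.uint8)
#   padded = reSize(dummy, 5)
#   print padded.shape   # expected: (205, 105, 3)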
start = time.time()
path ='../../03-jeux-de-donnees/101_ObjectCategories'
print "Fetching Images in " + path
# get dictionary to link classLabels Text to Integers
sClassLabels = getClassLabels(path)
# Get all path from all images inclusive classLabel as Integer
dfImages = imgCrawl(path, sClassLabels)
npImages = dfImages.values
middle = time.time()
print "Extracted images in " + str(middle-start)
print "Sequencing Images ..."
sequencedCorpus = imageSequencing(npImages, 5)
end = time.time()
print "Sequenced images in " + str(end-middle)
print sequencedCorpus.shape
cv2.imshow("cell", sequencedCorpus[0][0])
# def even(difference):
# return not(difference % 2)
# def findmaxDim(npImages):
# max_height = 0
# max_width = 0
# for npImage in npImages:
# height, width, channels = cv2.imread(npImage[1]).shape
# if height > max_height:
# max_height=height
# if width > max_width:
# max_width=width
# return [max_height, max_width]
# def resizeImage(image, height, width):
# ratio = float(8000000)/(width*height)
# smallImage = cv2.resize(image, (0,0), fx=ratio, fy=ratio)
# return smallImage
# def enlarge(image, maxDimension, color):
# height, width, channels = image.shape
# [height_difference, width_difference] = np.array(maxDimension) - np.array([height, width])
# print(height_difference, width_difference)
# if even(height_difference) and even(width_difference):
# treatedImage = cv2.copyMakeBorder(image, height_difference/2, height_difference/2, width_difference/2, width_difference/2, cv2.BORDER_CONSTANT, value=color)
# elif even(height_difference):
# treatedImage = cv2.copyMakeBorder(image, height_difference/2, height_difference/2, width_difference/2+1, width_difference/2, cv2.BORDER_CONSTANT, value=color)
# elif even(width_difference):
# treatedImage = cv2.copyMakeBorder(image, height_difference/2+1, height_difference/2, width_difference/2, width_difference/2, cv2.BORDER_CONSTANT, value=color)
# else:
# treatedImage = cv2.copyMakeBorder(image, height_difference/2+1, height_difference/2, width_difference/2+1, width_difference/2, cv2.BORDER_CONSTANT, value=color)
# return treatedImage
# def calcHog(npImages, color, maxDimension):
# list_hog = []
# hog = cv2.HOGDescriptor()
# # poulet = preTreat(cv2.imread(npImages[0][1]), maxDimension, color)
# for npImage in npImages:
# image = cv2.imread(npImage[1])
# height, width, channels = image.shape
# if height * width > 8000000:
# g = hog.compute(resizeImage(image, height, width))
# else:
# g = hog.compute(enlarge(image, maxDimension, color))
# print g.shape
# # list_hog = [hog.compute(cv2.imread(npImage[1])) for npImage in npImages]
# return list_hog
# color=[0,0,0]
# path ='../../03-jeux-de-donnees/101_ObjectCategories'
# # get dictionary to link classLabels Text to Integers
# sClassLabels = getClassLabels(path)
# # Get all path from all images inclusive classLabel as Integer
# dfImages = imgCrawl(path, sClassLabels)
# npImages = dfImages.values
# maxDimension = findmaxDim(npImages)
# list_hog = calcHog(npImages, color, maxDimension)
# print len(list_hog)
import os # for iterating through directories
import pandas as pd # for Series and DataFrames
import cv2 # for OpenCV
import datetime # for TimeStamp in CSVFile
from scipy.cluster.vq import * # for Clustering http://docs.scipy.org/doc/scipy/reference/cluster.vq.html
import numpy as np # for arrays
import time # for time calculations
from feature_extraction_try import imgCrawl, getClassLabels
def findmaxDim(npImages):
    max_height = 0
    max_width = 0
    heights = []
    widths = []
    totals = []
    count = 0
    poulet = 0
    for npImage in npImages:
        height, width, channels = cv2.imread(npImage[1]).shape
        heights.append(height)
        widths.append(width)
        totals.append(width * height)
        if width * height > 500000:
            count += 1
        if not(abs(height - 200) < 200):
            poulet += 1
            print (npImage[1])
        if width > max_width:
            max_width = width
    print float(poulet) * 100 / len(heights)
    # print float(count)*100/len(heights)
    return heights, widths, totals
path ='../../03-jeux-de-donnees/101_ObjectCategories'
# get dictionary to link classLabels Text to Integers
sClassLabels = getClassLabels(path)
# Get all path from all images inclusive classLabel as Integer
dfImages = imgCrawl(path, sClassLabels)
npImages = dfImages.values
heights, widths, totals = findmaxDim(npImages)
heights_ = sorted(list(set(heights)), reverse=True)
widths_ = sorted(list(set(widths)), reverse=True)
totals_ = sorted(totals, reverse=True)
# print (totals_[len(totals_)/2])
# print ("height", sum(heights)/len(heights), "width", sum(widths)/len(widths) )
# print heights_
# print("poulmmet")
# print widths_
res 0 → 100644
('height', 244, 'width', 301)
[3999, 2955, 1406, 1280, 1200, 1154, 1071, 1024, 974, 960, 927, 919, 889, 870, 854, 832, 821, 817, 781, 780, 768, 764, 750, 742, 740, 723, 720, 709, 700, 689, 682, 663, 659, 656, 655, 650, 648, 630, 629, 624, 622, 617, 612, 600, 596, 594, 592, 585, 581, 576, 567, 566, 565, 560, 539, 534, 529, 528, 526, 522, 514, 510, 509, 504, 502, 500, 494, 487, 480, 477, 473, 468, 455, 452, 451, 450, 448, 445, 444, 437, 435, 430, 426, 424, 420, 418, 415, 414, 412, 409, 407, 406, 403, 400, 396, 395, 394, 393, 392, 390, 389, 388, 386, 385, 384, 382, 381, 380, 379, 378, 377, 376, 375, 374, 373, 372, 371, 370, 369, 367, 366, 365, 364, 363, 362, 361, 360, 359, 358, 356, 355, 354, 353, 352, 351, 350, 349, 348, 347, 346, 345, 344, 343, 342, 341, 340, 339, 338, 337, 336, 335, 334, 333, 332, 331, 330, 329, 328, 327, 326, 325, 324, 323, 322, 321, 320, 319, 318, 317, 316, 315, 314, 313, 312, 311, 310, 309, 308, 307, 306, 305, 304, 303, 302, 301, 300, 299, 298, 297, 296, 295, 294, 293, 292, 291, 290, 289, 288, 287, 286, 285, 284, 283, 282, 281, 280, 279, 278, 277, 276, 275, 274, 273, 272, 271, 270, 269, 268, 267, 266, 265, 264, 263, 262, 261, 260, 259, 258, 257, 256, 255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240, 239, 238, 237, 236, 235, 234, 233, 232, 231, 230, 229, 228, 227, 226, 225, 224, 223, 222, 221, 220, 219, 218, 217, 216, 215, 214, 213, 212, 211, 210, 209, 208, 207, 206, 205, 204, 203, 202, 201, 200, 199, 198, 197, 196, 195, 194, 193, 192, 191, 190, 189, 188, 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, 172, 171, 170, 169, 168, 167, 166, 165, 164, 163, 162, 161, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146, 145, 144, 143, 142, 141, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131, 130, 129, 128, 127, 126, 125, 124, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 108, 107, 106, 104, 103, 102, 101, 97, 92]
poulmmet
[3481, 2799, 1792, 1312, 1280, 1221, 1152, 1132, 1024, 989, 969, 960, 940, 911, 909, 889, 857, 842, 832, 800, 792, 768, 747, 727, 726, 721, 720, 718, 708, 705, 700, 695, 688, 685, 679, 672, 656, 655, 652, 649, 648, 642, 640, 633, 631, 628, 623, 619, 616, 615, 613, 610, 606, 604, 600, 598, 595, 594, 590, 589, 588, 583, 582, 578, 576, 574, 569, 567, 566, 565, 563, 561, 560, 559, 558, 556, 555, 552, 550, 549, 548, 546, 545, 544, 542, 540, 538, 536, 535, 534, 532, 530, 528, 526, 525, 524, 522, 520, 519, 518, 517, 516, 515, 513, 511, 510, 509, 508, 507, 506, 504, 503, 502, 501, 500, 499, 498, 497, 495, 494, 492, 490, 489, 487, 485, 484, 482, 481, 479, 477, 476, 475, 474, 473, 472, 471, 470, 469, 468, 467, 466, 465, 464, 463, 462, 461, 460, 459, 458, 457, 456, 455, 454, 453, 452, 451, 450, 449, 448, 447, 445, 444, 443, 441, 440, 439, 437, 436, 435, 434, 433, 432, 431, 430, 429, 428, 427, 426, 425, 424, 423, 422, 421, 420, 419, 418, 417, 416, 415, 414, 413, 412, 411, 410, 409, 408, 407, 406, 405, 404, 403, 402, 401, 400, 399, 398, 397, 396, 395, 394, 393, 392, 391, 390, 388, 387, 382, 376, 371, 370, 368, 365, 362, 360, 358, 356, 351, 350, 347, 346, 340, 339, 338, 333, 330, 327, 324, 320, 319, 313, 309, 306, 305, 302, 300, 299, 298, 297, 296, 295, 294, 293, 292, 291, 290, 289, 288, 287, 286, 285, 284, 283, 282, 281, 280, 279, 278, 277, 276, 275, 274, 273, 272, 271, 270, 269, 268, 267, 266, 265, 264, 263, 262, 261, 260, 259, 258, 257, 256, 255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240, 239, 238, 237, 236, 235, 234, 233, 232, 231, 230, 229, 228, 227, 226, 225, 224, 223, 222, 221, 220, 219, 218, 217, 216, 215, 214, 213, 212, 211, 210, 209, 208, 207, 206, 205, 204, 203, 202, 201, 200, 199, 198, 197, 196, 195, 194, 193, 192, 191, 190, 189, 188, 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 173, 172, 171, 170, 169, 168, 167, 166, 165, 164, 163, 162, 161, 160, 159, 158, 157, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146, 145, 144, 143, 141, 140, 137, 136, 134, 131, 128, 124, 119, 114, 105, 80]