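# NOTE: the following header text is not code; it appears to be page chrome
# captured from a Git web viewer together with the script:
#   "Select Git revision" / "test_ExecClassif.py" / "-"
#   "Baptiste Bauvin authored"
#   "get_train_annot_YOLO.py 8.74 KiB"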
import os
import pandas as pd
import librosa
import numpy as np
import matplotlib.pyplot as plt
from p_tqdm import p_map
import ipdb
import random
from datetime import date
import argparse
import cv2
import matplotlib.patches as patches
from matplotlib.patches import Rectangle
from random import randrange
from PIL import Image
from mycolorpy import colorlist as mcp
# Run date; its day and month are embedded in the names of the output
# folders (e.g. ``images_<day>_<month>``) built inside process().
today = date.today()
def arg_directory(path: str) -> str:
    """Argparse ``type=`` callback that accepts only existing directories.

    Args:
        path: Value given on the command line.

    Returns:
        ``path`` unchanged when it names an existing directory.

    Raises:
        argparse.ArgumentTypeError: If ``path`` is not an existing directory.
    """
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
    return path
# Command-line interface. Every option except --export is mandatory.
parser = argparse.ArgumentParser(
    description='TODO',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
    '-f', '--filename_path',
    required=True,
    type=str,
    help='Path and name of the file containing the annotations',
)
parser.add_argument(
    '-p', '--path_to_data',
    required=True,
    type=arg_directory,
    help='Path of the folder that contain the recordings',
)
parser.add_argument(
    '-d', '--directory',
    required=True,
    type=arg_directory,
    help='Directory to wich spectrograms and .txt files will be stored',
)
parser.add_argument(
    '-m', '--mode',
    required=True,
    type=str,
    choices=['uniform', 'personalized'],
    help='Choose the mode to calculate the y and height value',
)
parser.add_argument(
    '-u', '--unique',
    required=True,
    type=str,
    choices=['unique', 'multiple'],
    help='unique for only one spectrogram per file, multple for multiple spectrogram',
)
parser.add_argument(
    '-c', '--columns_name',
    required=True,
    type=str,
    help='Name of the column that contain the path',
)
parser.add_argument(
    '--export',
    required=False,
    type=str,
    default=None,
    help='To export the position of the bounding box on the spectrogram',
)
args = parser.parse_args()
# Output root: the species list, the spectrogram images and the label files
# are all written underneath it.
directory = args.directory
# Duration passed to librosa.load() for each spectrogram excerpt; box x and
# width are normalised by this value.
DURATION = 8
# Only the NB_CLASS most frequent annotation codes are kept.
NB_CLASS = 5
# Load the annotation table and map the input column names onto the names
# used by the rest of the script.
df = pd.read_csv(args.filename_path, low_memory=False)
df = df.rename(columns={
    'label': 'Code',
    'annotation_initial_time': 'start',
    'annotation_final_time': 'stop',
    'duree': 'd_annot',
    'min_frequency': 'min_freq',
    'max_frequency': 'max_freq',
    'avg_frequency': 'midl_y',
})

# Keep only the NB_CLASS codes with the highest count in the first column.
tab = df.groupby('Code').count()
tab = tab.sort_values(tab.columns[0], ascending=False)[:NB_CLASS]
# .copy() so the column assignments below write to an independent frame
# instead of a slice of the unfiltered table.
df = df[df.Code.isin(tab.index)].copy()

# Frequency bounds: fill gaps when both columns exist, otherwise fall back to
# a fixed band. Assigning the filled column back (rather than calling
# fillna(inplace=True) on a column selection) is required for the fill to
# reach `df` under pandas Copy-on-Write.
if {'max_freq', 'min_freq'}.issubset(df.columns):
    df['max_freq'] = df['max_freq'].fillna(9000)
    df['min_freq'] = df['min_freq'].fillna(1000)
else:
    df['max_freq'] = 9000
    df['min_freq'] = 1000
    df['midl_y'] = 5000

# Derived timing columns: annotation length and temporal centre.
df['d_annot'] = df.stop - df.start
df['midl'] = (df.stop + df.start) / 2
df['Path'] = df[args.columns_name]

# Drop annotations that are not shorter than one excerpt.
df = df[df.d_annot < DURATION]
# The previous index is kept as a leading 'index' column; the species ranking
# further down sorts on df.columns[0].
df = df.reset_index()
# Rank the remaining codes by the count of the first column, most frequent
# first; the rank becomes the numeric class id written to the label files.
list_espece = df.groupby('Code').count().sort_values(df.columns[0], ascending=False)

# Build the code -> class id table with a single concat instead of one concat
# per species. The one-row frames (and the leading empty frame) are kept on
# purpose so the result has the same row labels and dtypes as before, which
# existing label-based lookups on `data` rely on.
_species_rows = [
    pd.DataFrame([[esp, i]], columns=['espece', 'ind'])
    for i, esp in enumerate(list_espece.index)
]
data = pd.concat([pd.DataFrame(columns=['espece', 'ind']), *_species_rows])

# Save the ordered species list inside the output directory. os.path.join is
# used so the file lands in `directory` even when the path was given without
# a trailing separator.
liste_espece = data.espece
liste_espece.to_csv(os.path.join(directory, 'liste_especes.csv'), index=False)
print('\n', data)
# Disabled alternative palette:
# color = mcp.gen_color(cmap="Wistia", n=len(list_espece))

# Random (r, g, b) colour for each of 30 class ids, used when drawing the
# exported bounding boxes. The frame is built in one call rather than by
# concatenating 30 one-row frames; the channels are still drawn in r, g, b
# order for ids 0..29, so the sequence of random draws is unchanged.
_palette = [
    (randrange(255), randrange(255), randrange(255))
    for _ in range(30)
]
colors = pd.DataFrame({'color': _palette, 'species': list(range(30))})
def _dated_dir(prefix):
    """Return ``<directory>/<prefix>_<day>_<month>`` for the run date."""
    return os.path.join(directory, f'{prefix}_{today.day}_{today.month}')


def _window_offset(first_start, first_midl, jitter):
    """Return the offset at which the excerpt for one window starts.

    In 'multiple' mode a window whose first annotation starts at 3 or later
    is placed 3.5 before that annotation's centre, shifted by `jitter`;
    everything else starts at 0. The result is clamped at 0 because
    `first_midl - 3.5 + jitter` can be negative (e.g. start 3, jitter -1.5).
    """
    if args.unique == 'multiple' and first_start >= 3:
        return max(0.0, first_midl - 3.5 + jitter)
    return 0


def _yolo_box(row, offset, duration, sr):
    """Return the normalised (x, y, width, height) box for one annotation.

    x and width are fractions of `duration`; y and height are fractions of
    the 0..sr/2 frequency axis, with y measured from the top of the image.
    """
    x_pxl = (row.midl - offset) / duration
    width_pxl = (row.stop - row.start) / duration
    if args.mode == 'uniform':
        return x_pxl, 0.5, width_pxl, 0.8
    nyquist = sr / 2
    y_pxl = 1 - (row.midl_y / nyquist)
    height_pxl = (row.max_freq - row.min_freq) / nyquist
    if height_pxl > 1:
        height_pxl = 1
    elif height_pxl > y_pxl * 2:
        # The box would stick out above the image: move its centre down so
        # the top edge sits on the image border.
        y_pxl = y_pxl + 0.5 * (height_pxl - y_pxl * 2)
    return x_pxl, y_pxl, width_pxl, height_pxl


def process(x):
    """Write spectrogram images and YOLO label files for one recording.

    Args:
        x: ``(count, (f, grp))`` as produced by
            ``enumerate(df.groupby('Path'))``: the group number, the group
            key (passed to ``librosa.load`` as the file name) and the
            annotation rows of that recording.

    For every window of annotations the function saves the spectrogram under
    ``images_<day>_<month>/<Code>/`` and ``images_<day>_<month>/all/``, and a
    space-separated label file (``id x y width height``) under
    ``labels_<day>_<month>/``. When ``--export`` is given, a copy with the
    boxes drawn is saved under ``images_annotes_<day>_<month>/``.
    """
    count, (f, grp) = x
    filename = str(f)
    duration = DURATION

    # Create the output tree up front. exist_ok makes this safe when several
    # worker processes start at once or when part of the tree already exists.
    images_dir = _dated_dir('images')
    labels_dir = _dated_dir('labels')
    for sub in [*list_espece.index, 'all']:
        os.makedirs(os.path.join(images_dir, sub), exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)
    annotated_dir = None
    if args.export is not None:
        annotated_dir = _dated_dir('images_annotes')
        os.makedirs(annotated_dir, exist_ok=True)

    while len(grp) != 0:
        # Annotations centred within 7 of the first remaining start time
        # form the current window.
        tab = grp[grp.midl <= grp.start.iloc[0] + 7]
        if len(tab) == 0:
            # Nothing matched (the comparison is False for every row):
            # fall back to all remaining rows so the loop still progresses.
            tab = grp
            print(tab)

        # The jitter is drawn for every window, whatever the mode.
        rd = round(random.uniform(-1.5, 1.5), 2)
        offset = _window_offset(tab.start.iloc[0], tab.midl.iloc[0], rd)

        # Log-magnitude STFT, flipped so low frequencies are at the bottom.
        window_size = 1024
        window = np.hanning(window_size)
        audio, sr = librosa.load(filename, offset=offset, duration=duration, sr=None)
        stft = librosa.core.spectrum.stft(audio, n_fft=window_size, hop_length=512, window=window)
        log_spec = np.flipud(np.log10(np.abs(stft)))
        plt.imshow(log_spec, aspect="auto", interpolation=None, cmap='jet',
                   vmin=log_spec.mean(), vmax=log_spec.max())
        # Remove all margins so image coordinates map directly to time/freq.
        plt.subplots_adjust(top=1, bottom=0, left=0, right=1)

        # One label row per annotation of the window.
        boxes = []
        for _, row in tab.iterrows():
            # Positional .iloc[0]: independent of the row labels of `data`.
            class_id = str(data.loc[data.espece == row.Code, 'ind'].iloc[0])
            boxes.append([class_id, *_yolo_box(row, offset, duration, sr)])
        # dtype=object keeps every cell exactly as computed when written out.
        fin = pd.DataFrame(boxes, columns=['id', 'x', 'y', 'width', 'height'], dtype=object)

        grp = grp.drop(tab.index)

        # File name and species folder come from the last row of the window.
        # NOTE(review): `count` is the per-recording group number, so every
        # window of the same recording gets the same name and overwrites the
        # previous one - confirm whether a per-window suffix is wanted.
        name = str(row.Path.replace('/', '_').replace('.', '_') + '_' + str(count))
        name_file = os.path.join(labels_dir, name + '.txt')
        image_all = os.path.join(images_dir, 'all', name + '.jpg')

        plt.savefig(os.path.join(images_dir, row.Code, name + '.jpg'))
        fin.to_csv(name_file, sep=' ', header=False, index=False)
        plt.savefig(image_all)
        plt.close()

        if args.export is not None:
            # Re-read the saved spectrogram and draw every box on it.
            im = cv2.imread(image_all)
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            H, W = im.shape[0], im.shape[1]
            for box in fin.itertuples(index=False):
                cx, cy = box.x * W, box.y * H
                w, h = box.width * W, box.height * H
                bottom_left = (int(cx - (0.5 * w)), int(cy + (0.5 * h)))
                top_right = (int(cx + (0.5 * w)), int(cy - (0.5 * h)))
                # Filled 10x20 tag hanging from the top-right corner.
                tag_corner = (top_right[0] - 10, top_right[1] + 20)
                text_shape = (tag_corner[0], tag_corner[1] - 5)
                label = str(box.id)
                # `species` holds integers while the label is text, and cv2
                # needs a plain colour tuple rather than a Series.
                box_color = colors.loc[colors.species == int(label), 'color'].iloc[0]
                cv2.rectangle(im, pt1=bottom_left, pt2=top_right, color=box_color, thickness=1)
                cv2.rectangle(im, pt1=tag_corner, pt2=top_right, color=box_color, thickness=-1)
                cv2.putText(im, label, text_shape, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            plt.imshow(im)
            plt.savefig(os.path.join(annotated_dir, name + '.jpg'))
            plt.close()
# Guarded so that worker processes which re-import this module (start methods
# other than fork) do not launch the pool again.
if __name__ == '__main__':
    # One task per recording; `total` sizes the progress bar.
    groups = df.groupby('Path')
    p_map(process, enumerate(groups), num_cpus=2, total=len(groups))
    print('saved to ', directory)